Try_compile() based CMAKE SSE checking

master
ismagom 9 years ago
parent c64882d9c2
commit 309791cc61

@ -87,13 +87,11 @@ FIND_PACKAGE(SSE)
IF(CMAKE_COMPILER_IS_GNUCC) IF(CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g -march=native -O3") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g -march=native -O3")
IF(AVX_FOUND) IF(HAVE_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE")
ELSEIF(SSE4_2_FOUND) ELSEIF(HAVE_SSE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -msse_4.2 -DLV_HAVE_SSE") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -msse4.1 -DLV_HAVE_SSE")
ELSEIF(SSE4_1_FOUND) ENDIF(HAVE_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -msse_4.1 -DLV_HAVE_SSE")
ENDIF(AVX_FOUND)
# IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug") # IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror -Wno-error=implicit-function-declaration -Wno-error=unused-but-set-variable") # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror -Wno-error=implicit-function-declaration -Wno-error=unused-but-set-variable")

@ -0,0 +1,107 @@
#.rst:
# CheckCSourceRuns
# ----------------
#
# Check if the given C source code compiles and runs.
#
# CHECK_C_SOURCE_RUNS(<code> <var>)
#
# ::
#
# <code> - source code to try to compile
# <var> - variable to store the result
# (1 for success, empty for failure)
# Will be created as an internal cache variable.
#
# The following variables may be set before calling this macro to modify
# the way the check is run:
#
# ::
#
# CMAKE_REQUIRED_FLAGS = string of compile command line flags
# CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar)
# CMAKE_REQUIRED_INCLUDES = list of include directories
# CMAKE_REQUIRED_LIBRARIES = list of libraries to link
# CMAKE_REQUIRED_QUIET = execute quietly without messages
#=============================================================================
# Copyright 2006-2009 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# check_c_source_runs(<code> <var>)
#
# Writes <code> to a scratch src.c inside the build tree, compiles and runs
# it through try_run(), and stores the verdict in <var>:
#   * 1     (internal cache entry) - the program built and exited with 0;
#   * empty (internal cache entry) - it did not build, or exited non-zero;
#   * the raw exit-code value (plain, non-cached variable) - only when
#     cross-compiling and that value matches FAILED_TO_RUN.
# The whole check is skipped when <var> is already defined.
#
# Inputs read from the caller: CMAKE_REQUIRED_FLAGS,
# CMAKE_REQUIRED_DEFINITIONS, CMAKE_REQUIRED_INCLUDES,
# CMAKE_REQUIRED_LIBRARIES and CMAKE_REQUIRED_QUIET.
#
# NOTE: this is a macro, so every helper variable set below
# (MACRO_CHECK_FUNCTION_DEFINITIONS, CHECK_C_SOURCE_COMPILES_ADD_*, OUTPUT,
# <var>_EXITCODE, <var>_COMPILED) stays visible in the caller's scope.
macro(CHECK_C_SOURCE_RUNS SOURCE VAR)
  if(NOT DEFINED "${VAR}")
    # -D<var> followed by the caller's extra compiler flags; handed to the
    # test project below via the COMPILE_DEFINITIONS CMAKE_FLAGS entry.
    set(MACRO_CHECK_FUNCTION_DEFINITIONS
        "-D${VAR} ${CMAKE_REQUIRED_FLAGS}")

    # Optional include directories for the test project.
    if(NOT CMAKE_REQUIRED_INCLUDES)
      unset(CHECK_C_SOURCE_COMPILES_ADD_INCLUDES)
    else()
      set(CHECK_C_SOURCE_COMPILES_ADD_INCLUDES
          "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}")
    endif()

    # Optional LINK_LIBRARIES clause for try_run().
    if(NOT CMAKE_REQUIRED_LIBRARIES)
      unset(CHECK_C_SOURCE_COMPILES_ADD_LIBRARIES)
    else()
      set(CHECK_C_SOURCE_COMPILES_ADD_LIBRARIES
          LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
    endif()

    # Materialise the snippet as a real source file.
    file(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.c"
         "${SOURCE}\n")

    if(NOT CMAKE_REQUIRED_QUIET)
      message(STATUS "Performing Test ${VAR}")
    endif()

    # Build and execute the snippet. try_run() reports the program's exit
    # code in <var>_EXITCODE and whether it compiled in <var>_COMPILED.
    try_run(
      ${VAR}_EXITCODE
      ${VAR}_COMPILED
      ${CMAKE_BINARY_DIR}
      ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.c
      COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
      ${CHECK_C_SOURCE_COMPILES_ADD_LIBRARIES}
      CMAKE_FLAGS
        -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
        -DCMAKE_SKIP_RPATH:BOOL=${CMAKE_SKIP_RPATH}
        "${CHECK_C_SOURCE_COMPILES_ADD_INCLUDES}"
      COMPILE_OUTPUT_VARIABLE OUTPUT)

    # A snippet that does not even compile is treated as exit code 1.
    if(NOT ${VAR}_COMPILED)
      set(${VAR}_EXITCODE 1)
    endif()

    if(NOT "${${VAR}_EXITCODE}" EQUAL 0)
      # Failure path: anything other than a clean exit code of 0.
      if(CMAKE_CROSSCOMPILING AND "${${VAR}_EXITCODE}" MATCHES "FAILED_TO_RUN")
        # The binary could not be executed on the build host; pass the
        # marker value through as a plain variable instead of caching it.
        set(${VAR} "${${VAR}_EXITCODE}")
      else()
        set(${VAR} "" CACHE INTERNAL "Test ${VAR}")
      endif()
      if(NOT CMAKE_REQUIRED_QUIET)
        message(STATUS "Performing Test ${VAR} - Failed")
      endif()
      file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
           "Performing C SOURCE FILE Test ${VAR} failed with the following output:\n"
           "${OUTPUT}\n"
           "Return value: ${${VAR}_EXITCODE}\n"
           "Source file was:\n${SOURCE}\n")
    else()
      # Success path: the program ran and returned 0.
      set(${VAR} 1 CACHE INTERNAL "Test ${VAR}")
      if(NOT CMAKE_REQUIRED_QUIET)
        message(STATUS "Performing Test ${VAR} - Success")
      endif()
      file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
           "Performing C SOURCE FILE Test ${VAR} succeeded with the following output:\n"
           "${OUTPUT}\n"
           "Return value: ${${VAR}}\n"
           "Source file was:\n${SOURCE}\n")
    endif()
  endif()
endmacro()

@ -1,85 +1,58 @@
# Check if SSE instructions are available on the machine where if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|^i[3,9]86$")
# the project is compiled. return()
endif()
# Minimum requirement to enable SSE turbo decoder is SSE4.1
# Since SSE 4.1 includes all previous SSE, look only for this one. include(CheckCSourceRuns)
# Check also AVX availability (for equalizer) option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON)
option(ENABLE_AVX "Enable compile-time AVX support." ON)
IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) if (ENABLE_SSE)
#
STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO}) # Check compiler for SSE4_1 intrinsics
STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE) #
IF (SSE41_TRUE) if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") set(CMAKE_REQUIRED_FLAGS "-msse4.1")
ELSE (SSE41_TRUE) check_c_source_runs("
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") #include <emmintrin.h>
ENDIF (SSE41_TRUE) #include <smmintrin.h>
STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO}) int main()
STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE) {
IF (SSE42_TRUE) __m128i a = _mm_setzero_si128();
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host") __m128i b = _mm_minpos_epu16(a);
ELSE (SSE42_TRUE) return 0;
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") }"
ENDIF (SSE42_TRUE) HAVE_SSE)
endif()
STRING(REGEX REPLACE "^.*(avx).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "avx" "${SSE_THERE}" AVX_TRUE) if (HAVE_SSE)
IF (AVX_TRUE) message(STATUS "SSE4.1 is enabled - target CPU must support it")
set(AVX_FOUND true CACHE BOOL "AVX available on host") endif()
ELSE (AVX_TRUE)
set(AVX_FOUND false CACHE BOOL "AVX available on host") if (ENABLE_AVX)
ENDIF (AVX_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") #
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE # Check compiler for AVX intrinsics
CPUINFO) #
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO}) set(CMAKE_REQUIRED_FLAGS "-mavx")
STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE) check_c_source_runs("
IF (SSE41_TRUE) #include <immintrin.h>
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE) int main()
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") {
ENDIF (SSE41_TRUE) __m256i a = _mm256_setzero_si256();
return 0;
STRING(REGEX REPLACE "^.*(SSE4.2).*$" "\\1" SSE_THERE ${CPUINFO}) }"
STRING(COMPARE EQUAL "SSE4.2" "${SSE_THERE}" SSE42_TRUE) HAVE_AVX)
IF (SSE42_TRUE) endif()
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
ELSE (SSE42_TRUE) if (HAVE_AVX)
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") message(STATUS "AVX is enabled - target CPU must support it")
ENDIF (SSE42_TRUE) endif()
endif()
STRING(REGEX REPLACE "^.*(AVX).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "AVX" "${SSE_THERE}" AVX_TRUE) endif()
IF (AVX_TRUE)
set(AVX_FOUND true CACHE BOOL "AVX available on host") mark_as_advanced(HAVE_SSE, HAVE_AVX)
ELSE (AVX_TRUE)
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ENDIF (AVX_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
# TODO
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
if(NOT SSE4_1_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.")
endif(NOT SSE4_1_FOUND)
if(NOT SSE4_2_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE4.2 on this machine.")
endif(NOT SSE4_2_FOUND)
if(NOT AVX_FOUND)
MESSAGE(STATUS "Could not find hardware support for AVX on this machine.")
endif(NOT AVX_FOUND)
mark_as_advanced(SSE4_1_FOUND AVX_FOUND)

@ -326,8 +326,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len,
/* Simplify load if we do not need to wrap (ie high rates) */ /* Simplify load if we do not need to wrap (ie high rates) */
if (in_len <= out_len) { if (in_len <= out_len) {
for (int i=0;i<in_len/8;i++) { for (int i=0;i<in_len/8;i++) {
xVal = _mm_load_si128(xPtr); xVal = _mm_loadu_si128(xPtr);
lutVal = _mm_load_si128(lutPtr); lutVal = _mm_loadu_si128(lutPtr);
for (int j=0;j<8;j++) { for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j); int16_t x = (int16_t) _mm_extract_epi16(xVal, j);
@ -346,7 +346,7 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len,
int nwrapps = 0; int nwrapps = 0;
while(inputCnt < in_len - 8) { while(inputCnt < in_len - 8) {
xVal = _mm_loadu_si128(xPtr); xVal = _mm_loadu_si128(xPtr);
lutVal = _mm_load_si128(lutPtr); lutVal = _mm_loadu_si128(lutPtr);
for (int j=0;j<8;j++) { for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j); int16_t x = (int16_t) _mm_extract_epi16(xVal, j);

@ -36,8 +36,6 @@
#include "srslte/utils/vector_simd.h" #include "srslte/utils/vector_simd.h"
#include "srslte/utils/bit.h" #include "srslte/utils/bit.h"
#define HAVE_VECTOR_SIMD
#ifdef HAVE_VOLK #ifdef HAVE_VOLK
#include "volk/volk.h" #include "volk/volk.h"
#endif #endif
@ -106,7 +104,7 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
} }
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) { void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD #ifndef LV_HAVE_SSE
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]-y[i]; z[i] = x[i]-y[i];
@ -132,7 +130,7 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
} }
void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) { void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD #ifndef LV_HAVE_SSE
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]+y[i]; z[i] = x[i]+y[i];
@ -193,7 +191,7 @@ void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
} }
void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) { void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD #ifndef LV_HAVE_SSE
int i; int i;
int pow2_div = 1<<n_rightshift; int pow2_div = 1<<n_rightshift;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
@ -252,7 +250,7 @@ void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len) {
} }
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) { void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD #ifndef LV_HAVE_SSE
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = (int16_t) (x[i]*scale); z[i] = (int16_t) (x[i]*scale);
@ -269,7 +267,7 @@ void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
} }
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) { void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD #ifndef LV_HAVE_SSE
for (int i=0;i<len;i++) { for (int i=0;i<len;i++) {
y[lut[i]] = x[i]; y[lut[i]] = x[i];
} }
@ -474,7 +472,7 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
} }
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) { void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD #ifndef LV_HAVE_SSE
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]*y[i]; z[i] = x[i]*y[i];

Loading…
Cancel
Save