From 86750b2db73516e3bb6107594005c4b686135176 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Tue, 17 Jan 2017 11:31:03 +0100 Subject: [PATCH 01/16] removed volk dependency. Checked and working --- cmake/modules/FindVolk.cmake | 144 ------------------ srslte/CMakeLists.txt | 19 --- srslte/include/srslte/utils/vector_simd.h | 20 +++ srslte/lib/CMakeLists.txt | 7 - srslte/lib/utils/vector.c | 167 ++++----------------- srslte/lib/utils/vector_simd.c | 170 ++++++++++++++++++++++ 6 files changed, 222 insertions(+), 305 deletions(-) delete mode 100644 cmake/modules/FindVolk.cmake diff --git a/cmake/modules/FindVolk.cmake b/cmake/modules/FindVolk.cmake deleted file mode 100644 index 5dbe17cd5..000000000 --- a/cmake/modules/FindVolk.cmake +++ /dev/null @@ -1,144 +0,0 @@ -INCLUDE(FindPkgConfig) -PKG_CHECK_MODULES(PC_VOLK volk QUIET) - -FIND_PATH( - VOLK_INCLUDE_DIRS - NAMES volk/volk.h - HINTS $ENV{VOLK_DIR}/include - ${CMAKE_INSTALL_PREFIX}/include - ${PC_VOLK_INCLUDE_DIR} - PATHS /usr/local/include - /usr/include -) - -FIND_LIBRARY( - VOLK_LIBRARIES - NAMES volk - HINTS $ENV{VOLK_DIR}/lib - ${CMAKE_INSTALL_PREFIX}/lib - ${CMAKE_INSTALL_PREFIX}/lib64 - ${PC_VOLK_LIBDIR} - PATHS /usr/local/lib - /usr/local/lib64 - /usr/lib - /usr/lib64 -) - -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS) -MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS) - -IF(VOLK_FOUND) - SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION) - - - - SET(VOLK_DEFINITIONS "HAVE_VOLK") - IF(${HAVE_VOLK_CONVERT_CI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_ABS_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_VEC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_INTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONVERT_FI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ACC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_REAL_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DIVIDE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_F_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ATAN_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION") - ENDIF() -ENDIF(VOLK_FOUND) diff --git a/srslte/CMakeLists.txt b/srslte/CMakeLists.txt index 0ecf9ddf6..82daf4df2 100644 --- a/srslte/CMakeLists.txt +++ b/srslte/CMakeLists.txt @@ -74,25 +74,6 @@ else(BLADERF_FOUND OR UHD_FOUND) add_definitions(-DDISABLE_RF) endif(BLADERF_FOUND OR UHD_FOUND) -include(CheckFunctionExistsMath) -if(${DISABLE_VOLK}) - if(${DISABLE_VOLK} EQUAL 0) - find_package(Volk) - else(${DISABLE_VOLK} EQUAL 0) - message(STATUS "VOLK library disabled (DISABLE_VOLK=1)") - endif(${DISABLE_VOLK} EQUAL 0) -else(${DISABLE_VOLK}) - find_package(Volk) -endif(${DISABLE_VOLK}) - -if(VOLK_FOUND) - include_directories(${VOLK_INCLUDE_DIRS}) - link_directories(${VOLK_LIBRARY_DIRS}) - message(STATUS " Compiling with VOLK SIMD library.") -else(VOLK_FOUND) - message(STATUS " VOLK SIMD library NOT found. Using generic implementation.") -endif(VOLK_FOUND) - ######################################################################## # Add subdirectories ######################################################################## diff --git a/srslte/include/srslte/utils/vector_simd.h b/srslte/include/srslte/utils/vector_simd.h index cd6eb4d28..cfdef5ecd 100644 --- a/srslte/include/srslte/utils/vector_simd.h +++ b/srslte/include/srslte/utils/vector_simd.h @@ -49,6 +49,26 @@ SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); + +SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); + +SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); + +SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); + +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); + +SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); + +SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); + +SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); + +SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); + +SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len); + #ifdef __cplusplus } #endif diff --git a/srslte/lib/CMakeLists.txt b/srslte/lib/CMakeLists.txt index 3a73d8761..1e7b11df6 100644 --- a/srslte/lib/CMakeLists.txt +++ b/srslte/lib/CMakeLists.txt @@ -90,13 +90,6 @@ if(RF_FOUND) endif(BLADERF_FOUND) endif(RF_FOUND) -if(VOLK_FOUND) - target_link_libraries(srslte ${VOLK_LIBRARIES}) - if(NOT DisableMEX) - target_link_libraries(srslte_static ${VOLK_LIBRARIES}) - endif(NOT DisableMEX) -endif(VOLK_FOUND) - INSTALL(TARGETS srslte DESTINATION ${LIBRARY_DIR}) SRSLTE_SET_PIC(srslte) diff --git a/srslte/lib/utils/vector.c b/srslte/lib/utils/vector.c index 469320177..578a644c2 100644 --- a/srslte/lib/utils/vector.c +++ b/srslte/lib/utils/vector.c @@ -35,10 +35,6 @@ #include "srslte/utils/vector_simd.h" #include "srslte/utils/bit.h" -#ifdef HAVE_VOLK -#include "volk/volk.h" -#endif - int srslte_vec_acc_ii(int *x, uint32_t len) { int i; int z=0; @@ -48,19 +44,14 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { return z; } +// Used in PRACH detector float srslte_vec_acc_ff(float *x, uint32_t len) { -#ifdef HAVE_VOLK_ACC_FUNCTION - float result; - volk_32f_accumulator_s32f(&result,x,len); - return result; -#else int i; float z=0; for (i=0;i y[i]) { @@ -752,17 +656,11 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) { z[i] = y[i]; } } -#endif } +// CP autocorr uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION - uint32_t target=0; - volk_32fc_index_max_16u(&target,x,len); - return target; - -#else uint32_t i; float m=-FLT_MAX; uint32_t p=0; @@ -775,7 +673,6 @@ uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { } } return p; -#endif } void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) { diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index 1612f2c07..01a3d4c64 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -280,3 +280,173 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len) } #endif } + + +// for enb no-volk +void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int points = len / 4; + + const float* xPtr = (const float*) x; + const float* yPtr = (const float*) y; + float* zPtr = (float*) z; + + __m128 xVal, yVal, zVal; + for(;number < points; number++){ + + xVal = _mm_load_ps(xPtr); + yVal = _mm_load_ps(yPtr); + + zVal = _mm_add_ps(xVal, yVal); + + _mm_store_ps(zPtr, zVal); + + xPtr += 4; + yPtr += 4; + zPtr += 4; + } + + number = points * 4; + for(;number < len; number++){ + z[number] = x[number] + y[number]; + } +#endif +} + +static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) { + __m128 yl, yh, tmp1, tmp2; + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br + tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di +} + +void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) +{ +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int halfPoints = len / 2; + + __m128 xVal, yVal, zVal; + float* zPtr = (float*) z; + const float* xPtr = (const float*) x; + const float* yPtr = (const float*) y; + + for(; number < halfPoints; number++){ + xVal = _mm_load_ps(xPtr); + yVal = _mm_load_ps(yPtr); + zVal = _mm_complexmul_ps(xVal, yVal); + _mm_store_ps(zPtr, zVal); + + xPtr += 4; + yPtr += 4; + zPtr += 4; + } + + if((len % 2) != 0){ + *zPtr = (*xPtr) * (*yPtr); + } +#endif +} + +static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) { + const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); + y = _mm_xor_ps(y, conjugator); + return _mm_complexmul_ps(x, y); +} + +void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int halfPoints = len / 2; + + __m128 xVal, yVal, zVal; + float* zPtr = (float*) z; + const float* xPtr = (const float*) x; + const float* yPtr = (const float*) y; + + for(; number < halfPoints; number++){ + xVal = _mm_load_ps(xPtr); + yVal = _mm_load_ps(yPtr); + zVal = _mm_complexmulconj_ps(xVal, yVal); + _mm_store_ps(zPtr, zVal); + + xPtr += 4; + yPtr += 4; + zPtr += 4; + } + + if((len % 2) != 0){ + *zPtr = (*xPtr) * (*yPtr); + } +#endif +} + +void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int halfPoints = len / 2; + + __m128 xVal, yl, yh, zVal, tmp1, tmp2; + float* zPtr = (float*) z; + const float* xPtr = (const float*) x; + + // Set up constant scalar vector + yl = _mm_set_ps1(creal(h)); + yh = _mm_set_ps1(cimag(h)); + + for(;number < halfPoints; number++){ + + xVal = _mm_load_ps(xPtr); + tmp1 = _mm_mul_ps(xVal,yl); + xVal = _mm_shuffle_ps(xVal,xVal,0xB1); + tmp2 = _mm_mul_ps(xVal,yh); + zVal = _mm_addsub_ps(tmp1,tmp2); + _mm_storeu_ps(zPtr,zVal); + + xPtr += 4; + zPtr += 4; + } + + if((len % 2) != 0) { + *zPtr = (*xPtr) * h; + } +#endif +} + +void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int quarterPoints = len / 4; + + const float* xPtr = (const float*) x; + float* zPtr = z; + + __m128 xVal1, xVal2, zVal; + for(; number < quarterPoints; number++){ + xVal1 = _mm_load_ps(xPtr); + xPtr += 4; + xVal2 = _mm_load_ps(xPtr); + xPtr += 4; + xVal1 = _mm_mul_ps(xVal1, xVal1); + xVal2 = _mm_mul_ps(xVal2, xVal2); + zVal = _mm_hadd_ps(xVal1, xVal2); + _mm_store_ps(zPtr, zVal); + zPtr += 4; + } + + number = quarterPoints * 4; + for(; number < len; number++){ + float val1Real = *xPtr++; + float val1Imag = *xPtr++; + *zPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +#endif +} + + + + From 40c161c2e6f74ed62765d7a6767f8d4a98295a8f Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 20 Jan 2017 11:50:16 +0100 Subject: [PATCH 02/16] ifdef for simd functions --- srslte/lib/utils/vector_simd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index 01a3d4c64..c150209ca 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -314,6 +314,7 @@ void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) { #endif } +#ifdef LV_HAVE_SSE static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) { __m128 yl, yh, tmp1, tmp2; yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr @@ -323,6 +324,7 @@ static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) { tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di } +#endif void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { @@ -352,11 +354,13 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) #endif } +#ifdef LV_HAVE_SSE static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) { const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); y = _mm_xor_ps(y, conjugator); return _mm_complexmul_ps(x, y); } +#endif void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { #ifdef LV_HAVE_SSE From 949d4b8df84a1f7895c03375c50ce6ed77eac155 Mon Sep 17 00:00:00 2001 From: Andre Puschmann Date: Wed, 25 Jan 2017 10:18:27 +0100 Subject: [PATCH 03/16] novolk: use unaligned load/store SSE intrinsics, allow debug builds --- srslte/lib/fec/rm_turbo.c | 13 +++++++++---- srslte/lib/utils/vector_simd.c | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/srslte/lib/fec/rm_turbo.c b/srslte/lib/fec/rm_turbo.c index 5856a4e46..751c9fe0c 100644 --- a/srslte/lib/fec/rm_turbo.c +++ b/srslte/lib/fec/rm_turbo.c @@ -327,8 +327,11 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - int16_t x = (int16_t) _mm_extract_epi16(xVal, j); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); + // For -O0 builds: shuffle j-th element to pos 0 and extract from there + _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); + int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); + _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); output[l] += x; } xPtr ++; @@ -346,8 +349,10 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - int16_t x = (int16_t) _mm_extract_epi16(xVal, j); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); + _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); + int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); + _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); output[l] += x; } xPtr++; diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index c150209ca..8d91c5f42 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -227,8 +227,10 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l lutVal = _mm_load_si128(lutPtr); for (int i=0;i<8;i++) { - int16_t x = (int16_t) _mm_extract_epi16(xVal, i); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, i); + _mm_shuffle_epi8(xVal,_mm_set1_epi8(i)); + int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); + _mm_shuffle_epi8(lutVal,_mm_set1_epi8(i)); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); y[l] = x; } xPtr ++; @@ -295,12 +297,12 @@ void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) { __m128 xVal, yVal, zVal; for(;number < points; number++){ - xVal = _mm_load_ps(xPtr); - yVal = _mm_load_ps(yPtr); + xVal = _mm_loadu_ps(xPtr); + yVal = _mm_loadu_ps(yPtr); zVal = _mm_add_ps(xVal, yVal); - _mm_store_ps(zPtr, zVal); + _mm_storeu_ps(zPtr, zVal); xPtr += 4; yPtr += 4; @@ -338,10 +340,10 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) const float* yPtr = (const float*) y; for(; number < halfPoints; number++){ - xVal = _mm_load_ps(xPtr); - yVal = _mm_load_ps(yPtr); + xVal = _mm_loadu_ps(xPtr); + yVal = _mm_loadu_ps(yPtr); zVal = _mm_complexmul_ps(xVal, yVal); - _mm_store_ps(zPtr, zVal); + _mm_storeu_ps(zPtr, zVal); xPtr += 4; yPtr += 4; From 78de0c718b9f3059d7e47c8c2ce9b661fcdded8e Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 25 Jan 2017 15:20:47 +0100 Subject: [PATCH 04/16] fixed alignment problem in vec_abs_simd --- srslte/lib/utils/vector_simd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index c150209ca..25c1d2b7d 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -431,14 +431,14 @@ void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) { __m128 xVal1, xVal2, zVal; for(; number < quarterPoints; number++){ - xVal1 = _mm_load_ps(xPtr); + xVal1 = _mm_loadu_ps(xPtr); xPtr += 4; - xVal2 = _mm_load_ps(xPtr); + xVal2 = _mm_loadu_ps(xPtr); xPtr += 4; xVal1 = _mm_mul_ps(xVal1, xVal1); xVal2 = _mm_mul_ps(xVal2, xVal2); zVal = _mm_hadd_ps(xVal1, xVal2); - _mm_store_ps(zPtr, zVal); + _mm_storeu_ps(zPtr, zVal); zPtr += 4; } From f2a35c6dd1ec58e3db91029b50e7252ec441e6d2 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 25 Jan 2017 17:30:16 +0100 Subject: [PATCH 05/16] fixed tests with new simd functions --- CMakeLists.txt | 2 - srslte/include/srslte/utils/vector_simd.h | 2 + srslte/lib/utils/vector.c | 11 +-- srslte/lib/utils/vector_simd.c | 109 ++++++++++++++-------- 4 files changed, 77 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 206139295..ae8e9c532 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,9 +115,7 @@ if(CMAKE_COMPILER_IS_GNUCC) if (HAVE_AVX2) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx2 -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") else (HAVE_AVX2) - message("NOT HAVE AVX2") if(HAVE_AVX) - message("HAVE AVX") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") elseif(HAVE_SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -msse4.1 -Ofast -funroll-loops -DLV_HAVE_SSE") diff --git a/srslte/include/srslte/utils/vector_simd.h b/srslte/include/srslte/utils/vector_simd.h index cfdef5ecd..81aed443f 100644 --- a/srslte/include/srslte/utils/vector_simd.h +++ b/srslte/include/srslte/utils/vector_simd.h @@ -51,6 +51,8 @@ SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, ui SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); + SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); diff --git a/srslte/lib/utils/vector.c b/srslte/lib/utils/vector.c index 578a644c2..52e8b54d0 100644 --- a/srslte/lib/utils/vector.c +++ b/srslte/lib/utils/vector.c @@ -200,20 +200,19 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) { } // Used throughout -void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) { -#ifndef LV_HAVE_SSE +void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) { + #ifndef LV_HAVE_SSE int i; for (i=0;i Date: Wed, 25 Jan 2017 17:38:22 +0100 Subject: [PATCH 06/16] restored rm_turbo (test not passing) --- srslte/lib/fec/rm_turbo.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/srslte/lib/fec/rm_turbo.c b/srslte/lib/fec/rm_turbo.c index 751c9fe0c..31cbd82ca 100644 --- a/srslte/lib/fec/rm_turbo.c +++ b/srslte/lib/fec/rm_turbo.c @@ -327,11 +327,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - // For -O0 builds: shuffle j-th element to pos 0 and extract from there - _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, j); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); output[l] += x; } xPtr ++; @@ -349,10 +346,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, j); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); output[l] += x; } xPtr++; @@ -718,4 +713,3 @@ int srslte_rm_turbo_rx(float *w_buff, uint32_t w_buff_len, float *input, uint32_ return 0; } - From 2758ba4118bc9764552a5ea59c8eebe49d3f9c6a Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 25 Jan 2017 17:40:28 +0100 Subject: [PATCH 07/16] fixed lut in vector simd (now all tests passing) --- srslte/lib/utils/vector_simd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index ba30e190f..16bbbc0bb 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -227,10 +227,8 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l lutVal = _mm_loadu_si128(lutPtr); for (int i=0;i<8;i++) { - _mm_shuffle_epi8(xVal,_mm_set1_epi8(i)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(i)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, i); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, i); y[l] = x; } xPtr ++; From 9acb1002e96c6e5ed655bc885749030b75463132 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 26 Jan 2017 17:48:32 +0100 Subject: [PATCH 08/16] added more functions to simd for UE --- srslte/include/srslte/utils/vector_simd.h | 10 +- srslte/lib/utils/vector.c | 33 ++++- srslte/lib/utils/vector_simd.c | 164 +++++++++++++++++++++- 3 files changed, 193 insertions(+), 14 deletions(-) diff --git a/srslte/include/srslte/utils/vector_simd.h b/srslte/include/srslte/utils/vector_simd.h index 81aed443f..8380a75de 100644 --- a/srslte/include/srslte/utils/vector_simd.h +++ b/srslte/include/srslte/utils/vector_simd.h @@ -53,11 +53,15 @@ SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len); + SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); -SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); + +SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *h, float *z, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); @@ -65,6 +69,10 @@ SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); + +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); + SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); diff --git a/srslte/lib/utils/vector.c b/srslte/lib/utils/vector.c index 52e8b54d0..10fe38165 100644 --- a/srslte/lib/utils/vector.c +++ b/srslte/lib/utils/vector.c @@ -44,7 +44,7 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { return z; } -// Used in PRACH detector +// Used in PRACH detector, AGC and chest_dl for noise averaging float srslte_vec_acc_ff(float *x, uint32_t len) { int i; float z=0; @@ -79,10 +79,14 @@ void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t } void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) { +#ifndef LV_HAVE_SSE int i; for (i=0;i Date: Fri, 27 Jan 2017 11:56:59 +0000 Subject: [PATCH 09/16] Added GCC_ARCH option to manually set -march in GCC --- CMakeLists.txt | 15 ++++++++------- cmake/modules/FindSSE.cmake | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ae8e9c532..d36de57aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,7 @@ configure_file( option(DisableMEX "DisableMEX" ON) option(StaticMKL "StaticMKL" OFF) option(DisableBladeRF "DisableBladeRF" OFF) +set(GCC_ARCH native CACHE STRING "GCC compile for specific architecture.") ######################################################################## # Install Dirs @@ -101,24 +102,24 @@ endif(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g") + find_package(SSE) + if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") - find_package(SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0") if(HAVE_AVX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE") elseif(HAVE_SSE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -msse4.1 -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -msse4.1 -DLV_HAVE_SSE") endif(HAVE_AVX) else(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3") - find_package(SSE) if (HAVE_AVX2) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx2 -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -mavx2 -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") else (HAVE_AVX2) if(HAVE_AVX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -mavx -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") elseif(HAVE_SSE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -msse4.1 -Ofast -funroll-loops -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -msse4.1 -Ofast -funroll-loops -DLV_HAVE_SSE") endif(HAVE_AVX) endif (HAVE_AVX2) diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake index 7b258f70f..9dbbeef3e 100644 --- a/cmake/modules/FindSSE.cmake +++ b/cmake/modules/FindSSE.cmake @@ -1,9 +1,9 @@ include(CheckCSourceRuns) -option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) -option(ENABLE_AVX "Enable compile-time AVX support." ON) -option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) +option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) +option(ENABLE_AVX "Enable compile-time AVX support." ON) +option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) if (ENABLE_SSE) # @@ -96,4 +96,4 @@ if (ENABLE_SSE) endif() -mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2) \ No newline at end of file +mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2) From 979a590dc92e2a83aac74eb9b7c270031528ff97 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Apr 2017 14:54:24 +0200 Subject: [PATCH 10/16] comment references to uhd::register_handler --- srslte/lib/rf/uhd_c_api.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/srslte/lib/rf/uhd_c_api.cpp b/srslte/lib/rf/uhd_c_api.cpp index 92af48f69..1b7772e68 100644 --- a/srslte/lib/rf/uhd_c_api.cpp +++ b/srslte/lib/rf/uhd_c_api.cpp @@ -9,6 +9,7 @@ extern "C" { #include "uhd_c_api.h" } +/* #if UHD_VERSION < 31100 static void (*handler)(const char*); @@ -26,6 +27,7 @@ void rf_uhd_register_msg_handler_c(void (*new_handler)(const char*)) uhd::msg::register_handler(translate_handler); #endif } +*/ void uhd_tx_metadata_set_time_spec(uhd_tx_metadata_handle *md, time_t secs, double frac_secs) { From 8e440f512ab1ac9490aad73a5688882c31fc70e1 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Apr 2017 15:06:51 +0200 Subject: [PATCH 11/16] comment references to uhd::register_handler --- srslte/lib/rf/uhd_c_api.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/srslte/lib/rf/uhd_c_api.cpp b/srslte/lib/rf/uhd_c_api.cpp index 1b7772e68..93792722d 100644 --- a/srslte/lib/rf/uhd_c_api.cpp +++ b/srslte/lib/rf/uhd_c_api.cpp @@ -19,15 +19,17 @@ void translate_handler(uhd::msg::type_t type, const std::string & msg) handler(msg.c_str()); } #endif +*/ void rf_uhd_register_msg_handler_c(void (*new_handler)(const char*)) { +/* #if UHD_VERSION < 31100 handler = new_handler; uhd::msg::register_handler(translate_handler); #endif -} */ +} void uhd_tx_metadata_set_time_spec(uhd_tx_metadata_handle *md, time_t secs, double frac_secs) { From ec34d56e77082a66a01d45c825f7dce739c4d17e Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 8 Jun 2017 15:15:00 +0200 Subject: [PATCH 12/16] final merging changes --- CMakeLists.txt | 2 - lib/src/phy/rf/uhd_c_api.cpp | 4 -- lib/src/phy/utils/vector.c | 20 ++++-- lib/src/phy/utils/vector_simd.c | 8 +-- srslte/CMakeLists.txt | 84 ---------------------- srslte/include/srslte/utils/vector_simd.h | 86 ----------------------- 6 files changed, 17 insertions(+), 187 deletions(-) delete mode 100644 srslte/CMakeLists.txt delete mode 100644 srslte/include/srslte/utils/vector_simd.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cc2640e2..d7871d71d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,8 +195,6 @@ endmacro(ADD_CXX_COMPILER_FLAG_IF_AVAILABLE) if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${GCC_ARCH} -Wall -Wno-comment -Wno-reorder -Wno-unused-but-set-variable -Wno-unused-variable -std=c++03") - find_package(SSE) - if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -DDEBUG_MODE") else(${CMAKE_BUILD_TYPE} STREQUAL "Debug") diff --git a/lib/src/phy/rf/uhd_c_api.cpp b/lib/src/phy/rf/uhd_c_api.cpp index c0fb6bfd3..da348c17b 100644 --- a/lib/src/phy/rf/uhd_c_api.cpp +++ b/lib/src/phy/rf/uhd_c_api.cpp @@ -9,7 +9,6 @@ extern "C" { #include "uhd_c_api.h" } -/* #if UHD_VERSION < 31100 static void (*handler)(const char*); @@ -19,16 +18,13 @@ void translate_handler(uhd::msg::type_t type, const std::string & msg) handler(msg.c_str()); } #endif -*/ void rf_uhd_register_msg_handler_c(void (*new_handler)(const char*)) { -/* #if UHD_VERSION < 31100 handler = new_handler; uhd::msg::register_handler(translate_handler); #endif -*/ } void uhd_tx_metadata_set_time_spec(uhd_tx_metadata_handle *md, time_t secs, double frac_secs) diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 3146c760d..daa03d0b4 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -57,12 +57,18 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { // Used in PRACH detector, AGC and chest_dl for noise averaging float srslte_vec_acc_ff(float *x, uint32_t len) { - int i; - float z=0; - for (i=0;i -#include -#include "srslte/config.h" - -SRSLTE_API int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len); - -SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len); - -SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); - -SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); - -SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *h, float *z, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); - -SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); - -SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); - -SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len); - -#ifdef __cplusplus -} -#endif - -#endif From 2bba9d187d32ce18fc0e752f423c637b10ab0335 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Jul 2017 16:36:27 +0200 Subject: [PATCH 13/16] fixed dotprodconj. Removed unaligned load/store --- lib/src/phy/utils/vector.c | 2 +- lib/src/phy/utils/vector_simd.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 61d0271b6..8ceefe3da 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -608,7 +608,7 @@ cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) { uint32_t i; cf_t res = 0; for (i=0;i Date: Fri, 7 Jul 2017 18:04:59 +0200 Subject: [PATCH 14/16] missing return statement --- lib/include/srslte/common/metrics_hub.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/include/srslte/common/metrics_hub.h b/lib/include/srslte/common/metrics_hub.h index 8443ef65a..9575347a2 100644 --- a/lib/include/srslte/common/metrics_hub.h +++ b/lib/include/srslte/common/metrics_hub.h @@ -34,6 +34,7 @@ public: bool init(metrics_interface *m_, float report_period_secs=1.0) { m = m_; start_periodic(report_period_secs*1e6); + return true; } void stop() { thread_cancel(); @@ -47,7 +48,7 @@ private: void run_period() { metrics_t metric; m->get_metrics(metric); - for (int i=0;iset_metrics(metric); } } From f629e10fcfdf70cbc43b0e7c00a4eb18ac15f08b Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Jul 2017 18:44:17 +0200 Subject: [PATCH 15/16] fixed bug in sub_sse() and added couple of avx functions --- lib/include/srslte/phy/utils/vector_simd.h | 4 ++ lib/src/phy/utils/vector.c | 12 +++- lib/src/phy/utils/vector_simd.c | 65 +++++++++++++++++++++- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 6fe55f89d..1010cbed6 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -49,8 +49,12 @@ SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t l SRSLTE_API void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_fff_avx(float *x, float *y, float *z, uint32_t len); + SRSLTE_API void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_fff_avx(float *x, float *y, float *z, uint32_t len); + SRSLTE_API void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len); diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 8ceefe3da..2a2001ae3 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -101,9 +101,13 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) { for (i=0;i Date: Tue, 11 Jul 2017 13:17:26 +0200 Subject: [PATCH 16/16] fixed segfault due to race condition in scrambling sequence pre-generation --- lib/include/srslte/phy/phch/pdsch.h | 3 ++- lib/include/srslte/phy/phch/pucch.h | 3 ++- lib/include/srslte/phy/phch/pusch.h | 3 ++- lib/src/phy/phch/pdsch.c | 24 +++++++++++++----------- lib/src/phy/phch/pucch.c | 8 +++++--- lib/src/phy/phch/pusch.c | 27 ++++++++++++++------------- 6 files changed, 38 insertions(+), 30 deletions(-) diff --git a/lib/include/srslte/phy/phch/pdsch.h b/lib/include/srslte/phy/phch/pdsch.h index 7730d2fa1..ad01c4ef8 100644 --- a/lib/include/srslte/phy/phch/pdsch.h +++ b/lib/include/srslte/phy/phch/pdsch.h @@ -48,7 +48,8 @@ #include "srslte/phy/phch/pdsch_cfg.h" typedef struct { - srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + bool sequence_generated; } srslte_pdsch_user_t; /* PDSCH object */ diff --git a/lib/include/srslte/phy/phch/pucch.h b/lib/include/srslte/phy/phch/pucch.h index 3542dc53f..56d512418 100644 --- a/lib/include/srslte/phy/phch/pucch.h +++ b/lib/include/srslte/phy/phch/pucch.h @@ -80,7 +80,8 @@ typedef struct SRSLTE_API { } srslte_pucch_cfg_t; typedef struct { - srslte_sequence_t seq_f2[SRSLTE_NSUBFRAMES_X_FRAME]; + srslte_sequence_t seq_f2[SRSLTE_NSUBFRAMES_X_FRAME]; + bool sequence_generated; } srslte_pucch_user_t; /* PUCCH object */ diff --git a/lib/include/srslte/phy/phch/pusch.h b/lib/include/srslte/phy/phch/pusch.h index bf04a4781..e5ee43995 100644 --- a/lib/include/srslte/phy/phch/pusch.h +++ b/lib/include/srslte/phy/phch/pusch.h @@ -61,7 +61,8 @@ typedef struct { } srslte_pusch_hopping_cfg_t; typedef struct { - srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + bool sequences_generated; } srslte_pusch_user_t; /* PUSCH object */ diff --git a/lib/src/phy/phch/pdsch.c b/lib/src/phy/phch/pdsch.c index 8791ac8e7..9b6128c64 100644 --- a/lib/src/phy/phch/pdsch.c +++ b/lib/src/phy/phch/pdsch.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "prb_dl.h" #include "srslte/phy/phch/pdsch.h" @@ -362,6 +363,7 @@ int srslte_pdsch_set_rnti(srslte_pdsch_t *q, uint16_t rnti) { return SRSLTE_ERROR; } } + q->users[rnti]->sequence_generated = true; } } return SRSLTE_SUCCESS; @@ -467,15 +469,15 @@ int srslte_pdsch_decode_multi(srslte_pdsch_t *q, srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->e, cfg->nbits.nof_re); /* descramble */ - if (!q->users[rnti]) { - srslte_sequence_t seq; + if (q->users[rnti] && q->users[rnti]->sequence_generated) { + srslte_scrambling_s_offset(&q->users[rnti]->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits); + } else { + srslte_sequence_t seq; if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - srslte_scrambling_s_offset(&seq, q->e, 0, cfg->nbits.nof_bits); + srslte_scrambling_s_offset(&seq, q->e, 0, cfg->nbits.nof_bits); srslte_sequence_free(&seq); - } else { - srslte_scrambling_s_offset(&q->users[rnti]->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits); } if (SRSLTE_VERBOSE_ISDEBUG()) { @@ -537,15 +539,15 @@ int srslte_pdsch_encode(srslte_pdsch_t *q, } /* scramble */ - if (!q->users[rnti]) { - srslte_sequence_t seq; + if (q->users[rnti] && q->users[rnti]->sequence_generated) { + srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->e, cfg->nbits.nof_bits); + } else { + srslte_sequence_t seq; if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } srslte_scrambling_bytes(&seq, (uint8_t*) q->e, cfg->nbits.nof_bits); srslte_sequence_free(&seq); - } else { - srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->e, cfg->nbits.nof_bits); } srslte_mod_modulate_bytes(&q->mod[cfg->grant.mcs.mod], (uint8_t*) q->e, q->d, cfg->nbits.nof_bits); diff --git a/lib/src/phy/phch/pucch.c b/lib/src/phy/phch/pucch.c index c58f69871..6a889b89c 100644 --- a/lib/src/phy/phch/pucch.c +++ b/lib/src/phy/phch/pucch.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "srslte/phy/ch_estimation/refsignal_ul.h" #include "srslte/phy/phch/pucch.h" @@ -489,7 +490,7 @@ void srslte_pucch_clear_rnti(srslte_pucch_t *q, uint16_t rnti) { int srslte_pucch_set_crnti(srslte_pucch_t *q, uint16_t rnti) { if (!q->users[rnti]) { - q->users[rnti] = malloc(sizeof(srslte_pucch_user_t)); + q->users[rnti] = calloc(1, sizeof(srslte_pucch_user_t)); if (q->users[rnti]) { for (uint32_t sf_idx=0;sf_idxusers[rnti]->sequence_generated = true; } } return SRSLTE_SUCCESS; @@ -591,7 +593,7 @@ static int uci_mod_bits(srslte_pucch_t *q, srslte_pucch_format_t format, uint8_t case SRSLTE_PUCCH_FORMAT_2: case SRSLTE_PUCCH_FORMAT_2A: case SRSLTE_PUCCH_FORMAT_2B: - if (q->users[rnti]) { + if (q->users[rnti] && q->users[rnti]->sequence_generated) { memcpy(q->bits_scram, bits, SRSLTE_PUCCH2_NOF_BITS*sizeof(uint8_t)); srslte_scrambling_b(&q->users[rnti]->seq_f2[sf_idx], q->bits_scram); srslte_mod_modulate(&q->mod, q->bits_scram, q->d, SRSLTE_PUCCH2_NOF_BITS); @@ -796,7 +798,7 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format, case SRSLTE_PUCCH_FORMAT_2: case SRSLTE_PUCCH_FORMAT_2A: case SRSLTE_PUCCH_FORMAT_2B: - if (q->users[rnti]) { + if (q->users[rnti] && q->users[rnti]->sequence_generated) { pucch_encode_(q, format, n_pucch, sf_idx, rnti, NULL, ref, true); srslte_vec_prod_conj_ccc(q->z, ref, q->z_tmp, SRSLTE_PUCCH_MAX_SYMBOLS); for (int i=0;imax_re * srslte_mod_bits_x_symbol(SRSLTE_MOD_64QAM))) { return SRSLTE_ERROR; } - } + } + q->users[rnti]->sequences_generated = true; } } return SRSLTE_SUCCESS; @@ -444,15 +445,15 @@ int srslte_pusch_encode(srslte_pusch_t *q, srslte_pusch_cfg_t *cfg, srslte_softb return SRSLTE_ERROR; } - if (!q->users[rnti]) { - srslte_sequence_t seq; + if (q->users[rnti] && q->users[rnti]->sequences_generated) { + srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->q, cfg->nbits.nof_bits); + } else { + srslte_sequence_t seq; if (srslte_sequence_pusch(&seq, rnti, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - srslte_scrambling_bytes(&seq, (uint8_t*) q->q, cfg->nbits.nof_bits); + srslte_scrambling_bytes(&seq, (uint8_t*) q->q, cfg->nbits.nof_bits); srslte_sequence_free(&seq); - } else { - srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->q, cfg->nbits.nof_bits); } // Correct UCI placeholder/repetition bits @@ -535,13 +536,13 @@ int srslte_pusch_decode(srslte_pusch_t *q, srslte_sequence_t *seq = NULL; // Create sequence if does not exist - if (!q->users[rnti]) { - seq = &q->tmp_seq; + if (q->users[rnti] && q->users[rnti]->sequences_generated) { + seq = &q->users[rnti]->seq[cfg->sf_idx]; + } else { + seq = &q->tmp_seq; if (srslte_sequence_pusch(seq, rnti, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - } else { - seq = &q->users[rnti]->seq[cfg->sf_idx]; } // Decode RI/HARQ bits before descrambling @@ -553,7 +554,7 @@ int srslte_pusch_decode(srslte_pusch_t *q, // Descrambling srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits); - if (!q->users[rnti]) { + if (!(q->users[rnti] && q->users[rnti]->sequences_generated)) { srslte_sequence_free(seq); }