diff --git a/CMakeLists.txt b/CMakeLists.txt index d72bb5fef..efaa1973a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "") option(ENABLE_SRSUE "Build srsUE application" ON) option(ENABLE_SRSENB "Build srsENB application" ON) -option(ENABLE_VOLK "Enable use of VOLK SIMD library" ON) +option(ENABLE_VOLK "Enable use of VOLK SIMD library" OFF) option(ENABLE_GUI "Enable GUI (using srsGUI)" ON) option(ENABLE_BLADERF "Enable BladeRF" ON) @@ -282,6 +282,11 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif(HAVE_AVX) endif (HAVE_AVX2) + if (HAVE_AVX512) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") + endif(HAVE_AVX512) + if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") if(HAVE_SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Ofast -funroll-loops") diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake index 3440f01c1..4c9673a9d 100644 --- a/cmake/modules/FindSSE.cmake +++ b/cmake/modules/FindSSE.cmake @@ -4,10 +4,11 @@ include(CheckCSourceRuns) -option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) -option(ENABLE_AVX "Enable compile-time AVX support." ON) -option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) -option(ENABLE_FMA "Enable compile-time FMA support." ON) +option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) +option(ENABLE_AVX "Enable compile-time AVX support." ON) +option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) +option(ENABLE_FMA "Enable compile-time FMA support." ON) +option(ENABLE_AVX512 "Enable compile-time AVX512 support." 
ON) if (ENABLE_SSE) # @@ -135,6 +136,41 @@ if (ENABLE_SSE) endif() endif() + if (ENABLE_AVX512) + + # + # Check that the compiler accepts -mavx512f and that this host can execute AVX512F intrinsics + # + if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + set(CMAKE_REQUIRED_FLAGS "-mavx512f") + check_c_source_runs(" + #include <immintrin.h> + int main() + { + __m512i a, b, c; + const int src[16] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 , 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF}; + int dst[16]; + a = _mm512_loadu_si512( (__m512i*)src ); + b = _mm512_loadu_si512( (__m512i*)src ); + c = _mm512_add_epi32( a, b ); + _mm512_storeu_si512( (__m512i*)dst, c ); + int i = 0; + for( i = 0; i < 16; i++ ){ + if( ( src[i] + src[i] ) != dst[i] ){ + return -1; + } + } + return 0; + }" + HAVE_AVX512) + endif() + + if (HAVE_AVX512) + message(STATUS "AVX512 is enabled - target CPU must support it") + endif() + endif() + + endif() -mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2, HAVE_FMA) +mark_as_advanced(HAVE_SSE HAVE_AVX HAVE_AVX2 HAVE_FMA HAVE_AVX512) diff --git a/cmake/modules/FindVolk.cmake b/cmake/modules/FindVolk.cmake deleted file mode 100644 index 96262a3b8..000000000 --- a/cmake/modules/FindVolk.cmake +++ /dev/null @@ -1,161 +0,0 @@ -INCLUDE(FindPkgConfig) -PKG_CHECK_MODULES(PC_VOLK volk QUIET) - -FIND_PATH( - VOLK_INCLUDE_DIRS - NAMES volk/volk.h - HINTS $ENV{VOLK_DIR}/include - ${CMAKE_INSTALL_PREFIX}/include - ${PC_VOLK_INCLUDE_DIR} - PATHS /usr/local/include - /usr/include -) - -FIND_LIBRARY( - VOLK_LIBRARIES - NAMES volk - HINTS $ENV{VOLK_DIR}/lib - ${CMAKE_INSTALL_PREFIX}/lib - ${CMAKE_INSTALL_PREFIX}/lib64 - ${PC_VOLK_LIBDIR} - PATHS /usr/local/lib - /usr/local/lib64 - /usr/lib - /usr/lib64 -) - -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS) -MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS) - -IF(VOLK_FOUND) - SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f 
HAVE_VOLK_CONVERT_IF_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION_16) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_32u HAVE_VOLK_MAX_FUNCTION_32) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION) - 
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION_16) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_32u HAVE_VOLK_MAX_ABS_FUNCTION_32) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION) - - - - SET(VOLK_DEFINITIONS "HAVE_VOLK") - IF(${HAVE_VOLK_CONVERT_IF_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_IF_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_REAL2_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL2_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONVERT_CI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_ABS_FUNCTION_16}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION_16") - ENDIF() - IF(${HAVE_VOLK_MAX_ABS_FUNCTION_32}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION_32") - ENDIF() - IF(${HAVE_VOLK_MAX_VEC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; 
HAVE_VOLK_DEINTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_INTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONVERT_FI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_FUNCTION_16}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION_16") - ENDIF() - IF(${HAVE_VOLK_MAX_FUNCTION_32}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION_32") - ENDIF() - IF(${HAVE_VOLK_ACC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_REAL_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DIVIDE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION") - ENDIF() - 
IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_CFC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CFC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_F_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ATAN_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION") - ENDIF() -ENDIF(VOLK_FOUND) diff --git a/lib/examples/cell_measurement.c b/lib/examples/cell_measurement.c index 37796682f..bfb8194df 100644 --- a/lib/examples/cell_measurement.c +++ b/lib/examples/cell_measurement.c @@ -249,7 +249,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating ue_sync\n"); return -1; } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, sf_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initiating UE downlink processing module\n"); return -1; } @@ -257,7 +257,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating UE downlink processing module\n"); return -1; } - if (srslte_ue_mib_init(&ue_mib, cell.nof_prb)) { + if (srslte_ue_mib_init(&ue_mib, sf_buffer, cell.nof_prb)) { fprintf(stderr, "Error initaiting UE MIB decoder\n"); return -1; } @@ -271,8 +271,16 @@ int main(int argc, char **argv) { /* Initialize subframe counter */ sf_cnt = 0; - - if (srslte_ofdm_rx_init(&fft, cell.cp, cell.nof_prb)) { + + int sf_re = SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp); + + cf_t *sf_symbols = srslte_vec_malloc(sf_re * sizeof(cf_t)); + + for (int i=0;i /* SSE implementation for complex reciprocal */ SRSLTE_API __m128 srslte_mat_cf_recip_sse(__m128 a); @@ -90,8 +83,6 @@ SRSLTE_API void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1, #ifdef LV_HAVE_AVX -#include - /* AVX implementation for complex reciprocal */ SRSLTE_API __m256 srslte_mat_cf_recip_avx(__m256 a); diff --git a/lib/include/srslte/phy/utils/simd.h 
b/lib/include/srslte/phy/utils/simd.h index 420d07213..09e9cff8e 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -27,6 +27,18 @@ #ifndef SRSLTE_SIMD_H_H #define SRSLTE_SIMD_H_H +#ifdef LV_HAVE_SSE /* AVX, AVX2, FMA, AVX512 are in this group */ +#ifndef __OPTIMIZE__ +#define __OPTIMIZE__ +#endif +#include +#endif /* LV_HAVE_SSE */ +#include + +#ifdef HAVE_NEON +#include +#endif + /* * SSE Macros */ @@ -44,7 +56,7 @@ /* * AVX Macros */ -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 #define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001) #define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f))) @@ -60,7 +72,7 @@ #define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\ _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) #endif /* LV_HAVE_FMA */ -#endif /* LV_HAVE_AVX */ +#endif /* LV_HAVE_AVX2 */ /* @@ -78,4 +90,1415 @@ _mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) #endif /* LV_HAVE_FMA */ + + +/* Memory Sizes for Single Floating Point and fixed point */ +#ifdef LV_HAVE_AVX512 + +#define SRSLTE_SIMD_F_SIZE 16 +#define SRSLTE_SIMD_CF_SIZE 16 + +#define SRSLTE_SIMD_I_SIZE 16 + +#define SRSLTE_SIMD_S_SIZE 32 +#define SRSLTE_SIMD_C16_SIZE 0 + +#else +#ifdef LV_HAVE_AVX2 + +#define SRSLTE_SIMD_F_SIZE 8 +#define SRSLTE_SIMD_CF_SIZE 8 + +#define SRSLTE_SIMD_I_SIZE 8 + +#define SRSLTE_SIMD_S_SIZE 16 +#define SRSLTE_SIMD_C16_SIZE 16 + +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + +#define SRSLTE_SIMD_F_SIZE 4 +#define SRSLTE_SIMD_CF_SIZE 4 + +#define SRSLTE_SIMD_I_SIZE 4 + +#define SRSLTE_SIMD_S_SIZE 8 +#define SRSLTE_SIMD_C16_SIZE 8 + +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + +#define SRSLTE_SIMD_F_SIZE 4 +#define SRSLTE_SIMD_CF_SIZE 4 + +#define SRSLTE_SIMD_I_SIZE 4 + +#define SRSLTE_SIMD_S_SIZE 8 +#define SRSLTE_SIMD_C16_SIZE 8 + +#else /* LV_HAVE_NEON */ +#define SRSLTE_SIMD_F_SIZE 0 
+#define SRSLTE_SIMD_CF_SIZE 0 + +#define SRSLTE_SIMD_I_SIZE 0 + +#define SRSLTE_SIMD_S_SIZE 0 +#define SRSLTE_SIMD_C16_SIZE 0 + +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + + + +#if SRSLTE_SIMD_F_SIZE + +/* Data types */ +#ifdef LV_HAVE_AVX512 +typedef __m512 simd_f_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256 simd_f_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128 simd_f_t; +#else /* HAVE_NEON */ +#ifdef HAVE_NEON +typedef float32x4_t simd_f_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +/* Single precision Floating point functions */ +static inline simd_f_t srslte_simd_f_load(float *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_ps(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_ps(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_ps(ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_f32(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_loadu(float *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_loadu_ps(ptr); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_loadu_ps(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_loadu_ps(ptr); +#else /* LV_HAVE_SSE */ + #ifdef HAVE_NEON + return vld1q_f32(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_ps(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_ps(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps(ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* 
LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_f_storeu(float *ptr, simd_f_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + _mm256_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_set1(float x) { +#ifdef LV_HAVE_AVX512 + return _mm512_set1_ps(x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_set1_ps(x); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_set1_ps(x); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_f32(x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_mul_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_mul_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_mul_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_f32(a,b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_rcp(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_rcp14_ps(a); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_rcp_ps(a); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_rcp_ps(a); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_f32(vrecpeq_f32(a), vrecpsq_f32(vrecpeq_f32(a), a)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { +#ifdef 
LV_HAVE_AVX512 + __m512 r = _mm512_add_ps(a, b); + return _mm512_mask_sub_ps(r, 0b0101010101010101, a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_addsub_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_addsub_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON + float* a_ptr = (float*) &a; + float* b_ptr = (float*) &b; + simd_f_t ret; + float* c_ptr = (float*) &ret; + for(int i = 0; i<4;i++){ + if(i%2==0){ + c_ptr[i] = a_ptr[i] - b_ptr[i]; + }else{ + c_ptr[i] = a_ptr[i] + b_ptr[i]; + } + } + + return ret; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + + + + +static inline simd_f_t srslte_simd_f_sub(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_sub_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sub_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_sub_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubq_f32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_add(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_add_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vaddq_f32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_zero (void) { +#ifdef LV_HAVE_AVX512 + return _mm512_setzero_ps(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_setzero_ps(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_setzero_ps(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_f32(0); +#endif /* HAVE_NEON */ +#endif /* 
LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_swap(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_permute_ps(a, 0b10110001); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_permute_ps(a, 0b10110001); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_shuffle_ps(a, a, 0b10110001); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(a))); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_hadd(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + const __m512i idx1 = _mm512_setr_epi32((0b00000), (0b00010), + (0b00100), (0b00110), + (0b01000), (0b01010), + (0b01100), (0b01110), + (0b10000), (0b10010), + (0b10100), (0b10110), + (0b11000), (0b11010), + (0b11100), (0b11110)); + const __m512i idx2 = _mm512_or_epi32(idx1, _mm512_set1_epi32(1)); + + simd_f_t a1 = _mm512_permutex2var_ps(a, idx1, b); + simd_f_t b1 = _mm512_permutex2var_ps(a, idx2, b); + return _mm512_add_ps(a1, b1); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + simd_f_t a1 = _mm256_permute2f128_ps(a, b, 0b00100000); + simd_f_t b1 = _mm256_permute2f128_ps(a, b, 0b00110001); + return _mm256_hadd_ps(a1, b1); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_hadd_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vcombine_f32( vpadd_f32( vget_low_f32(a), vget_high_f32(a) ), vpadd_f32( vget_low_f32(b), vget_high_f32(b) ) ); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_sqrt_ps(a); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_sqrt_ps(a); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_sqrt_ps(a); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + 
float32x4_t sqrt_reciprocal = vrsqrteq_f32(a); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a,sqrt_reciprocal), sqrt_reciprocal),sqrt_reciprocal); + return vmulq_f32(a,sqrt_reciprocal); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_F_SIZE */ + + +#if SRSLTE_SIMD_CF_SIZE + +#ifdef HAVE_NEON + typedef float32x4x2_t simd_cf_t; +#else +typedef struct { + simd_f_t re; + simd_f_t im; + +} simd_cf_t; +#endif + +/* Complex single-precision floating point functions. cfi_load reads interleaved cf_t samples with the aligned-load intrinsics (x86) and splits them into separate real/imaginary registers; lane order is backend-specific */ +static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + __m512 in1 = _mm512_load_ps((float*)(ptr)); + __m512 in2 = _mm512_load_ps((float*)(ptr + SRSLTE_SIMD_CF_SIZE/2)); + ret.re = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E), in2); + ret.im = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F), in2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 in1 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr)), 0b11011000); + __m256 in2 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr + 4)), 0b11011000); + ret.re = _mm256_unpacklo_ps(in1, in2); + ret.im = _mm256_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128 i1 = _mm_load_ps((float*)(ptr)); + __m128 i2 = _mm_load_ps((float*)(ptr + 2)); + ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); + ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#else +#ifdef HAVE_NEON + ret = vld2q_f32((float*)(ptr)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +/* Same de-interleaving load as srslte_simd_cfi_load, but using the unaligned-load intrinsics on x86 */ +static inline simd_cf_t srslte_simd_cfi_loadu(cf_t *ptr) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + __m512 in1 
= _mm512_loadu_ps((float*)(ptr)); + __m512 in2 = _mm512_loadu_ps((float*)(ptr + SRSLTE_SIMD_CF_SIZE/2)); + ret.re = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E), in2); + ret.im = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F), in2); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + __m256 in1 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr)), 0b11011000); + __m256 in2 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr + 4)), 0b11011000); + ret.re = _mm256_unpacklo_ps(in1, in2); + ret.im = _mm256_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128 i1 = _mm_loadu_ps((float*)(ptr)); + __m128 i2 = _mm_loadu_ps((float*)(ptr + 2)); + ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); + ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#else +#ifdef HAVE_NEON + ret = vld2q_f32((float*)(ptr)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_load(float *re, float *im) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_load_ps(re); + ret.im = _mm512_load_ps(im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_load_ps(re); + ret.im = _mm256_load_ps(im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_load_ps(re); + ret.im = _mm_load_ps(im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vld1q_f32(re); + ret.val[1] = vld1q_f32(im); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_loadu(float *re, float *im) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_loadu_ps(re); + ret.im = _mm512_loadu_ps(im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + 
ret.re = _mm256_loadu_ps(re); + ret.im = _mm256_loadu_ps(im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_loadu_ps(re); + ret.im = _mm_loadu_ps(im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vld1q_f32(re); + ret.val[1] = vld1q_f32(im); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline void srslte_simd_cfi_store(cf_t *ptr, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + __m512 s1 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17), simdreg.im); + __m512 s2 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x08, 0x18, 0x09, 0x19, + 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F), simdreg.im); + _mm512_store_ps((float*)(ptr), s1); + _mm512_store_ps((float*)(ptr + 8), s2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); + __m256 out2 = _mm256_permute_ps(simdreg.im, 0b11011000); + _mm256_store_ps((float*)(ptr), _mm256_unpacklo_ps(out1, out2)); + _mm256_store_ps((float*)(ptr + 4), _mm256_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); + _mm_store_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr), simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cfi_storeu(cf_t *ptr, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + __m512 s1 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17), simdreg.im); + __m512 s2 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x08, 0x18, 0x09, 0x19, + 0x0A, 
0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F), simdreg.im); + _mm512_storeu_ps((float*)(ptr), s1); + _mm512_storeu_ps((float*)(ptr + 8), s2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); + __m256 out2 = _mm256_permute_ps(simdreg.im, 0b11011000); + _mm256_storeu_ps((float*)(ptr), _mm256_unpacklo_ps(out1, out2)); + _mm256_storeu_ps((float*)(ptr + 4), _mm256_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); + _mm_storeu_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr), simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cf_store(float *re, float *im, simd_cf_t simdreg) { /* writes the real and imaginary registers to two separate float arrays using the aligned-store intrinsics on x86 */ +#ifdef LV_HAVE_AVX512 + _mm512_store_ps(re, simdreg.re); + _mm512_store_ps(im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_ps((float *) re, simdreg.re); + _mm256_store_ps((float *) im, simdreg.im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps((float *) re, simdreg.re); + _mm_store_ps((float *) im, simdreg.im); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32((float *) re, simdreg.val[0]); + vst1q_f32((float *) im, simdreg.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cf_storeu(float *re, float *im, simd_cf_t simdreg) { /* same as srslte_simd_cf_store but with the unaligned-store intrinsics on x86 */ +#ifdef LV_HAVE_AVX512 + _mm512_storeu_ps(re, simdreg.re); + _mm512_storeu_ps(im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_ps((float *) re, simdreg.re); + _mm256_storeu_ps((float *) im, simdreg.im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps((float *) re, simdreg.re); + _mm_storeu_ps((float *) im, simdreg.im); +#else 
/*HAVE_NEON*/ +#ifdef HAVE_NEON + vst1q_f32((float *) re, simdreg.val[0]); + vst1q_f32((float *) im, simdreg.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_cf_t srslte_simd_cf_set1 (cf_t x) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_set1_ps(__real__ x); + ret.im = _mm512_set1_ps(__imag__ x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_set1_ps(__real__ x); + ret.im = _mm256_set1_ps(__imag__ x); +#else +#ifdef LV_HAVE_SSE + ret.re = _mm_set1_ps(__real__ x); + ret.im = _mm_set1_ps(__imag__ x); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vdupq_n_f32(__real__ x); + ret.val[1] = vdupq_n_f32(__imag__ x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_prod (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_sub_ps(_mm512_mul_ps(a.re, b.re), + _mm512_mul_ps(a.im, b.im)); + ret.im = _mm512_add_ps(_mm512_mul_ps(a.re, b.im), + _mm512_mul_ps(a.im, b.re)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_sub_ps(_mm256_mul_ps(a.re, b.re), + _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_add_ps(_mm256_mul_ps(a.re, b.im), + _mm256_mul_ps(a.im, b.re)); +#else +#ifdef LV_HAVE_SSE + ret.re = _mm_sub_ps(_mm_mul_ps(a.re, b.re), + _mm_mul_ps(a.im, b.im)); + ret.im = _mm_add_ps(_mm_mul_ps(a.re, b.im), + _mm_mul_ps(a.im, b.re)); +#else +#ifdef HAVE_NEON + ret.val[0] = vsubq_f32(vmulq_f32(a.val[0],b.val[0]), + vmulq_f32(a.val[1],b.val[1])); + ret.val[1] = vaddq_f32(vmulq_f32(a.val[0],b.val[1]), + vmulq_f32(a.val[1],b.val[0])); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_conjprod (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = 
_mm512_add_ps(_mm512_mul_ps(a.re, b.re), + _mm512_mul_ps(a.im, b.im)); + ret.im = _mm512_sub_ps(_mm512_mul_ps(a.im, b.re), + _mm512_mul_ps(a.re, b.im)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_add_ps(_mm256_mul_ps(a.re, b.re), + _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_sub_ps(_mm256_mul_ps(a.im, b.re), + _mm256_mul_ps(a.re, b.im)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_add_ps(_mm_mul_ps(a.re, b.re), + _mm_mul_ps(a.im, b.im)); + ret.im = _mm_sub_ps(_mm_mul_ps(a.im, b.re), + _mm_mul_ps(a.re, b.im)); + #else +#ifdef HAVE_NEON + ret.val[0] = vaddq_f32(vmulq_f32(a.val[0],b.val[0]), + vmulq_f32(a.val[1],b.val[1])); + ret.val[1] = vsubq_f32(vmulq_f32(a.val[1],b.val[0]), + vmulq_f32(a.val[0],b.val[1])); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_add_ps(a.re, b.re); + ret.im = _mm512_add_ps(a.im, b.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_add_ps(a.re, b.re); + ret.im = _mm256_add_ps(a.im, b.im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_add_ps(a.re, b.re); + ret.im = _mm_add_ps(a.im, b.im); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vaddq_f32(a.val[0],b.val[0]); + ret.val[1] = vaddq_f32(a.val[1],b.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_mul_ps(a.re, b); + ret.im = _mm512_mul_ps(a.im, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + b = _mm256_permutevar8x32_ps(b, _mm256_setr_epi32(0,4,1,5,2,6,3,7)); + ret.re = _mm256_mul_ps(a.re, b); + ret.im = _mm256_mul_ps(a.im, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re 
= _mm_mul_ps(a.re, b); + ret.im = _mm_mul_ps(a.im, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vmulq_f32(a.val[0],b); + ret.val[1] = vmulq_f32(a.val[1],b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + simd_f_t a2re = _mm512_mul_ps(a.re, a.re); + simd_f_t a2im = _mm512_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm512_add_ps(a2re, a2im); + simd_f_t rcp = _mm512_rcp14_ps(mod2); + simd_f_t neg_a_im = _mm512_xor_ps(_mm512_set1_ps(-0.0f), a.im); + ret.re = _mm512_mul_ps(a.re, rcp); + ret.im = _mm512_mul_ps(neg_a_im, rcp); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + simd_f_t a2re = _mm256_mul_ps(a.re, a.re); + simd_f_t a2im = _mm256_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm256_add_ps(a2re, a2im); + simd_f_t rcp = _mm256_rcp_ps(mod2); + simd_f_t neg_a_im = _mm256_xor_ps(_mm256_set1_ps(-0.0f), a.im); + ret.re = _mm256_mul_ps(a.re, rcp); + ret.im = _mm256_mul_ps(neg_a_im, rcp); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + simd_f_t a2re = _mm_mul_ps(a.re, a.re); + simd_f_t a2im = _mm_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm_add_ps(a2re, a2im); + simd_f_t rcp = _mm_rcp_ps(mod2); + simd_f_t neg_a_im = _mm_xor_ps(_mm_set1_ps(-0.0f), a.im); + ret.re = _mm_mul_ps(a.re, rcp); + ret.im = _mm_mul_ps(neg_a_im, rcp); + #else /* LV_HAVE_SSE */ + #ifdef HAVE_NEON + simd_f_t a2re = vmulq_f32(a.val[0], a.val[0]); + simd_f_t a2im = vmulq_f32(a.val[1], a.val[1]); + simd_f_t mod2 = vaddq_f32(a2re, a2im); + simd_f_t rcp = vmulq_f32(vrecpeq_f32(mod2), vrecpsq_f32(vrecpeq_f32(mod2), mod2)); + simd_f_t neg_a_im = vnegq_f32(a.val[1]); + ret.val[0] = vmulq_f32(a.val[0], rcp); + ret.val[1] = vmulq_f32(neg_a_im, rcp); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_zero (void) { + 
simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_setzero_ps(); + ret.im = _mm512_setzero_ps(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_setzero_ps(); + ret.im = _mm256_setzero_ps(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_setzero_ps(); + ret.im = _mm_setzero_ps(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vdupq_n_f32(0); + ret.val[1] = vdupq_n_f32(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +#endif /* SRSLTE_SIMD_CF_SIZE */ + +#if SRSLTE_SIMD_I_SIZE +/* simd_i_t: integer SIMD register of the selected instruction set. simd_sel_t: selector returned by srslte_simd_f_max and taken by srslte_simd_i_select; a 16-bit lane mask on AVX512, a full vector register on the other targets. */ +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_i_t; +typedef __mmask16 simd_sel_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_i_t; +typedef __m256 simd_sel_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_i_t; +typedef __m128 simd_sel_t; +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON +typedef int32x4_t simd_i_t; +typedef int32x4_t simd_sel_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +/* Loads one simd_i_t from x. The x86 branches use the aligned-load intrinsics, so x must be aligned to the register width. */ +static inline simd_i_t srslte_simd_i_load(int *x) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_epi32((__m512i*)x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256((__m256i*)x); +#else + #ifdef LV_HAVE_SSE + return _mm_load_si128((__m128i*)x); +#else + #ifdef HAVE_NEON + return vld1q_s32((int*)x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} +/* Stores reg to x. The x86 branches use the aligned-store intrinsics, so x must be aligned to the register width. */ +static inline void srslte_simd_i_store(int *x, simd_i_t reg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_epi32((__m512i*)x, reg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i*)x, reg); +#else +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i*)x, reg); +#else +#ifdef HAVE_NEON + vst1q_s32((int*)x, reg); +#endif /*HAVE_NEON*/ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static
inline simd_i_t srslte_simd_i_set1(int x) { +#ifdef LV_HAVE_AVX512 + return _mm512_set1_epi32(x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_set1_epi32(x); +#else + #ifdef LV_HAVE_SSE + return _mm_set1_epi32(x); +#else + #ifdef HAVE_NEON + return vdupq_n_s32(x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_add(simd_i_t a, simd_i_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_epi32(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_epi32(a, b); +#else +#ifdef LV_HAVE_SSE + return _mm_add_epi32(a, b); +#else +#ifdef HAVE_NEON + return vaddq_s32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_cmp_ps(a, b, _CMP_GT_OS); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return (simd_sel_t) _mm_cmpgt_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return (simd_sel_t) vcgtq_f32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t selector) { +#ifdef LV_HAVE_AVX512 + return (__m512i) _mm512_mask_blend_ps( selector, (__m512)a, (__m512) b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return (__m256i) _mm256_blendv_ps((__m256) a,(__m256) b, selector); +#else + #ifdef LV_HAVE_SSE + return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, selector); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON + int* a_ptr = (int*) &a; + int* b_ptr = (int*) &b; + simd_i_t ret; + int* sel = (int*) &selector; + int* c_ptr = (int*) &ret; + for(int i = 0;i<4;i++) + { + if(sel[i] 
== -1){ + c_ptr[i] = b_ptr[i]; + }else{ + c_ptr[i] = a_ptr[i]; + } + } + return ret; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_I_SIZE*/ + + +#if SRSLTE_SIMD_S_SIZE + + +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_s_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_s_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_s_t; +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON +typedef int16x8_t simd_s_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_si512(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256((__m256i*) ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s16(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_loadu_si512(ptr); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_loadu_si256((__m256i*) ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_loadu_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s16(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i*) ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s16( ptr, simdreg); +#endif /* 
HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256((__m256i*) ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s16(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} +static inline simd_s_t srslte_simd_s_zero(void) { +#ifdef LV_HAVE_AVX512 + return _mm512_setzero_si512(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_setzero_si256(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_setzero_si128(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_s16(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_mullo_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_mullo_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_mullo_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_s16(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_add_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vaddq_s16(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t 
srslte_simd_s_sub(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_sub_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sub_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_sub_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubq_s16(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_S_SIZE */ + + +#if SRSLTE_SIMD_C16_SIZE +/* simd_c16_t holds complex int16 samples in split form: the x86 variants keep separate re and im registers, each a union with an int16_t array view; the NEON variant is a single int16x8x2_t. NOTE(review): the AVX512 variant only has m512 members, while every c16 helper except srslte_simd_c16i_load has no LV_HAVE_AVX512 branch and accesses m256 - confirm SRSLTE_SIMD_C16_SIZE is 0 for AVX512 builds. */ +typedef +#ifdef LV_HAVE_AVX512 + struct { + union { + __m512i m512; + int16_t i16[32]; + } re; + union { + __m512i m512; + int16_t i16[32]; + } im; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + struct { + union { + __m256i m256; + int16_t i16[16]; + } re; + union { + __m256i m256; + int16_t i16[16]; + } im; +#else +#ifdef LV_HAVE_SSE + struct { + union { + __m128i m128; + int16_t i16[8]; + } re; + union { + __m128i m128; + int16_t i16[8]; + } im; +#else +#ifdef HAVE_NEON + union { + int16x8x2_t m128; + int16_t i16[16]; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} simd_c16_t; + +/* Fixed point precision (16-bit) functions */ +/* Loads interleaved c16_t samples from ptr (aligned loads on x86) and splits them into separate re/im registers. NOTE(review): the second register is read from ptr + 8 in the 512-, 256- and 128-bit branches alike although the register width differs - confirm the element offset for each width. */ static inline simd_c16_t srslte_simd_c16i_load(c16_t *ptr) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX512 + __m512i in1 = _mm512_load_si512((__m512i*)(ptr)); + __m512i in2 = _mm512_load_si512((__m512i*)(ptr + 8)); + ret.re.m512 = _mm512_mask_blend_epi16(0xAAAAAAAA, in1,_mm512_shufflelo_epi16(_mm512_shufflehi_epi16(in2, 0b10100000), 0b10100000)); + ret.im.m512 = _mm512_mask_blend_epi16(0xAAAAAAAA, _mm512_shufflelo_epi16(_mm512_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256i in1 = _mm256_load_si256((__m256i*)(ptr)); + __m256i in2 = _mm256_load_si256((__m256i*)(ptr + 8)); + ret.re.m256 = _mm256_blend_epi16(in1,_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); + ret.im.m256 =
_mm256_blend_epi16(_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128i in1 = _mm_load_si128((__m128i*)(ptr)); + __m128i in2 = _mm_load_si128((__m128i*)(ptr + 8)); + ret.re.m128 = _mm_blend_epi16(in1,_mm_shufflelo_epi16(_mm_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); + ret.im.m128 = _mm_blend_epi16(_mm_shufflelo_epi16(_mm_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128 = vld2q_s16((int16_t*)(ptr)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_load(int16_t *re, int16_t *im) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_load_si256((__m256i*)(re)); + ret.im.m256 = _mm256_load_si256((__m256i*)(im)); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_load_si128((__m128i*)(re)); + ret.im.m128 = _mm_load_si128((__m128i*)(im)); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128.val[0] = vld1q_s16((int16_t*)(re)); + ret.m128.val[1] = vld1q_s16((int16_t*)(im)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_loadu(int16_t *re, int16_t *im) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_loadu_si256((__m256i*)(re)); + ret.im.m256 = _mm256_loadu_si256((__m256i*)(im)); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_loadu_si128((__m128i*)(re)); + ret.im.m128 = _mm_loadu_si128((__m128i*)(im)); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128.val[0] = vld1q_s16((int16_t*)(re)); + ret.m128.val[1] = vld1q_s16((int16_t*)(im)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + __m256i re_sw = 
_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); + __m256i im_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.im.m256, 0b10110001), 0b10110001); + _mm256_store_si256((__m256i *) (ptr), _mm256_blend_epi16(simdreg.re.m256, im_sw, 0b10101010)); + _mm256_store_si256((__m256i *) (ptr + 8), _mm256_blend_epi16(re_sw, simdreg.im.m256, 0b10101010)); +#else +#ifdef LV_HAVE_SSE + __m128i re_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.re.m128, 0b10110001), 0b10110001); + __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); + _mm_store_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); + _mm_store_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_s16((int16_t*)(ptr) ,simdreg.m128); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16i_storeu(c16_t *ptr, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + __m256i re_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); + __m256i im_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.im.m256, 0b10110001), 0b10110001); + _mm256_storeu_si256((__m256i *) (ptr), _mm256_blend_epi16(simdreg.re.m256, im_sw, 0b10101010)); + _mm256_storeu_si256((__m256i *) (ptr + 8), _mm256_blend_epi16(re_sw, simdreg.im.m256, 0b10101010)); +#else +#ifdef LV_HAVE_SSE + __m128i re_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.re.m128, 0b10110001), 0b10110001); + __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); + _mm_storeu_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); + _mm_storeu_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_s16((int16_t*)(ptr) ,simdreg.m128); +#endif /* 
HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i *) re, simdreg.re.m256); + _mm256_store_si256((__m256i *) im, simdreg.im.m256); +#else +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i *) re, simdreg.re.m128); + _mm_store_si128((__m128i *) im, simdreg.im.m128); +#else +#ifdef HAVE_NEON + vst1q_s16((int16_t *) re, simdreg.m128.val[0]); + vst1q_s16((int16_t *) im, simdreg.m128.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16_storeu(int16_t *re, int16_t *im, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256((__m256i *) re, simdreg.re.m256); + _mm256_storeu_si256((__m256i *) im, simdreg.im.m256); +#else +#ifdef LV_HAVE_SSE + _mm_storeu_si128((__m128i *) re, simdreg.re.m128); + _mm_storeu_si128((__m128i *) im, simdreg.im.m128); +#else +#ifdef HAVE_NEON + vst1q_s16((int16_t *) re, simdreg.m128.val[0]); + vst1q_s16((int16_t *) im, simdreg.m128.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline simd_c16_t srslte_simd_c16_prod (simd_c16_t a, simd_c16_t b) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_sub_epi16(_mm256_mulhrs_epi16(a.re.m256, _mm256_slli_epi16(b.re.m256, 1)), + _mm256_mulhrs_epi16(a.im.m256, _mm256_slli_epi16(b.im.m256, 1))); + ret.im.m256 = _mm256_add_epi16(_mm256_mulhrs_epi16(a.re.m256, _mm256_slli_epi16(b.im.m256, 1)), + _mm256_mulhrs_epi16(a.im.m256, _mm256_slli_epi16(b.re.m256, 1))); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_sub_epi16(_mm_mulhrs_epi16(a.re.m128, _mm_slli_epi16(b.re.m128, 1)), + _mm_mulhrs_epi16(a.im.m128, _mm_slli_epi16(b.im.m128, 1))); + ret.im.m128 = _mm_add_epi16(_mm_mulhrs_epi16(a.re.m128, _mm_slli_epi16(b.im.m128, 1)), + _mm_mulhrs_epi16(a.im.m128, _mm_slli_epi16(b.re.m128, 1))); +#endif /* LV_HAVE_SSE */ 
+#endif /* LV_HAVE_AVX2 */ + return ret; +} +/* Element-wise sum of two split complex int16 registers: ret.re = a.re + b.re, ret.im = a.im + b.im (wrapping 16-bit adds). */ +static inline simd_c16_t srslte_simd_c16_add (simd_c16_t a, simd_c16_t b) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_add_epi16(a.re.m256, b.re.m256); + ret.im.m256 = _mm256_add_epi16(a.im.m256, b.im.m256); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_add_epi16(a.re.m128, b.re.m128); + ret.im.m128 = _mm_add_epi16(a.im.m128, b.im.m128); +#else +#ifdef HAVE_NEON + ret.m128.val[0] = vaddq_s16(a.m128.val[0],b.m128.val[0]); + ret.m128.val[1] = vaddq_s16(a.m128.val[1],b.m128.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} +/* Returns a simd_c16_t whose real and imaginary components are all zero. */ +static inline simd_c16_t srslte_simd_c16_zero (void) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_setzero_si256(); + ret.im.m256 = _mm256_setzero_si256(); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_setzero_si128(); + ret.im.m128 = _mm_setzero_si128(); +#else +#ifdef HAVE_NEON + ret.m128.val[0] = vdupq_n_s16(0); + ret.m128.val[1] = vdupq_n_s16(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +#endif /* SRSLTE_SIMD_C16_SIZE */ + +#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE +/* Converts the floats of a and b into one register of int16: truncating float-to-int32 conversion followed by a signed saturating pack. The AVX paths first regroup 128-bit lanes of a and b, presumably so that the packed result keeps a's elements ahead of b's - TODO confirm. */ +static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + __m512 aa = _mm512_permutex2var_ps(a, _mm512_setr_epi32(0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B), b); + __m512 bb = _mm512_permutex2var_ps(a, _mm512_setr_epi32(0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F), b); + __m512i ai = _mm512_cvttps_epi32(aa); + __m512i bi = _mm512_cvttps_epi32(bb); + return _mm512_packs_epi32(ai, bi); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 aa = _mm256_permute2f128_ps(a, b, 0x20); + __m256 bb = _mm256_permute2f128_ps(a, b, 0x31); + __m256i ai = _mm256_cvttps_epi32(aa); + __m256i bi = _mm256_cvttps_epi32(bb); + return
_mm256_packs_epi32(ai, bi); +#else +#ifdef LV_HAVE_SSE + __m128i ai = _mm_cvttps_epi32(a); + __m128i bi = _mm_cvttps_epi32(b); + return _mm_packs_epi32(ai, bi); + #else +#ifdef HAVE_NEON + int32x4_t ai = vcvtq_s32_f32(a); + int32x4_t bi = vcvtq_s32_f32(b); + return (simd_s_t)vcombine_s16(vqmovn_s32(ai), vqmovn_s32(bi)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE */ + #endif //SRSLTE_SIMD_H_H diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 4a55d18b6..4a5daefb3 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -54,7 +54,6 @@ extern "C" { #define SRSLTE_VEC_EMA(data, average, alpha) ((alpha)*(data)+(1-alpha)*(average)) /** Return the sum of all the elements */ -SRSLTE_API int srslte_vec_acc_ii(int *x, uint32_t len); SRSLTE_API float srslte_vec_acc_ff(float *x, uint32_t len); SRSLTE_API cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len); @@ -77,52 +76,29 @@ SRSLTE_API void srslte_vec_save_file(char *filename, void *buffer, uint32_t len) SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, uint32_t len); /* sum two vectors */ -SRSLTE_API void srslte_vec_sum_ch(uint8_t *x, uint8_t *y, char *z, uint32_t len); SRSLTE_API void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); /* substract two vectors z=x-y */ SRSLTE_API void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sub_ccc(cf_t *x,
cf_t *y, cf_t *z, uint32_t len); -/* EMA filter: output=coeff*new_data + (1-coeff)*average */ -SRSLTE_API void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len); - -/* Square distance */ -SRSLTE_API void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints); - -/* scalar addition */ -SRSLTE_API void srslte_vec_sc_add_fff(float *x, float h, float *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_cfc(cf_t *x, float h, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len); - /* scalar product */ SRSLTE_API void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_div2_sss(short *x, int pow2_div, short *z, uint32_t len); -/* Normalization */ -SRSLTE_API void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len); SRSLTE_API void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len); SRSLTE_API void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len); -SRSLTE_API void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len); -SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len); -SRSLTE_API void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len); -SRSLTE_API void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len); - -SRSLTE_API void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len); +SRSLTE_API void srslte_vec_lut_sss(short *x, 
unsigned short *lut, short *y, uint32_t len); /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_split(float *x_re, float *x_im, float *y_re, float *y_im, float *z_re, float *z_im, uint32_t len); /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len); @@ -132,7 +108,7 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len /* real vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len); -SRSLTE_API void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); /* Dot-product */ SRSLTE_API cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len); @@ -142,8 +118,8 @@ SRSLTE_API float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len); SRSLTE_API int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len); /* z=x/y vector division (element-wise) */ -SRSLTE_API void srslte_vec_div_ccc(cf_t *x, cf_t *y, float *y_mod, cf_t *z, float *z_real, float *z_imag, uint32_t len); -void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag, uint32_t len); +SRSLTE_API void srslte_vec_div_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len); /* conjugate */ @@ -158,11 +134,6 @@ SRSLTE_API float srslte_vec_corr_ccc(cf_t *x, cf_t *y, uint32_t len); /* return the index of the maximum value in the vector */ SRSLTE_API uint32_t srslte_vec_max_fi(float *x, uint32_t len); SRSLTE_API uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len); -SRSLTE_API int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len); -SRSLTE_API int16_t srslte_vec_max_abs_star_si(int16_t *x, 
uint32_t len); - -/* maximum between two vectors */ -SRSLTE_API void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len); /* quantify vector of floats or int16 and convert to uint8_t */ SRSLTE_API void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len); @@ -172,9 +143,6 @@ SRSLTE_API void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int1 SRSLTE_API void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len); SRSLTE_API void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len); -/* argument of each vector element */ -SRSLTE_API void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len); - /* Copy 256 bit aligned vector */ SRSLTE_API void srs_vec_cf_cpy(cf_t *src, cf_t *dst, int len); diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 1894a0803..294cff50f 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -35,32 +35,94 @@ extern "C" { #include #include "srslte/config.h" -SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len); +#ifdef LV_HAVE_AVX512 +#define SRSLTE_SIMD_BIT_ALIGN 512 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x3F) == 0) +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX +#define SRSLTE_SIMD_BIT_ALIGN 256 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x1F) == 0) +#else /* LV_HAVE_AVX */ +#ifdef LV_HAVE_SSE +#define SRSLTE_SIMD_BIT_ALIGN 128 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x0F) == 0) +#else /* LV_HAVE_SSE */ +#define SRSLTE_SIMD_BIT_ALIGN 64 +#define SRSLTE_IS_ALIGNED(PTR) (1) +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX */ +#endif /* LV_HAVE_AVX512 */ -SRSLTE_API int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len); +/* SIMD Basic vector math */ +SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); -SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, 
short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); -SRSLTE_API void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len); +SRSLTE_API float srslte_vec_acc_ff_simd(float *x, int len); -SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API cf_t srslte_vec_acc_cc_simd(cf_t *x, int len); -SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_add_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len); +/* SIMD Vector Scalar Product */ +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(const cf_t *x,const float h,cf_t *y,const int len); +SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, int len); -SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len); -SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len); +/* SIMD Vector Product */ +SRSLTE_API void srslte_vec_prod_ccc_split_simd(float *a_re, float *a_im, float *b_re, float *b_im, float *r_re, float *r_im, int len); -SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_c16_simd(int16_t *a_re, int16_t *a_im, int16_t *b_re, int16_t *b_im, int16_t *r_re, + int16_t *r_im, int len); -SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); + +SRSLTE_API void srslte_vec_prod_cfc_simd(cf_t *x, float *y, cf_t *z, int len); + +SRSLTE_API void 
srslte_vec_prod_fff_simd(float *x, float *y, float *z, int len); + +SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); + +/* SIMD Division */ +SRSLTE_API void srslte_vec_div_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_div_cfc_simd(cf_t *x, float *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_div_fff_simd(float *x, float *y, float *z, int len); + +/* SIMD Dot product */ +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, int len); + +SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, int len); + +SRSLTE_API c16_t srslte_vec_dot_prod_ccc_c16i_simd(c16_t *x, c16_t *y, int len); + +SRSLTE_API int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len); + +/* SIMD Modulus functions */ +SRSLTE_API void srslte_vec_abs_cf_simd(cf_t *x, float *z, int len); + +SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, int len); + +/* Other Functions */ +SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, int len); + +SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, int len); + +SRSLTE_API void srslte_vec_cp_simd(cf_t *src, cf_t *dst, int len); + + +/* SIMD Find Max functions */ +SRSLTE_API uint32_t srslte_vec_max_fi_simd(float *x, int len); + +SRSLTE_API uint32_t srslte_vec_max_ci_simd(cf_t *x, int len); -SRSLTE_API void srslte_vec_sc_prod_cfc_avx(const cf_t *x,const float h,cf_t *y,const uint32_t len); #ifdef __cplusplus } #endif diff --git a/lib/src/phy/dft/dft_fftw.c b/lib/src/phy/dft/dft_fftw.c index b4a627742..9d6898117 100644 --- a/lib/src/phy/dft/dft_fftw.c +++ b/lib/src/phy/dft/dft_fftw.c @@ -56,9 +56,7 @@ void srslte_dft_load() { void srslte_dft_exit() { #ifdef FFTW_WISDOM_FILE - if (!fftwf_export_wisdom_to_filename(FFTW_WISDOM_FILE)) { - fprintf(stderr, "Error saving FFTW wisdom to file %s\n", FFTW_WISDOM_FILE); - } + 
fftwf_export_wisdom_to_filename(FFTW_WISDOM_FILE); #endif } @@ -93,6 +91,27 @@ static void allocate(srslte_dft_plan_t *plan, int size_in, int size_out, int len plan->out = fftwf_malloc(size_out*len); } +/* Replaces plan->p with a new FFTW guru plan of new_dft_points points operating on in_buffer/out_buffer; the direction is taken from plan->forward. Returns 0 on success, -1 when fftwf_plan_guru_dft fails. */ int srslte_dft_replan_guru_c(srslte_dft_plan_t *plan, const int new_dft_points, cf_t *in_buffer, + cf_t *out_buffer, int istride, int ostride, int how_many, + int idist, int odist) { + int sign = (plan->forward) ? FFTW_FORWARD : FFTW_BACKWARD; + /* fftwf_iodim is {n, is, os}: iodim describes one transform, howmany_dims the batch of how_many transforms */ + const fftwf_iodim iodim = {new_dft_points, istride, ostride}; + const fftwf_iodim howmany_dims = {how_many, idist, odist}; + + /* Destroy current plan, if any (same guard as srslte_dft_replan_c and srslte_dft_plan_free) */ + if (plan->p) fftwf_destroy_plan(plan->p); + + plan->p = fftwf_plan_guru_dft(1, &iodim, 1, &howmany_dims, in_buffer, out_buffer, sign, FFTW_TYPE); + if (!plan->p) { + return -1; + } + plan->size = new_dft_points; + plan->init_size = plan->size; + + return 0; +} + int srslte_dft_replan_c(srslte_dft_plan_t *plan, const int new_dft_points) { int sign = (plan->dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; if (plan->p) { @@ -107,6 +126,32 @@ int srslte_dft_replan_c(srslte_dft_plan_t *plan, const int new_dft_points) { return 0; } +int srslte_dft_plan_guru_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_dir_t dir, cf_t *in_buffer, + cf_t *out_buffer, int istride, int ostride, int how_many, + int idist, int odist) { + int sign = (dir == SRSLTE_DFT_FORWARD) ?
FFTW_FORWARD : FFTW_BACKWARD; + + const fftwf_iodim iodim = {dft_points, istride, ostride}; + const fftwf_iodim howmany_dims = {how_many, idist, odist}; + + plan->p = fftwf_plan_guru_dft(1, &iodim, 1, &howmany_dims, in_buffer, out_buffer, sign, FFTW_TYPE); + if (!plan->p) { + return -1; + } + plan->size = dft_points; + plan->init_size = plan->size; + plan->mode = SRSLTE_DFT_COMPLEX; + plan->dir = dir; + plan->forward = (dir==SRSLTE_DFT_FORWARD)?true:false; + plan->mirror = false; + plan->db = false; + plan->norm = false; + plan->dc = false; + plan->is_guru = true; + + return 0; +} + int srslte_dft_plan_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_dir_t dir) { allocate(plan,sizeof(fftwf_complex),sizeof(fftwf_complex), dft_points); int sign = (dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; @@ -123,6 +168,7 @@ int srslte_dft_plan_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_ plan->db = false; plan->norm = false; plan->dc = false; + plan->is_guru = false; return 0; } @@ -232,6 +278,14 @@ void srslte_dft_run_c(srslte_dft_plan_t *plan, cf_t *in, cf_t *out) { plan->forward, plan->mirror, plan->dc); } +void srslte_dft_run_guru_c(srslte_dft_plan_t *plan) { + if (plan->is_guru == true) { + fftwf_execute(plan->p); + } else { + fprintf(stderr, "srslte_dft_run_guru_c: the selected plan is not guru!\n"); + } +} + void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out) { float norm; int i; @@ -255,8 +309,10 @@ void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out) { void srslte_dft_plan_free(srslte_dft_plan_t *plan) { if (!plan) return; if (!plan->size) return; - if (plan->in) fftwf_free(plan->in); - if (plan->out) fftwf_free(plan->out); + if (!plan->is_guru) { + if (plan->in) fftwf_free(plan->in); + if (plan->out) fftwf_free(plan->out); + } if (plan->p) fftwf_destroy_plan(plan->p); bzero(plan, sizeof(srslte_dft_plan_t)); } diff --git a/lib/src/phy/dft/ofdm.c b/lib/src/phy/dft/ofdm.c index db5939274..8ea690bb5 
100644 --- a/lib/src/phy/dft/ofdm.c +++ b/lib/src/phy/dft/ofdm.c @@ -37,23 +37,79 @@ #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" +/* Uncomment next line for avoiding Guru DFT call */ +//#define AVOID_GURU -int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof_prb, srslte_dft_dir_t dir) { - return srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, dir, SRSLTE_SF_NORM); +int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir) { + return srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, dir, SRSLTE_SF_NORM); } +int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, srslte_sf_t sf_type) { -int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, srslte_sf_t sf_type) { + /* Set OFDM object attributes */ + q->symbol_sz = (uint32_t) symbol_sz; + q->nof_symbols = SRSLTE_CP_NSYMB(cp); + q->nof_symbols_mbsfn = SRSLTE_CP_NSYMB(SRSLTE_CP_EXT); + q->cp = cp; + q->freq_shift = false; + q->nof_re = (uint32_t) nof_prb * SRSLTE_NRE; + q->nof_guards = ((symbol_sz - q->nof_re) / 2); + q->slot_sz = (uint32_t) SRSLTE_SLOT_LEN(symbol_sz); + q->sf_sz = (uint32_t) SRSLTE_SF_LEN(symbol_sz); + q->in_buffer = in_buffer; + q->out_buffer= out_buffer; if (srslte_dft_plan_c(&q->fft_plan, symbol_sz, dir)) { fprintf(stderr, "Error: Creating DFT plan\n"); return -1; } + +#ifdef AVOID_GURU q->tmp = srslte_vec_malloc((uint32_t) symbol_sz * sizeof(cf_t)); if (!q->tmp) { perror("malloc"); return -1; } + bzero(q->tmp, sizeof(cf_t) * symbol_sz); +#else + int cp1 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(0, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + int cp2 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(1, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + + q->tmp = srslte_vec_malloc(sizeof(cf_t) * q->sf_sz); + 
if (!q->tmp) { + perror("malloc"); + return -1; + } + bzero(q->tmp, sizeof(cf_t) * q->sf_sz); + + if (dir == SRSLTE_DFT_BACKWARD) { + bzero(in_buffer, sizeof(cf_t) * SRSLTE_SF_LEN_RE(nof_prb, cp)); + }else { + bzero(in_buffer, sizeof(cf_t) * q->sf_sz); + } + + for (int slot = 0; slot < 2; slot++) { + //bzero(&q->fft_plan_sf[slot], sizeof(srslte_dft_plan_t)); + //bzero(q->tmp + SRSLTE_CP_NSYMB(cp)*symbol_sz*slot, sizeof(cf_t) * (cp1 + (SRSLTE_CP_NSYMB(cp) - 1)*cp2 + SRSLTE_CP_NSYMB(cp)*symbol_sz)); + if (dir == SRSLTE_DFT_FORWARD) { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + in_buffer + cp1 + q->slot_sz * slot, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz + cp2, symbol_sz)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } else { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + out_buffer + cp1 + q->slot_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz, symbol_sz + cp2)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } + } +#endif q->shift_buffer = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN(symbol_sz)); if (!q->shift_buffer) { @@ -64,15 +120,6 @@ int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int srslte_dft_plan_set_mirror(&q->fft_plan, true); srslte_dft_plan_set_dc(&q->fft_plan, true); - q->symbol_sz = (uint32_t) symbol_sz; - q->nof_symbols = SRSLTE_CP_NSYMB(cp); - q->nof_symbols_mbsfn = SRSLTE_CP_NSYMB(SRSLTE_CP_EXT); - q->cp = cp; - q->freq_shift = false; - q->nof_re = nof_prb * SRSLTE_NRE; - q->nof_guards = ((symbol_sz - q->nof_re) / 2); - q->slot_sz = SRSLTE_SLOT_LEN(symbol_sz); - DEBUG("Init %s symbol_sz=%d, nof_symbols=%d, cp=%s, nof_re=%d, nof_guards=%d\n", dir==SRSLTE_DFT_FORWARD?"FFT":"iFFT", q->symbol_sz, q->nof_symbols, q->cp==SRSLTE_CP_NORM?"Normal":"Extended", q->nof_re, q->nof_guards); @@ -101,9 +148,60 @@ int 
srslte_ofdm_replan_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof q->symbol_sz = (uint32_t) symbol_sz; q->nof_symbols = SRSLTE_CP_NSYMB(cp); q->cp = cp; - q->nof_re = nof_prb * SRSLTE_NRE; + q->nof_re = (uint32_t) nof_prb * SRSLTE_NRE; q->nof_guards = ((symbol_sz - q->nof_re) / 2); - q->slot_sz = SRSLTE_SLOT_LEN(symbol_sz); + q->slot_sz = (uint32_t) SRSLTE_SLOT_LEN(symbol_sz); + q->sf_sz = (uint32_t) SRSLTE_SF_LEN(symbol_sz); + +#ifndef AVOID_GURU + cf_t *in_buffer = q->in_buffer; + cf_t *out_buffer = q->out_buffer; + + int cp1 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(0, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + int cp2 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(1, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + + srslte_dft_dir_t dir = q->fft_plan_sf[0].dir; + + if (q->tmp) { + free(q->tmp); + } + + q->tmp = srslte_vec_malloc(sizeof(cf_t) * q->sf_sz); + if (!q->tmp) { + perror("malloc"); + return -1; + } + bzero(q->tmp, sizeof(cf_t) * q->sf_sz); + + if (dir == SRSLTE_DFT_BACKWARD) { + bzero(in_buffer, sizeof(cf_t) * SRSLTE_SF_LEN_RE(nof_prb, cp)); + }else { + bzero(in_buffer, sizeof(cf_t) * q->sf_sz); + } + + for (int slot = 0; slot < 2; slot++) { + srslte_dft_plan_free(&q->fft_plan_sf[slot]); + + if (dir == SRSLTE_DFT_FORWARD) { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + in_buffer + cp1 + q->slot_sz * slot, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz + cp2, symbol_sz)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } else { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + out_buffer + cp1 + q->slot_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz, symbol_sz + cp2)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } + } +#endif /* AVOID_GURU */ + if (q->freq_shift) { srslte_ofdm_set_freq_shift(q, q->freq_shift_f); @@ -118,6 +216,15 @@ int 
srslte_ofdm_replan_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof void srslte_ofdm_free_(srslte_ofdm_t *q) { srslte_dft_plan_free(&q->fft_plan); + +#ifndef AVOID_GURU + for (int slot = 0; slot < 2; slot++) { + if (q->fft_plan_sf[slot].init_size) { + srslte_dft_plan_free(&q->fft_plan_sf[slot]); + } + } +#endif + if (q->tmp) { free(q->tmp); } @@ -127,28 +234,28 @@ void srslte_ofdm_free_(srslte_ofdm_t *q) { bzero(q, sizeof(srslte_ofdm_t)); } -int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { +int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t max_prb) { int symbol_sz = srslte_symbol_sz(max_prb); if (symbol_sz < 0) { fprintf(stderr, "Error: Invalid nof_prb=%d\n", max_prb); return -1; } q->max_prb = max_prb; - return srslte_ofdm_init_(q, cp, symbol_sz, max_prb, SRSLTE_DFT_FORWARD); + return srslte_ofdm_init_(q, cp, in_buffer, out_buffer, symbol_sz, max_prb, SRSLTE_DFT_FORWARD); } -int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) +int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t nof_prb) { int symbol_sz = srslte_symbol_sz(nof_prb); if (symbol_sz < 0) { fprintf(stderr, "Error: Invalid nof_prb=%d\n", nof_prb); return -1; } - return srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, SRSLTE_DFT_FORWARD, SRSLTE_SF_MBSFN); + return srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, SRSLTE_DFT_FORWARD, SRSLTE_SF_MBSFN); } -int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { +int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t max_prb) { uint32_t i; int ret; @@ -158,7 +265,7 @@ int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { return -1; } q->max_prb = max_prb; - ret = srslte_ofdm_init_(q, cp, symbol_sz, max_prb, SRSLTE_DFT_BACKWARD); + ret = srslte_ofdm_init_(q, cp, 
in_buffer, out_buffer, symbol_sz, max_prb, SRSLTE_DFT_BACKWARD); if (ret == SRSLTE_SUCCESS) { @@ -173,7 +280,7 @@ int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { return ret; } -int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) +int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t nof_prb) { uint32_t i; int ret; @@ -184,7 +291,7 @@ int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb return -1; } - ret = srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, SRSLTE_DFT_BACKWARD, SRSLTE_SF_MBSFN); + ret = srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, SRSLTE_DFT_BACKWARD, SRSLTE_SF_MBSFN); if (ret == SRSLTE_SUCCESS) { srslte_dft_plan_set_norm(&q->fft_plan, false); @@ -207,7 +314,8 @@ int srslte_ofdm_rx_set_prb(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) { } return srslte_ofdm_replan_(q, cp, symbol_sz, nof_prb); } else { - fprintf(stderr, "OFDM: Error calling set_prb: nof_prb must be equal or lower initialized max_prb\n"); + fprintf(stderr, "OFDM (Rx): Error calling set_prb: nof_prb (%d) must be equal or lower initialized max_prb (%d)\n", + nof_prb, q->max_prb); return -1; } } @@ -234,7 +342,8 @@ int srslte_ofdm_tx_set_prb(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) { } return ret; } else { - fprintf(stderr, "OFDM: Error calling set_prb: nof_prb must be equal or lower initialized max_prb\n"); + fprintf(stderr, "OFDM (Tx): Error calling set_prb: nof_prb (%d) must be equal or lower initialized max_prb (%d)\n", + nof_prb, q->max_prb); return -1; } } @@ -274,8 +383,12 @@ void srslte_ofdm_tx_free(srslte_ofdm_t *q) { /* Transforms input samples into output OFDM symbols. * Performs FFT on a each symbol and removes CP. 
*/ -void srslte_ofdm_rx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { +void srslte_ofdm_rx_slot(srslte_ofdm_t *q, int slot_in_sf) { + cf_t *output = q->out_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + +#ifdef AVOID_GURU uint32_t i; + cf_t *input = q->in_buffer + slot_in_sf * q->slot_sz; for (i=0;i<q->nof_symbols;i++) { input += SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); srslte_dft_run_c(&q->fft_plan, input, q->tmp); @@ -283,6 +396,25 @@ void srslte_ofdm_rx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { input += q->symbol_sz; output += q->nof_re; } +#else + float norm = 1.0f/sqrtf(q->fft_plan.size); + cf_t *tmp = q->tmp + slot_in_sf * q->symbol_sz * q->nof_symbols; + uint32_t dc = (q->fft_plan.dc) ? 1:0; + + srslte_dft_run_guru_c(&q->fft_plan_sf[slot_in_sf]); + + for (int i = 0; i < q->nof_symbols; i++) { + memcpy(output, tmp + q->symbol_sz - q->nof_re / 2, sizeof(cf_t) * q->nof_re / 2); + memcpy(output + q->nof_re / 2, &tmp[dc], sizeof(cf_t) * q->nof_re / 2); + + if (q->fft_plan.norm) { + srslte_vec_sc_prod_cfc(output, norm, output, q->nof_re); + } + + tmp += q->symbol_sz; + output += q->nof_re; + } +#endif } void srslte_ofdm_rx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output) @@ -314,29 +446,32 @@ void srslte_ofdm_rx_slot_zerocopy(srslte_ofdm_t *q, cf_t *input, cf_t *output) { } } -void srslte_ofdm_rx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) { - uint32_t n; +void srslte_ofdm_rx_sf(srslte_ofdm_t *q) { + uint32_t n; if (q->freq_shift) { - srslte_vec_prod_ccc(input, q->shift_buffer, input, 2*q->slot_sz); + srslte_vec_prod_ccc(q->in_buffer, q->shift_buffer, q->in_buffer, 2*q->slot_sz); } if(!q->mbsfn_subframe){ for (n=0;n<2;n++) { - srslte_ofdm_rx_slot(q, &input[n*q->slot_sz], &output[n*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot(q, n); } } else{ - srslte_ofdm_rx_slot_mbsfn(q, &input[0*q->slot_sz], &output[0*q->nof_re*q->nof_symbols]); - srslte_ofdm_rx_slot(q, &input[1*q->slot_sz], 
&output[1*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot_mbsfn(q, &q->in_buffer[0*q->slot_sz], &q->out_buffer[0*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot(q, 1); } } /* Transforms input OFDM symbols into output samples. * Performs FFT on a each symbol and adds CP. */ -void srslte_ofdm_tx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { - uint32_t i, cp_len; - for (i=0;i<q->nof_symbols;i++) { - cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); +void srslte_ofdm_tx_slot(srslte_ofdm_t *q, int slot_in_sf) { + cf_t *input = q->in_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + cf_t *output = q->out_buffer + slot_in_sf * q->slot_sz; + +#ifdef AVOID_GURU + for (int i=0;i<q->nof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); memcpy(&q->tmp[q->nof_guards], input, q->nof_re * sizeof(cf_t)); srslte_dft_run_c(&q->fft_plan, q->tmp, &output[cp_len]); input += q->nof_re; @@ -344,6 +479,60 @@ void srslte_ofdm_tx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { memcpy(output, &output[q->symbol_sz], cp_len * sizeof(cf_t)); output += q->symbol_sz + cp_len; } +#else + float norm = 1.0f/sqrtf(q->symbol_sz); + cf_t *tmp = q->tmp + slot_in_sf * q->symbol_sz * q->nof_symbols; + + bzero(tmp, q->slot_sz); + uint32_t dc = (q->fft_plan.dc) ? 1:0; + + for (int i = 0; i < q->nof_symbols; i++) { + memcpy(&tmp[dc], &input[q->nof_re / 2], q->nof_re / 2 * sizeof(cf_t)); + memcpy(&tmp[q->symbol_sz - q->nof_re / 2], &input[0], q->nof_re / 2 * sizeof(cf_t)); + + input += q->nof_re; + tmp += q->symbol_sz; + } + + srslte_dft_run_guru_c(&q->fft_plan_sf[slot_in_sf]); + + for (int i=0;i<q->nof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp) ? 
SRSLTE_CP_LEN_NORM(i, q->symbol_sz) : SRSLTE_CP_LEN_EXT(q->symbol_sz); + + if (q->fft_plan.norm) { + srslte_vec_sc_prod_cfc(&output[cp_len], norm, &output[cp_len], q->symbol_sz); + } + + /* add CP */ + memcpy(output, &output[q->symbol_sz], cp_len * sizeof(cf_t)); + output += q->symbol_sz + cp_len; + } +#endif + + /*input = q->in_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + cf_t *output2 = srslte_vec_malloc(sizeof(cf_t) * q->slot_sz); + cf_t *o2 = output2; + bzero(q->tmp, sizeof(cf_t)*q->symbol_sz); + //bzero(output2, sizeof(cf_t)*q->slot_sz); + for (int i=0;i<q->nof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); + memcpy(&q->tmp[q->nof_guards], input, q->nof_re * sizeof(cf_t)); + srslte_dft_run_c(&q->fft_plan, q->tmp, &o2[cp_len]); + input += q->nof_re; + memcpy(o2, &o2[q->symbol_sz], cp_len * sizeof(cf_t)); + o2 += q->symbol_sz + cp_len; + } + cf_t *output1 = q->out_buffer + slot_in_sf * q->slot_sz;//srslte_vec_malloc(sizeof(cf_t) * q->slot_sz); + + for (int i = 0; i < q->slot_sz; i++) { + float error = cabsf(output1[i] - output2[i])/cabsf(output2[i]); + cf_t k = output1[i]/output2[i]; + if (error > 0.1) printf("%d/%05d error=%f output=%+f%+fi gold=%+f%+fi k=%+f%+fi\n", slot_in_sf, i, error, + __real__ output1[i], __imag__ output1[i], + __real__ output2[i], __imag__ output2[i], + __real__ k, __imag__ k); + } + free(output2);/**/ } void srslte_ofdm_tx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output) @@ -369,20 +558,20 @@ void srslte_ofdm_set_normalize(srslte_ofdm_t *q, bool normalize_enable) { srslte_dft_plan_set_norm(&q->fft_plan, normalize_enable); } -void srslte_ofdm_tx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) +void srslte_ofdm_tx_sf(srslte_ofdm_t *q) { - uint32_t n; + uint32_t n; if(!q->mbsfn_subframe){ for (n=0;n<2;n++) { - srslte_ofdm_tx_slot(q, &input[n*q->nof_re*q->nof_symbols], &output[n*q->slot_sz]); + srslte_ofdm_tx_slot(q, n); } } else{ - 
srslte_ofdm_tx_slot_mbsfn(q, &input[0*q->nof_re*q->nof_symbols], &output[0*q->slot_sz]); - srslte_ofdm_tx_slot(q, &input[1*q->nof_re*q->nof_symbols], &output[1*q->slot_sz]); + srslte_ofdm_tx_slot_mbsfn(q, &q->in_buffer[0*q->nof_re*q->nof_symbols], &q->out_buffer[0*q->slot_sz]); + srslte_ofdm_tx_slot(q, 1); } if (q->freq_shift) { - srslte_vec_prod_ccc(output, q->shift_buffer, output, 2*q->slot_sz); + srslte_vec_prod_ccc(q->out_buffer, q->shift_buffer, q->out_buffer, 2*q->slot_sz); } } diff --git a/lib/src/phy/dft/test/ofdm_test.c b/lib/src/phy/dft/test/ofdm_test.c index 11aac7f4e..e77fcd39e 100644 --- a/lib/src/phy/dft/test/ofdm_test.c +++ b/lib/src/phy/dft/test/ofdm_test.c @@ -35,16 +35,28 @@ int nof_prb = -1; srslte_cp_t cp = SRSLTE_CP_NORM; +int nof_repetitions = 128; + +static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { + if (ts_end->tv_usec > ts_start->tv_usec) { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + + (double) ts_end->tv_usec - (double) ts_start->tv_usec; + } else { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec - 1) * 1000000 + + ((double) ts_end->tv_usec + 1000000) - (double) ts_start->tv_usec; + } +} void usage(char *prog) { printf("Usage: %s\n", prog); printf("\t-n nof_prb [Default All]\n"); printf("\t-e extended cyclic prefix [Default Normal]\n"); + printf("\t-r nof_repetitions [Default %d]\n", nof_repetitions); } void parse_args(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "ne")) != -1) { + while ((opt = getopt(argc, argv, "ner")) != -1) { switch (opt) { case 'n': nof_prb = atoi(argv[optind]); @@ -52,6 +64,9 @@ void parse_args(int argc, char **argv) { case 'e': cp = SRSLTE_CP_EXT; break; + case 'r': + nof_repetitions = atoi(argv[optind]); + break; default: usage(argv[0]); exit(-1); @@ -61,6 +76,7 @@ void parse_args(int argc, char **argv) { int main(int argc, char **argv) { + struct timeval start, end; srslte_ofdm_t fft, ifft; cf_t *input, *outfft, 
*outifft; float mse; @@ -81,48 +97,65 @@ int main(int argc, char **argv) { printf("Running test for %d PRB, %d RE... ", n_prb, n_re);fflush(stdout); - input = malloc(sizeof(cf_t) * n_re); + input = srslte_vec_malloc(sizeof(cf_t) * n_re * 2); if (!input) { perror("malloc"); exit(-1); } - outfft = malloc(sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb))); + outfft = srslte_vec_malloc(sizeof(cf_t) * n_re * 2); if (!outfft) { perror("malloc"); exit(-1); } - outifft = malloc(sizeof(cf_t) * n_re); + outifft = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb)) * 2); if (!outifft) { perror("malloc"); exit(-1); } + bzero(outifft, sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb)) * 2); - if (srslte_ofdm_rx_init(&fft, cp, n_prb)) { + if (srslte_ofdm_rx_init(&fft, cp, outifft, outfft, n_prb)) { fprintf(stderr, "Error initializing FFT\n"); exit(-1); } - srslte_dft_plan_set_norm(&fft.fft_plan, true); + srslte_ofdm_set_normalize(&fft, true); - if (srslte_ofdm_tx_init(&ifft, cp, n_prb)) { + if (srslte_ofdm_tx_init(&ifft, cp, input, outifft, n_prb)) { fprintf(stderr, "Error initializing iFFT\n"); exit(-1); } - srslte_dft_plan_set_norm(&ifft.fft_plan, true); + srslte_ofdm_set_normalize(&ifft, true); for (i=0;i 1.0f) printf("%04d. %+.1f%+.1fi Vs. 
%+.1f%+.1f %+.1f%+.1f (mse=%f)\n", i, __real__ input[i], __imag__ input[i], __real__ outifft[i], __imag__ outifft[i], __real__ outfft[i], __imag__ outfft[i], mse); } - printf("MSE=%f\n", mse); + /*for (i=0;i= 0.07) { printf("MSE too large\n"); diff --git a/lib/src/phy/enb/enb_dl.c b/lib/src/phy/enb/enb_dl.c index 2ba179399..54a63bc2a 100644 --- a/lib/src/phy/enb/enb_dl.c +++ b/lib/src/phy/enb/enb_dl.c @@ -41,7 +41,7 @@ #define SRSLTE_ENB_RF_AMP 0.1 -int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) +int srslte_enb_dl_init(srslte_enb_dl_t *q, cf_t *out_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -53,13 +53,26 @@ int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) q->cfi = 3; q->tx_amp = SRSLTE_ENB_RF_AMP; - - if (srslte_ofdm_tx_init(&q->ifft, SRSLTE_CP_NORM, max_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - goto clean_exit; + + for (int i=0;isf_symbols[i] = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->sf_symbols[i]) { + perror("malloc"); + goto clean_exit; + } + q->slot1_symbols[i] = &q->sf_symbols[i][SRSLTE_SLOT_LEN_RE(max_prb, SRSLTE_CP_NORM)]; + } + + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + if (srslte_ofdm_tx_init(&q->ifft[i], SRSLTE_CP_NORM, q->sf_symbols[i], out_buffer[i], max_prb)) { + fprintf(stderr, "Error initiating FFT (%d)\n", i); + goto clean_exit; + } } - srslte_ofdm_set_normalize(&q->ifft, true); + for (int i = 0; i < q->cell.nof_ports; i++) { + srslte_ofdm_set_normalize(&q->ifft[i], true); + } if (srslte_pbch_init(&q->pbch)) { fprintf(stderr, "Error creating PBCH object\n"); @@ -89,15 +102,6 @@ int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) goto clean_exit; } - for (int i=0;isf_symbols[i] = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->sf_symbols[i]) { - perror("malloc"); - goto clean_exit; - } - q->slot1_symbols[i] = &q->sf_symbols[i][SRSLTE_SLOT_LEN_RE(max_prb, 
SRSLTE_CP_NORM)]; - } - ret = SRSLTE_SUCCESS; } else { @@ -114,7 +118,9 @@ clean_exit: void srslte_enb_dl_free(srslte_enb_dl_t *q) { if (q) { - srslte_ofdm_tx_free(&q->ifft); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + srslte_ofdm_tx_free(&q->ifft[i]); + } srslte_regs_free(&q->regs); srslte_pbch_free(&q->pbch); srslte_pcfich_free(&q->pcfich); @@ -152,9 +158,11 @@ int srslte_enb_dl_set_cell(srslte_enb_dl_t *q, srslte_cell_t cell) fprintf(stderr, "Error resizing REGs\n"); return SRSLTE_ERROR; } - if (srslte_ofdm_rx_set_prb(&q->ifft, q->cell.cp, q->cell.nof_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - return SRSLTE_ERROR; + for (int i = 0; i < q->cell.nof_ports; i++) { + if (srslte_ofdm_tx_set_prb(&q->ifft[i], q->cell.cp, q->cell.nof_prb)) { + fprintf(stderr, "Error re-planning iFFT (%d)\n", i); + return SRSLTE_ERROR; + } } if (srslte_pbch_set_cell(&q->pbch, q->cell)) { fprintf(stderr, "Error creating PBCH object\n"); @@ -264,14 +272,15 @@ void srslte_enb_dl_put_base(srslte_enb_dl_t *q, uint32_t tti) } -void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q, cf_t *signal_buffer) +void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q) { - - srslte_ofdm_tx_sf(&q->ifft, q->sf_symbols[0], signal_buffer); - // TODO: PAPR control float norm_factor = (float) sqrt(q->cell.nof_prb)/15; - srslte_vec_sc_prod_cfc(signal_buffer, q->tx_amp*norm_factor, signal_buffer, SRSLTE_SF_LEN_PRB(q->cell.nof_prb)); + + for (int i = 0; i < q->cell.nof_ports; i++) { + srslte_ofdm_tx_sf(&q->ifft[i]); + srslte_vec_sc_prod_cfc(q->ifft[i].out_buffer, q->tx_amp*norm_factor, q->ifft[i].out_buffer, (uint32_t) SRSLTE_SF_LEN_PRB(q->cell.nof_prb)); + } } int srslte_enb_dl_add_rnti(srslte_enb_dl_t *q, uint16_t rnti) diff --git a/lib/src/phy/enb/enb_ul.c b/lib/src/phy/enb/enb_ul.c index db05d44ea..f94eb0277 100644 --- a/lib/src/phy/enb/enb_ul.c +++ b/lib/src/phy/enb/enb_ul.c @@ -40,6 +40,7 @@ #define MAX_CANDIDATES 16 int srslte_enb_ul_init(srslte_enb_ul_t *q, + cf_t *in_buffer, uint32_t max_prb) { int 
ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -55,8 +56,20 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, perror("malloc"); goto clean_exit; } - - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + + q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->sf_symbols) { + perror("malloc"); + goto clean_exit; + } + + q->ce = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->ce) { + perror("malloc"); + goto clean_exit; + } + + if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, in_buffer, q->sf_symbols, max_prb)) { fprintf(stderr, "Error initiating FFT\n"); goto clean_exit; } @@ -80,18 +93,6 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, goto clean_exit; } - q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->sf_symbols) { - perror("malloc"); - goto clean_exit; - } - - q->ce = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->ce) { - perror("malloc"); - goto clean_exit; - } - ret = SRSLTE_SUCCESS; } else { @@ -254,7 +255,7 @@ int srslte_enb_ul_cfg_ue(srslte_enb_ul_t *q, uint16_t rnti, void srslte_enb_ul_fft(srslte_enb_ul_t *q, cf_t *signal_buffer) { - srslte_ofdm_rx_sf(&q->fft, signal_buffer, q->sf_symbols); + srslte_ofdm_rx_sf(&q->fft); } int get_pucch(srslte_enb_ul_t *q, uint16_t rnti, diff --git a/lib/src/phy/fec/rm_turbo.c b/lib/src/phy/fec/rm_turbo.c index 23929fff3..cdc8ac88d 100644 --- a/lib/src/phy/fec/rm_turbo.c +++ b/lib/src/phy/fec/rm_turbo.c @@ -741,4 +741,3 @@ int srslte_rm_turbo_rx(float *w_buff, uint32_t w_buff_len, float *input, uint32_ return 0; } - diff --git a/lib/src/phy/mimo/precoding.c b/lib/src/phy/mimo/precoding.c index 6f3e43889..a21219a91 100644 --- a/lib/src/phy/mimo/precoding.c +++ b/lib/src/phy/mimo/precoding.c @@ -33,6 +33,7 @@ #include "srslte/phy/mimo/precoding.h" #include "srslte/phy/utils/vector.h" #include "srslte/phy/utils/debug.h" +#include 
"srslte/phy/utils/mat.h" #ifdef LV_HAVE_SSE #include @@ -46,7 +47,6 @@ int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_ #endif #include "srslte/phy/utils/mat.h" - static srslte_mimo_decoder_t mimo_decoder = SRSLTE_MIMO_DECODER_MMSE; /************************************************ diff --git a/lib/src/phy/modem/demod_hard.c b/lib/src/phy/modem/demod_hard.c index 76f54236d..899559ecc 100644 --- a/lib/src/phy/modem/demod_hard.c +++ b/lib/src/phy/modem/demod_hard.c @@ -44,6 +44,7 @@ int srslte_demod_hard_demodulate(srslte_demod_hard_t* q, cf_t* symbols, uint8_t int nbits=-1; switch(q->mod) { + case SRSLTE_MOD_LAST: case SRSLTE_MOD_BPSK: hard_bpsk_demod(symbols,bits,nsymbols); nbits=nsymbols; diff --git a/lib/src/phy/modem/modem_table.c b/lib/src/phy/modem/modem_table.c index c19e52e77..3c4ad2417 100644 --- a/lib/src/phy/modem/modem_table.c +++ b/lib/src/phy/modem/modem_table.c @@ -82,6 +82,7 @@ int srslte_modem_table_set(srslte_modem_table_t* q, cf_t* table, uint32_t nsymbo int srslte_modem_table_lte(srslte_modem_table_t* q, srslte_mod_t modulation) { srslte_modem_table_init(q); switch(modulation) { + case SRSLTE_MOD_LAST: case SRSLTE_MOD_BPSK: q->nbits_x_symbol = 1; q->nsymbols = 2; diff --git a/lib/src/phy/phch/dci.c b/lib/src/phy/phch/dci.c index 2daa7e10d..471429145 100644 --- a/lib/src/phy/phch/dci.c +++ b/lib/src/phy/phch/dci.c @@ -111,7 +111,7 @@ int srslte_dci_rar_to_ul_grant(srslte_dci_rar_grant_t *rar, uint32_t nof_prb, srslte_ra_type2_from_riv(riv, &ul_dci->type2_alloc.L_crb, &ul_dci->type2_alloc.RB_start, nof_prb, nof_prb); - if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant, 0)) { + if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant)) { return SRSLTE_ERROR; } @@ -177,7 +177,7 @@ int srslte_dci_msg_to_ul_grant(srslte_dci_msg_t *msg, uint32_t nof_prb, return ret; } - if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant, harq_pid)) { + if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, 
grant)) { return ret; } diff --git a/lib/src/phy/phch/ra.c b/lib/src/phy/phch/ra.c index be10c304c..913bd9548 100644 --- a/lib/src/phy/phch/ra.c +++ b/lib/src/phy/phch/ra.c @@ -185,108 +185,87 @@ int srslte_ra_ul_dci_to_grant_prb_allocation(srslte_ra_ul_dci_t *dci, srslte_ra_ } } -srslte_mod_t last_mod[8]; -uint32_t last_ul_tbs_idx[8]; -uint32_t last_dl_tbs[8]; -uint32_t last_dl_tbs2[8]; - -static int ul_dci_to_grant_mcs(srslte_ra_ul_dci_t *dci, srslte_ra_ul_grant_t *grant, uint32_t harq_pid) { - int tbs = -1; +static void ul_dci_to_grant_mcs(srslte_ra_ul_dci_t *dci, srslte_ra_ul_grant_t *grant) { // 8.6.2 First paragraph if (dci->mcs_idx <= 28) { /* Table 8.6.1-1 on 36.213 */ if (dci->mcs_idx < 11) { grant->mcs.mod = SRSLTE_MOD_QPSK; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx, grant->L_prb); } else if (dci->mcs_idx < 21) { grant->mcs.mod = SRSLTE_MOD_16QAM; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-1, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx-1; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-1, grant->L_prb); } else if (dci->mcs_idx < 29) { grant->mcs.mod = SRSLTE_MOD_64QAM; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-2, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx-2; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-2, grant->L_prb); } else { fprintf(stderr, "Invalid MCS index %d\n", dci->mcs_idx); } - last_mod[harq_pid%8] = grant->mcs.mod; } else if (dci->mcs_idx == 29 && dci->cqi_request && grant->L_prb <= 4) { // 8.6.1 and 8.6.2 36.213 second paragraph grant->mcs.mod = SRSLTE_MOD_QPSK; - tbs = srslte_ra_tbs_from_idx(last_ul_tbs_idx[harq_pid%8], grant->L_prb); - dci->rv_idx = 1; + grant->mcs.tbs = 0; + dci->rv_idx = 1; } else if (dci->mcs_idx >= 29) { - // Else use last TBS/Modulation and use mcs to obtain rv_idx - tbs = srslte_ra_tbs_from_idx(last_ul_tbs_idx[harq_pid%8], grant->L_prb); - 
grant->mcs.mod = last_mod[harq_pid%8]; + // Else use last TBS/Modulation and use mcs to obtain rv_idx + grant->mcs.tbs = -1; + grant->mcs.mod = SRSLTE_MOD_LAST; dci->rv_idx = dci->mcs_idx - 28; - DEBUG("TTI=%d, harq_pid=%d, mcs_idx=%d, tbs=%d, mod=%d, rv=%d\n", - harq_pid, harq_pid%8, dci->mcs_idx, tbs/8, grant->mcs.mod, dci->rv_idx); - } - if (tbs < 0) { - fprintf(stderr, "Error computing TBS\n"); - return SRSLTE_ERROR; - } else { - grant->mcs.tbs = (uint32_t) tbs; - return SRSLTE_SUCCESS; + DEBUG("mcs_idx=%d, tbs=%d, mod=%d, rv=%d\n", + dci->mcs_idx, grant->mcs.tbs/8, grant->mcs.mod, dci->rv_idx); } } -void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, uint32_t N_srs, srslte_ra_nbits_t *nbits) +void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, uint32_t N_srs, srslte_ra_nbits_t *nbits) { - nbits->nof_symb = 2*(SRSLTE_CP_NSYMB(cp)-1) - N_srs; + nbits->nof_symb = 2*(SRSLTE_CP_NSYMB(cp)-1) - N_srs; nbits->nof_re = nbits->nof_symb*grant->M_sc; nbits->nof_bits = nbits->nof_re * grant->Qm; } /** Compute PRB allocation for Uplink as defined in 8.1 and 8.4 of 36.213 */ -int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, srslte_ra_ul_grant_t *grant, - uint32_t harq_pid) +int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, srslte_ra_ul_grant_t *grant) { - - // Compute PRB allocation + + // Compute PRB allocation if (!srslte_ra_ul_dci_to_grant_prb_allocation(dci, grant, n_rb_ho, nof_prb)) { - - // Compute MCS - if (!ul_dci_to_grant_mcs(dci, grant, harq_pid)) { - - // Fill rest of grant structure - grant->mcs.idx = dci->mcs_idx; - grant->M_sc = grant->L_prb*SRSLTE_NRE; - grant->M_sc_init = grant->M_sc; // FIXME: What should M_sc_init be? 
- grant->Qm = srslte_mod_bits_x_symbol(grant->mcs.mod); - } else { - fprintf(stderr, "Error computing MCS\n"); - return SRSLTE_ERROR; - } + + // Compute MCS + ul_dci_to_grant_mcs(dci, grant); + + // Fill rest of grant structure + grant->mcs.idx = dci->mcs_idx; + grant->M_sc = grant->L_prb*SRSLTE_NRE; + grant->M_sc_init = grant->M_sc; // FIXME: What should M_sc_init be? + grant->Qm = srslte_mod_bits_x_symbol(grant->mcs.mod); + } else { printf("Error computing UL PRB allocation\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } return SRSLTE_SUCCESS; } -uint32_t srslte_ra_dl_approx_nof_re(srslte_cell_t cell, uint32_t nof_prb, uint32_t nof_ctrl_symbols) +uint32_t srslte_ra_dl_approx_nof_re(srslte_cell_t cell, uint32_t nof_prb, uint32_t nof_ctrl_symbols) { - uint32_t nof_refs = 0; + uint32_t nof_refs = 0; uint32_t nof_symb = 2*SRSLTE_CP_NSYMB(cell.cp)-nof_ctrl_symbols; switch(cell.nof_ports) { - case 1: - nof_refs = 2*3; - break; - case 2: - nof_refs = 4*3; - break; - case 4: - nof_refs = 4*4; - break; + case 1: + nof_refs = 2*3; + break; + case 2: + nof_refs = 4*3; + break; + case 4: + nof_refs = 4*4; + break; } return nof_prb * (nof_symb*SRSLTE_NRE-nof_refs); } /* Computes the number of RE for each PRB in the prb_dist structure */ -uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t cell, +uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t cell, uint32_t sf_idx, uint32_t nof_ctrl_symbols) { uint32_t j, s; @@ -300,7 +279,7 @@ uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t ce } } } - return nof_re; + return nof_re; } @@ -315,7 +294,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ uint32_t bitmask; uint32_t P = srslte_ra_type0_P(nof_prb); uint32_t n_rb_rbg_subset, n_rb_type1; - + bzero(grant, sizeof(srslte_ra_dl_grant_t)); switch (dci->alloc_type) { case SRSLTE_RA_ALLOC_TYPE0: @@ -352,14 +331,14 @@ int 
srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ * P * P + dci->type1_alloc.rbg_subset * P + (i + shift) % P] = true; grant->nof_prb++; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } } memcpy(&grant->prb_idx[1], &grant->prb_idx[0], SRSLTE_MAX_PRB*sizeof(bool)); break; case SRSLTE_RA_ALLOC_TYPE2: - if (dci->type2_alloc.mode == SRSLTE_RA_TYPE2_LOC) { + if (dci->type2_alloc.mode == SRSLTE_RA_TYPE2_LOC) { for (i = 0; i < dci->type2_alloc.L_crb; i++) { grant->prb_idx[0][i + dci->type2_alloc.RB_start] = true; grant->nof_prb++; @@ -408,13 +387,13 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ if (n_tilde_prb_odd < nof_prb) { grant->prb_idx[0][n_tilde_prb_odd] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } else { if (n_tilde_prb_odd + N_gap - N_tilde_vrb / 2 < nof_prb) { grant->prb_idx[0][n_tilde_prb_odd + N_gap - N_tilde_vrb / 2] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } grant->nof_prb++; @@ -422,13 +401,13 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ if(n_tilde_prb_even < nof_prb) { grant->prb_idx[1][n_tilde_prb_even] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } else { if (n_tilde_prb_even + N_gap - N_tilde_vrb / 2 < nof_prb) { grant->prb_idx[1][n_tilde_prb_even + N_gap - N_tilde_vrb / 2] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } } @@ -442,8 +421,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ } int srslte_dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) { - uint32_t i_tbs = 0; - int tbs = -1; + int i_tbs = 0; if (mcs->idx < 10) { mcs->mod = SRSLTE_MOD_QPSK; i_tbs = mcs->idx; @@ -455,30 +433,26 @@ int srslte_dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) { i_tbs = mcs->idx-2; } else if (mcs->idx == 29) { mcs->mod = SRSLTE_MOD_QPSK; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } else if (mcs->idx == 30) { mcs->mod = 
SRSLTE_MOD_16QAM; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } else if (mcs->idx == 31) { mcs->mod = SRSLTE_MOD_64QAM; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } - - if (tbs == -1) { + + int tbs = -1; + if (i_tbs >= 0) { tbs = srslte_ra_tbs_from_idx(i_tbs, nprb); - if (tbs >= 0) { - mcs->tbs = tbs; - } - } - return tbs; + mcs->tbs = tbs; + } + return tbs; } int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { - uint32_t i_tbs = 0; - int tbs = -1; + uint32_t i_tbs = 0; + int tbs = -1; if (mcs->idx < 5) { mcs->mod = SRSLTE_MOD_QPSK; i_tbs = mcs->idx*2; @@ -492,7 +466,7 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { mcs->mod = SRSLTE_MOD_64QAM; i_tbs = mcs->idx + 5; }else if (mcs->idx < 28) { - //mcs->mod = SRSLTE_MOD_256QAM; + //mcs->mod = SRSLTE_MOD_256QAM; i_tbs = mcs->idx + 5; }else if (mcs->idx == 28) { mcs->mod = SRSLTE_MOD_QPSK; @@ -511,15 +485,15 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { tbs = 0; i_tbs = 0; } - - + + if (tbs == -1) { tbs = srslte_ra_tbs_from_idx(i_tbs, nprb); if (tbs >= 0) { - mcs->tbs = tbs; + mcs->tbs = tbs; } - } - return tbs; + } + return tbs; } /* Modulation order and transport block size determination 7.1.7 in 36.213 @@ -530,9 +504,9 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { * */ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *grant, bool crc_is_crnti) { uint32_t n_prb=0; - int tbs = -1; - uint32_t i_tbs = 0; - + int tbs = -1; + uint32_t i_tbs = 0; + if (!crc_is_crnti) { if (dci->dci_is_1a) { n_prb = dci->type2_alloc.n_prb1a == SRSLTE_RA_TYPE2_NPRB1A_2 ? 
2 : 3; @@ -546,35 +520,23 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } } else { fprintf(stderr, "Error decoding DCI: P/SI/RA-RNTI supports Format1A/1C only\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } grant->mcs[0].mod = SRSLTE_MOD_QPSK; grant->mcs[0].tbs = (uint32_t) tbs; } else { n_prb = grant->nof_prb; - grant->nof_tb = 0; + grant->nof_tb = 0; if (dci->tb_en[0]) { grant->mcs[0].idx = dci->mcs_idx; - tbs = srslte_dl_fill_ra_mcs(&grant->mcs[0], n_prb); - if (tbs) { - last_dl_tbs[dci->harq_process%8] = tbs; - } else { - // For mcs>=29, set last TBS received for this PID - grant->mcs[0].tbs = last_dl_tbs[dci->harq_process%8]; - } + grant->mcs[0].tbs = srslte_dl_fill_ra_mcs(&grant->mcs[0], n_prb); grant->nof_tb++; } else { grant->mcs[0].tbs = 0; } if (dci->tb_en[1]) { grant->mcs[1].idx = dci->mcs_idx_1; - tbs = srslte_dl_fill_ra_mcs(&grant->mcs[1], n_prb); - if (tbs) { - last_dl_tbs2[dci->harq_process%8] = tbs; - } else { - // For mcs>=29, set last TBS received for this PID - grant->mcs[1].tbs = last_dl_tbs2[dci->harq_process%8]; - } + grant->mcs[1].tbs = srslte_dl_fill_ra_mcs(&grant->mcs[1], n_prb); } else { grant->mcs[1].tbs = 0; } @@ -587,7 +549,7 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } grant->pinfo = dci->pinfo; - if (tbs < 0) { + if (grant->mcs[0].tbs < 0 || grant->mcs[1].tbs < 0) { return SRSLTE_ERROR; } else { return SRSLTE_SUCCESS; @@ -622,10 +584,12 @@ int srslte_ra_dl_dci_to_grant(srslte_ra_dl_dci_t *dci, if (msg_rnti >= SRSLTE_CRNTI_START && msg_rnti <= SRSLTE_CRNTI_END) { crc_is_crnti = true; } - // Compute PRB allocation - if (!srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb)) { - // Compute MCS - if (!dl_dci_to_grant_mcs(dci, grant, crc_is_crnti)) { + // Compute PRB allocation + int ret =srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb); + if (!ret) { + // Compute MCS + ret = dl_dci_to_grant_mcs(dci, grant, crc_is_crnti); + if (ret == 
SRSLTE_SUCCESS) { // Apply Section 7.1.7.3. If RA-RNTI and Format1C rv_idx=0 if (msg_rnti >= SRSLTE_RARNTI_START && msg_rnti <= SRSLTE_RARNTI_END && dci->dci_is_1c) @@ -907,4 +871,4 @@ void srslte_ra_prb_fprint(FILE *f, srslte_ra_dl_grant_t *grant) { } } -} \ No newline at end of file +} diff --git a/lib/src/phy/phch/test/pbch_file_test.c b/lib/src/phy/phch/test/pbch_file_test.c index 734640d55..2ca12e4c9 100644 --- a/lib/src/phy/phch/test/pbch_file_test.c +++ b/lib/src/phy/phch/test/pbch_file_test.c @@ -140,7 +140,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -203,7 +203,7 @@ int main(int argc, char **argv) { if (nread > 0) { // process 1st subframe only - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); /* Get channel estimates for each port */ srslte_chest_dl_estimate(&chest, fft_buffer, ce, 0); diff --git a/lib/src/phy/phch/test/pcfich_file_test.c b/lib/src/phy/phch/test/pcfich_file_test.c index dfb8d72e3..e92d6c7ba 100644 --- a/lib/src/phy/phch/test/pcfich_file_test.c +++ b/lib/src/phy/phch/test/pcfich_file_test.c @@ -120,15 +120,15 @@ int base_init() { fmatlab = NULL; } - flen = SRSLTE_SF_LEN(srslte_symbol_sz(cell.nof_prb)); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); - input_buffer = malloc(flen * sizeof(cf_t)); + input_buffer = srslte_vec_malloc(flen * sizeof(cf_t)); if (!input_buffer) { perror("malloc"); exit(-1); } - fft_buffer = malloc(SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp) * sizeof(cf_t)); + fft_buffer = srslte_vec_malloc(SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp) * sizeof(cf_t)); if (!fft_buffer) { perror("malloc"); return -1; @@ -151,7 +151,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, 
cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -215,7 +215,7 @@ int main(int argc, char **argv) { n = srslte_filesource_read(&fsrc, input_buffer, flen); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); if (fmatlab) { fprintf(fmatlab, "infft="); diff --git a/lib/src/phy/phch/test/pdcch_file_test.c b/lib/src/phy/phch/test/pdcch_file_test.c index d4ceed4b6..5482d9f98 100644 --- a/lib/src/phy/phch/test/pdcch_file_test.c +++ b/lib/src/phy/phch/test/pdcch_file_test.c @@ -126,7 +126,7 @@ int base_init() { exit(-1); } - flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz(cell.nof_prb))); + flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz_power2(cell.nof_prb))); input_buffer = malloc(flen * sizeof(cf_t)); if (!input_buffer) { @@ -157,7 +157,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -231,7 +231,7 @@ int main(int argc, char **argv) { INFO("Reading %d samples sub-frame %d\n", flen, frame_cnt); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); /* Get channel estimates for each port */ srslte_chest_dl_estimate(&chest, fft_buffer, ce, frame_cnt %10); diff --git a/lib/src/phy/phch/test/pdsch_pdcch_file_test.c b/lib/src/phy/phch/test/pdsch_pdcch_file_test.c index 90c0e1c17..0faf7eca1 100644 --- a/lib/src/phy/phch/test/pdsch_pdcch_file_test.c +++ b/lib/src/phy/phch/test/pdsch_pdcch_file_test.c @@ -129,7 +129,7 @@ int base_init() { exit(-1); } - flen = 2 * 
(SRSLTE_SLOT_LEN(srslte_symbol_sz(cell.nof_prb))); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); input_buffer[0] = malloc(flen * sizeof(cf_t)); if (!input_buffer[0]) { @@ -137,7 +137,7 @@ int base_init() { exit(-1); } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, input_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initializing UE DL\n"); return -1; } diff --git a/lib/src/phy/phch/test/phich_file_test.c b/lib/src/phy/phch/test/phich_file_test.c index d7078f933..65f7ce9c0 100644 --- a/lib/src/phy/phch/test/phich_file_test.c +++ b/lib/src/phy/phch/test/phich_file_test.c @@ -144,7 +144,7 @@ int base_init() { fmatlab = NULL; } - flen = SRSLTE_SF_LEN(srslte_symbol_sz(cell.nof_prb)); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); input_buffer = malloc(flen * sizeof(cf_t)); if (!input_buffer) { @@ -175,7 +175,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -242,7 +242,7 @@ int main(int argc, char **argv) { n = srslte_filesource_read(&fsrc, input_buffer, flen); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); if (fmatlab) { fprintf(fmatlab, "infft="); diff --git a/lib/src/phy/phch/test/pmch_file_test.c b/lib/src/phy/phch/test/pmch_file_test.c index ac66072fa..6586b2ee9 100644 --- a/lib/src/phy/phch/test/pmch_file_test.c +++ b/lib/src/phy/phch/test/pmch_file_test.c @@ -140,7 +140,7 @@ int base_init() { exit(-1); } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, input_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initializing UE DL\n"); return -1; } diff --git a/lib/src/phy/phch/test/pmch_test.c b/lib/src/phy/phch/test/pmch_test.c 
index a187ca2bb..a9c29ef64 100644 --- a/lib/src/phy/phch/test/pmch_test.c +++ b/lib/src/phy/phch/test/pmch_test.c @@ -139,7 +139,7 @@ cf_t *tx_slot_symbols[SRSLTE_MAX_PORTS]; cf_t *rx_slot_symbols[SRSLTE_MAX_PORTS]; srslte_pmch_t pmch_tx, pmch_rx; srslte_pdsch_cfg_t pmch_cfg; -srslte_ofdm_t ifft_mbsfn, fft_mbsfn; +srslte_ofdm_t ifft_mbsfn[SRSLTE_MAX_PORTS], fft_mbsfn[SRSLTE_MAX_PORTS]; int main(int argc, char **argv) { uint32_t i, j, k; @@ -169,10 +169,10 @@ int main(int argc, char **argv) { grant.tb_en[1] = false; grant.nof_tb = 1; grant.mcs[0].idx = mcs_idx; - + grant.nof_prb = cell.nof_prb; grant.sf_type = SRSLTE_SF_MBSFN; - + srslte_dl_fill_ra_mcs(&grant.mcs[0], cell.nof_prb); grant.Qm[0] = srslte_mod_bits_x_symbol(grant.mcs[0].mod); for(int i = 0; i < 2; i++){ @@ -181,41 +181,6 @@ int main(int argc, char **argv) { } } - - -#ifdef DO_OFDM - - if (srslte_ofdm_tx_init_mbsfn(&ifft_mbsfn, SRSLTE_CP_EXT, cell.nof_prb)) { - fprintf(stderr, "Error creating iFFT object\n"); - exit(-1); - } - if (srslte_ofdm_rx_init_mbsfn(&fft_mbsfn, SRSLTE_CP_EXT, cell.nof_prb)) { - fprintf(stderr, "Error creating iFFT object\n"); - exit(-1); - } - - srslte_ofdm_set_non_mbsfn_region(&ifft_mbsfn, non_mbsfn_region); - srslte_ofdm_set_non_mbsfn_region(&fft_mbsfn, non_mbsfn_region); - srslte_ofdm_set_normalize(&ifft_mbsfn, true); - srslte_ofdm_set_normalize(&fft_mbsfn, true); - - - for (i = 0; i < cell.nof_ports; i++) { - tx_sf_symbols[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_PRB(cell.nof_prb)); - } - - for (i = 0; i < nof_rx_antennas; i++) { - rx_sf_symbols[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_PRB(cell.nof_prb)); - } -#endif /* DO_OFDM */ - - /* Configure PDSCH */ - - if (srslte_pmch_cfg(&pmch_cfg, cell, &grant, cfi, subframe)) { - fprintf(stderr, "Error configuring PMCH\n"); - exit(-1); - } - /* init memory */ for (i=0;idftp_input, input, input_fft); if (ce) { - srslte_vec_div_ccc(&input_fft[q->fft_size/2-SRSLTE_SSS_N], ce, ce_mod, - 
&input_fft[q->fft_size/2-SRSLTE_SSS_N], z_real, z_imag, - 2*SRSLTE_SSS_N); + srslte_vec_div_ccc(&input_fft[q->fft_size/2-SRSLTE_SSS_N], ce, + &input_fft[q->fft_size/2-SRSLTE_SSS_N], 2*SRSLTE_SSS_N); } for (int i = 0; i < SRSLTE_SSS_N; i++) { diff --git a/lib/src/phy/sync/test/sync_test.c b/lib/src/phy/sync/test/sync_test.c index 8715b316e..1e3951884 100644 --- a/lib/src/phy/sync/test/sync_test.c +++ b/lib/src/phy/sync/test/sync_test.c @@ -108,8 +108,8 @@ int main(int argc, char **argv) { perror("malloc"); exit(-1); } - - if (srslte_ofdm_tx_init(&ifft, cp, nof_prb)) { + + if (srslte_ofdm_tx_init(&ifft, cp, buffer, fft_buffer, nof_prb)) { fprintf(stderr, "Error creating iFFT object\n"); exit(-1); } @@ -150,8 +150,14 @@ int main(int argc, char **argv) { /* Transform to OFDM symbols */ memset(fft_buffer, 0, sizeof(cf_t) * FLEN); - srslte_ofdm_tx_sf(&ifft, buffer, &fft_buffer[offset]); + srslte_ofdm_tx_sf(&ifft); + /* Apply sample offset */ + for (int i = 0; i < FLEN; i++) { + fft_buffer[FLEN - i - 1 + offset] = fft_buffer[FLEN - i - 1]; + } + bzero(fft_buffer, sizeof(cf_t) * offset); + if (srslte_sync_find(&syncobj, fft_buffer, 0, &find_idx) < 0) { fprintf(stderr, "Error running srslte_sync_find\n"); exit(-1); diff --git a/lib/src/phy/ue/ue_dl.c b/lib/src/phy/ue/ue_dl.c index c4e2d3f6c..930fe2623 100644 --- a/lib/src/phy/ue/ue_dl.c +++ b/lib/src/phy/ue/ue_dl.c @@ -53,6 +53,7 @@ static srslte_dci_format_t common_formats[] = {SRSLTE_DCI_FORMAT1A,SRSLTE_DCI_FO const uint32_t nof_common_formats = 2; int srslte_ue_dl_init(srslte_ue_dl_t *q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb, uint32_t nof_rx_antennas) { @@ -73,12 +74,35 @@ int srslte_ue_dl_init(srslte_ue_dl_t *q, q->sample_offset = 0; q->nof_rx_antennas = nof_rx_antennas; - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - goto clean_exit; + for (int j = 0; j < SRSLTE_MAX_PORTS; j++) { + q->sf_symbols_m[j] = srslte_vec_malloc(MAX_SFLEN_RE * 
sizeof(cf_t)); + if (!q->sf_symbols_m[j]) { + perror("malloc"); + goto clean_exit; + } + for (uint32_t i=0;ice_m[i][j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); + if (!q->ce_m[i][j]) { + perror("malloc"); + goto clean_exit; + } + bzero(q->ce_m[i][j], MAX_SFLEN_RE * sizeof(cf_t)); + } + } + + q->sf_symbols = q->sf_symbols_m[0]; + for (int i=0;ice[i] = q->ce_m[i][0]; + } + + for (int i = 0; i < nof_rx_antennas; i++) { + if (srslte_ofdm_rx_init(&q->fft[i], SRSLTE_CP_NORM, in_buffer[i], q->sf_symbols_m[i], max_prb)) { + fprintf(stderr, "Error initiating FFT\n"); + goto clean_exit; + } } - - if (srslte_ofdm_rx_init_mbsfn(&q->fft_mbsfn, SRSLTE_CP_EXT, max_prb)) { + + if (srslte_ofdm_rx_init_mbsfn(&q->fft_mbsfn, SRSLTE_CP_EXT, in_buffer[0], q->sf_symbols_m[0], max_prb)) { fprintf(stderr, "Error initiating FFT for MBSFN subframes \n"); goto clean_exit; } @@ -127,28 +151,7 @@ int srslte_ue_dl_init(srslte_ue_dl_t *q, fprintf(stderr, "Error initiating SFO correct\n"); goto clean_exit; } - srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft.symbol_sz); - - for (int j = 0; j < SRSLTE_MAX_PORTS; j++) { - q->sf_symbols_m[j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); - if (!q->sf_symbols_m[j]) { - perror("malloc"); - goto clean_exit; - } - for (uint32_t i=0;ice_m[i][j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); - if (!q->ce_m[i][j]) { - perror("malloc"); - goto clean_exit; - } - bzero(q->ce_m[i][j], MAX_SFLEN_RE * sizeof(cf_t)); - } - } - - q->sf_symbols = q->sf_symbols_m[0]; - for (int i=0;ice[i] = q->ce_m[i][0]; - } + srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft[0].symbol_sz); ret = SRSLTE_SUCCESS; } else { @@ -164,7 +167,9 @@ clean_exit: void srslte_ue_dl_free(srslte_ue_dl_t *q) { if (q) { - srslte_ofdm_rx_free(&q->fft); + for (int port = 0; port < SRSLTE_MAX_PORTS; port++) { + srslte_ofdm_rx_free(&q->fft[port]); + } srslte_ofdm_rx_free(&q->fft_mbsfn); srslte_chest_dl_free(&q->chest); srslte_regs_free(&q->regs); @@ -219,10 +224,12 @@ int 
srslte_ue_dl_set_cell(srslte_ue_dl_t *q, srslte_cell_t cell) fprintf(stderr, "Error resizing SFO correct\n"); return SRSLTE_ERROR; } - srslte_cfo_set_tol(&q->sfo_correct, 1e-5/q->fft.symbol_sz); - if (srslte_ofdm_rx_set_prb(&q->fft, q->cell.cp, q->cell.nof_prb)) { - fprintf(stderr, "Error resizing FFT\n"); - return SRSLTE_ERROR; + srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft[0].symbol_sz); + for (int port = 0; port < q->nof_rx_antennas; port++) { + if (srslte_ofdm_rx_set_prb(&q->fft[port], q->cell.cp, q->cell.nof_prb)) { + fprintf(stderr, "Error resizing FFT\n"); + return SRSLTE_ERROR; + } } if (srslte_chest_dl_set_cell(&q->chest, q->cell)) { fprintf(stderr, "Error resizing channel estimator\n"); @@ -339,9 +346,9 @@ int srslte_ue_dl_decode_fft_estimate_mbsfn(srslte_ue_dl_t *q, cf_t *input[SRSLTE /* Run FFT for all subframe data */ for (int j=0;jnof_rx_antennas;j++) { if(sf_type == SRSLTE_SF_MBSFN ) { - srslte_ofdm_rx_sf(&q->fft_mbsfn, input[j], q->sf_symbols_m[j]); + srslte_ofdm_rx_sf(&q->fft_mbsfn); }else{ - srslte_ofdm_rx_sf(&q->fft, input[j], q->sf_symbols_m[j]); + srslte_ofdm_rx_sf(&q->fft[j]); } /* Correct SFO multiplying by complex exponential in the time domain */ @@ -351,7 +358,7 @@ int srslte_ue_dl_decode_fft_estimate_mbsfn(srslte_ue_dl_t *q, cf_t *input[SRSLTE srslte_cfo_correct(&q->sfo_correct, &q->sf_symbols_m[j][i*q->cell.nof_prb*SRSLTE_NRE], &q->sf_symbols_m[j][i*q->cell.nof_prb*SRSLTE_NRE], - q->sample_offset / q->fft.symbol_sz); + q->sample_offset / q->fft[j].symbol_sz); } } } @@ -603,7 +610,8 @@ int srslte_ue_dl_decode_mbsfn(srslte_ue_dl_t * q, grant.sf_type = SRSLTE_SF_MBSFN; grant.nof_tb = 1; grant.mcs[0].idx = 2; - + grant.tb_en[0] = true; + grant.tb_en[1] = false; grant.nof_prb = q->pmch.cell.nof_prb; srslte_dl_fill_ra_mcs(&grant.mcs[0], grant.nof_prb); srslte_softbuffer_rx_reset_tbs(q->softbuffers[0], (uint32_t) grant.mcs[0].tbs); diff --git a/lib/src/phy/ue/ue_mib.c b/lib/src/phy/ue/ue_mib.c index 46a470ab8..c003a2ba3 100644 --- 
a/lib/src/phy/ue/ue_mib.c +++ b/lib/src/phy/ue/ue_mib.c @@ -35,7 +35,8 @@ #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" -int srslte_ue_mib_init(srslte_ue_mib_t * q, +int srslte_ue_mib_init(srslte_ue_mib_t * q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -65,7 +66,7 @@ int srslte_ue_mib_init(srslte_ue_mib_t * q, } } - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, in_buffer[0], q->sf_symbols, max_prb)) { fprintf(stderr, "Error initializing FFT\n"); goto clean_exit; } @@ -143,14 +144,14 @@ void srslte_ue_mib_reset(srslte_ue_mib_t * q) srslte_pbch_decode_reset(&q->pbch); } -int srslte_ue_mib_decode(srslte_ue_mib_t * q, cf_t *input, +int srslte_ue_mib_decode(srslte_ue_mib_t * q, uint8_t bch_payload[SRSLTE_BCH_PAYLOAD_LEN], uint32_t *nof_tx_ports, int *sfn_offset) { int ret = SRSLTE_SUCCESS; cf_t *ce_slot1[SRSLTE_MAX_PORTS]; /* Run FFT for the slot symbols */ - srslte_ofdm_rx_sf(&q->fft, input, q->sf_symbols); + srslte_ofdm_rx_sf(&q->fft); /* Get channel estimates of sf idx #0 for each port */ ret = srslte_chest_dl_estimate(&q->chest, q->sf_symbols, q->ce, 0); @@ -198,7 +199,7 @@ int srslte_ue_mib_sync_init_multi(srslte_ue_mib_sync_t *q, } q->nof_rx_antennas = nof_rx_antennas; - if (srslte_ue_mib_init(&q->ue_mib, SRSLTE_UE_MIB_NOF_PRB)) { + if (srslte_ue_mib_init(&q->ue_mib, q->sf_buffer, SRSLTE_UE_MIB_NOF_PRB)) { fprintf(stderr, "Error initiating ue_mib\n"); return SRSLTE_ERROR; } @@ -274,7 +275,7 @@ int srslte_ue_mib_sync_decode(srslte_ue_mib_sync_t * q, return -1; } else if (srslte_ue_sync_get_sfidx(&q->ue_sync) == 0) { if (ret == 1) { - mib_ret = srslte_ue_mib_decode(&q->ue_mib, q->sf_buffer[0], bch_payload, nof_tx_ports, sfn_offset); + mib_ret = srslte_ue_mib_decode(&q->ue_mib, bch_payload, nof_tx_ports, sfn_offset); } else { DEBUG("Resetting PBCH decoder after %d frames\n", q->ue_mib.frame_cnt); 
srslte_ue_mib_reset(&q->ue_mib); diff --git a/lib/src/phy/ue/ue_sync.c b/lib/src/phy/ue/ue_sync.c index f121c5bac..e9c02ca8b 100644 --- a/lib/src/phy/ue/ue_sync.c +++ b/lib/src/phy/ue/ue_sync.c @@ -359,6 +359,11 @@ uint32_t srslte_ue_sync_peak_idx(srslte_ue_sync_t *q) { return q->peak_idx; } +void srslte_ue_sync_set_cfo_ema(srslte_ue_sync_t *q, float ema) { + srslte_sync_set_cfo_ema_alpha(&q->sfind, ema); + srslte_sync_set_cfo_ema_alpha(&q->strack, ema); +} + srslte_ue_sync_state_t srslte_ue_sync_get_state(srslte_ue_sync_t *q) { return q->state; } diff --git a/lib/src/phy/ue/ue_ul.c b/lib/src/phy/ue/ue_ul.c index 37dfecd93..853937f7c 100644 --- a/lib/src/phy/ue/ue_ul.c +++ b/lib/src/phy/ue/ue_ul.c @@ -41,6 +41,7 @@ #define DEFAULT_CFO_TOL 50.0 // Hz int srslte_ue_ul_init(srslte_ue_ul_t *q, + cf_t *out_buffer, uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -50,8 +51,14 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q, ret = SRSLTE_ERROR; bzero(q, sizeof(srslte_ue_ul_t)); - - if (srslte_ofdm_tx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + + q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_PRB(max_prb) * sizeof(cf_t)); + if (!q->sf_symbols) { + perror("malloc"); + goto clean_exit; + } + + if (srslte_ofdm_tx_init(&q->fft, SRSLTE_CP_NORM, q->sf_symbols, out_buffer, max_prb)) { fprintf(stderr, "Error initiating FFT\n"); goto clean_exit; } @@ -83,11 +90,6 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q, fprintf(stderr, "Error initiating srslte_refsignal_ul\n"); goto clean_exit; } - q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_PRB(max_prb) * sizeof(cf_t)); - if (!q->sf_symbols) { - perror("malloc"); - goto clean_exit; - } q->refsignal = srslte_vec_malloc(2 * SRSLTE_NRE * max_prb * sizeof(cf_t)); if (!q->refsignal) { perror("malloc"); @@ -347,7 +349,7 @@ int srslte_ue_ul_pucch_encode(srslte_ue_ul_t *q, srslte_uci_data_t uci_data, q->last_pucch_format = format; - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { 
srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); @@ -417,7 +419,7 @@ int srslte_ue_ul_srs_encode(srslte_ue_ul_t *q, uint32_t tti, cf_t *output_signal } } - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); @@ -486,7 +488,7 @@ int srslte_ue_ul_pusch_encode_rnti_softbuffer(srslte_ue_ul_t *q, } } - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); diff --git a/lib/src/phy/utils/bit.c b/lib/src/phy/utils/bit.c index 9ef53c35a..b1ae383a6 100644 --- a/lib/src/phy/utils/bit.c +++ b/lib/src/phy/utils/bit.c @@ -31,6 +31,12 @@ #include #include +#ifdef LV_HAVE_SSE + +#include + +#endif /* LV_HAVE_SSE */ + #include "srslte/phy/utils/bit.h" void srslte_bit_interleave(uint8_t *input, uint8_t *output, uint16_t *interleaver, uint32_t nof_bits) { @@ -53,6 +59,125 @@ void srslte_bit_interleave_w_offset(uint8_t *input, uint8_t *output, uint16_t *i } w_offset_p=8-w_offset; } +#ifdef LV_HAVE_SSE + __m64 m64mask = _mm_setr_pi8((uint8_t) 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1); + __m128i m128mask = _mm_set1_epi64(m64mask); + + union { + uint8_t v[8]; + __m64 m64; + } a, b, c; + + union { + __m128i m128; + uint16_t u16[8]; + uint8_t u8[16]; + struct { + __m64 reg_a; + __m64 reg_b; + } m64; + struct { + uint16_t i0, i1, i2, i3, i4, i5, i6, i7; + } v; + } ipx, epx, ipx2, epx2, b128, a128, c128; + + uint32_t i = st; + for (; i < (nof_bits / 8 - 1); i += 2) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + (i * 8) - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + ipx2.m128 = 
_mm_loadu_si128((__m128i *) (interleaver + ((i + 1) * 8) - w_offset_p)); + epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + + epx.m64.reg_b = epx2.m64.reg_a; + + b128.m128 = _mm_and_si128(epx.m128, _mm_set1_epi8(0x7)); + b128.m128 = _mm_shuffle_epi8(m128mask, b128.m128); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + ipx2.m128 = _mm_srli_epi16(ipx2.m128, 3); + + a128.m128 = _mm_set_epi8(input[ipx2.v.i0], + input[ipx2.v.i1], + input[ipx2.v.i2], + input[ipx2.v.i3], + input[ipx2.v.i4], + input[ipx2.v.i5], + input[ipx2.v.i6], + input[ipx2.v.i7], + input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c128.m128 = _mm_cmpeq_epi8(_mm_and_si128(a128.m128, b128.m128), b128.m128); + uint16_t o = (uint16_t) _mm_movemask_epi8(c128.m128); + *((uint16_t *) (output + i)) = o; + } + + for (; i < nof_bits / 8; i++) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + i * 8 - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + b.m64 = _mm_and_si64(epx.m64.reg_a, _mm_set1_pi8(0x7)); + b.m64 = _mm_shuffle_pi8(m64mask, b.m64); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + + a.m64 = _mm_set_pi8(input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c.m64 = _mm_cmpeq_pi8(_mm_and_si64(a.m64, b.m64), b.m64); + output[i] = (uint8_t) _mm_movemask_pi8(c.m64); + } + +#if 0 + /* THIS PIECE OF CODE IS FOR CHECKING SIMD BEHAVIOUR. DO NOT ENABLE. 
*/ + uint8_t *output2 = malloc(nof_bits/8); + for (i=st;i #include +#include #include "srslte/phy/utils/mat.h" diff --git a/lib/src/phy/utils/test/CMakeLists.txt b/lib/src/phy/utils/test/CMakeLists.txt index 4dccbf2a0..1f5c66827 100644 --- a/lib/src/phy/utils/test/CMakeLists.txt +++ b/lib/src/phy/utils/test/CMakeLists.txt @@ -42,3 +42,7 @@ target_link_libraries(algebra_test srslte_phy) add_test(algebra_2x2_zf_solver_test algebra_test -z) add_test(algebra_2x2_mmse_solver_test algebra_test -m) + +add_executable(vector_test vector_test.c) +target_link_libraries(vector_test srslte_phy) +add_test(vector_test vector_test) diff --git a/lib/src/phy/utils/test/mat_test.c b/lib/src/phy/utils/test/mat_test.c index 49be5c9ae..0bfb482a9 100644 --- a/lib/src/phy/utils/test/mat_test.c +++ b/lib/src/phy/utils/test/mat_test.c @@ -29,16 +29,21 @@ #include #include #include -#include #include #include "srslte/phy/utils/mat.h" +#include "srslte/phy/utils/simd.h" +#include "srslte/phy/utils/vector.h" bool zf_solver = false; bool mmse_solver = false; bool verbose = false; +#define RANDOM_F() ((float)rand())/((float)RAND_MAX) +#define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) + double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { if (ts_end->tv_usec > ts_start->tv_usec) { return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + @@ -49,16 +54,16 @@ double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { } } -#define NOF_REPETITIONS 1000 +#define BLOCK_SIZE 1000 #define RUN_TEST(FUNCTION) /*TYPE NAME (void)*/ { \ int i;\ struct timeval start, end;\ gettimeofday(&start, NULL); \ bool ret = true; \ - for (i = 0; i < NOF_REPETITIONS; i++) {ret &= FUNCTION ();}\ + for (i = 0; i < BLOCK_SIZE; i++) {ret &= FUNCTION ();}\ gettimeofday(&end, NULL);\ if (verbose) printf("%32s: %s ... 
%6.2f us/call\n", #FUNCTION, (ret)?"Pass":"Fail", \ - elapsed_us(&start, &end)/NOF_REPETITIONS);\ + elapsed_us(&start, &end)/BLOCK_SIZE);\ passed &= ret;\ } @@ -373,6 +378,24 @@ bool test_mmse_solver_avx(void) { #endif /* LV_HAVE_AVX */ +bool test_vec_dot_prod_ccc(void) { + __attribute__((aligned(256))) cf_t a[14]; + __attribute__((aligned(256))) cf_t b[14]; + cf_t res = 0, gold = 0; + + for (int i = 0; i < 14; i++) { + a[i] = RANDOM_CF(); + b[i] = RANDOM_CF(); + } + + res = srslte_vec_dot_prod_ccc(a, b, 14); + + for (int i=0;i<14;i++) { + gold += a[i]*b[i]; + } + + return (cabsf(res - gold) < 1e-3); +} int main(int argc, char **argv) { bool passed = true; @@ -405,6 +428,8 @@ int main(int argc, char **argv) { #endif /* LV_HAVE_AVX */ } + RUN_TEST(test_vec_dot_prod_ccc); + printf("%s!\n", (passed) ? "Ok" : "Failed"); if (!passed) { diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c new file mode 100644 index 000000000..4ebed9862 --- /dev/null +++ b/lib/src/phy/utils/test/vector_test.c @@ -0,0 +1,822 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "srslte/phy/utils/mat.h" +#include "srslte/phy/utils/simd.h" +#include "srslte/phy/utils/vector.h" + + +bool zf_solver = false; +bool mmse_solver = false; +bool verbose = false; + +#define MAX_MSE (1e-3) +#define NOF_REPETITIONS (1024) +#define MAX_FUNCTIONS (64) +#define MAX_BLOCKS (16) + +#define RANDOM_F() ((float)rand())/((float)RAND_MAX) +#define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) + +#define TEST_CALL(TEST_CODE) gettimeofday(&start, NULL);\ + for (int i = 0; i < NOF_REPETITIONS; i++){TEST_CODE;}\ + gettimeofday(&end, NULL); \ + *timing = elapsed_us(&start, &end); + +#define TEST(X, CODE) static bool test_##X (char *func_name, double *timing, uint32_t block_size) {\ + struct timeval start, end;\ + float mse = 0.0f;\ + bool passed;\ + strncpy(func_name, #X, 32);\ + CODE;\ + passed = (mse < MAX_MSE);\ + printf("%32s (%5d) ... %7.1f MSamp/s ... 
%3s Passed (%.6f)\n", func_name, block_size, \ + (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not", mse);\ + return passed;\ +} + +#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) + + +static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { + if (ts_end->tv_usec > ts_start->tv_usec) { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + + (double) ts_end->tv_usec - (double) ts_start->tv_usec; + } else { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec - 1) * 1000000 + + ((double) ts_end->tv_usec + 1000000) - (double) ts_start->tv_usec; + } +} + +float squared_error (cf_t a, cf_t b) { + float diff_re = __real__ a - __real__ b; + float diff_im = __imag__ a - __imag__ b; + return diff_re*diff_re + diff_im*diff_im; +} + +TEST(srslte_vec_acc_ff, + MALLOC(float, x); + float z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + TEST_CALL(z = srslte_vec_acc_ff(x, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i]; + } + + mse += fabs(gold - z) / gold; + + free(x); +) + +TEST(srslte_vec_dot_prod_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + int16_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(z = srslte_vec_dot_prod_sss(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * y[i]; + } + + mse += cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_sum_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_sum_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] + y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_sub_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, 
y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_sub_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] - y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_prod_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_prod_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_acc_cc, + MALLOC(cf_t, x); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + TEST_CALL(z = srslte_vec_acc_cc(x, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i]; + } + + mse += cabsf(gold - z)/cabsf(gold); + + free(x); +) + + +TEST(srslte_vec_sum_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_sum_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] + y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); +) + +TEST(srslte_vec_sub_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_sub_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] - y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); +) + +TEST(srslte_vec_dot_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + 
TEST_CALL(z = srslte_vec_dot_prod_ccc(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * y[i]; + } + + mse = cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_dot_prod_conj_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(z = srslte_vec_dot_prod_conj_ccc(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * conjf(y[i]); + } + + mse = cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_prod_ccc_split, + MALLOC(float, x_re); + MALLOC(float, x_im); + MALLOC(float, y_re); + MALLOC(float, y_im); + MALLOC(float, z_re); + MALLOC(float, z_im); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x_re[i] = RANDOM_F(); + x_im[i] = RANDOM_F(); + y_re[i] = RANDOM_F(); + y_im[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_prod_ccc_split(x_re, x_im, y_re, y_im, z_re, z_im, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = (x_re[i] + I * x_im[i]) * (y_re[i] + I * y_im[i]); + mse += cabsf(gold - (z_re[i] + I*z_im[i])); + } + + free(x_re); + free(x_im); + free(y_re); + free(y_im); + free(z_re); + free(z_im); +) + +TEST(srslte_vec_prod_conj_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_conj_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * conjf(y[i]); + mse += cabsf(gold - z[i]); + } + + 
free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + cf_t y = RANDOM_CF(); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_convert_fi, + MALLOC(float, x); + MALLOC(short, z); + float scale = 1000.0f; + + short gold; + for (int i = 0; i < block_size; i++) { + x[i] = (float) RANDOM_F(); + } + + TEST_CALL(srslte_vec_convert_fi(x, z, scale, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = (short) ((x[i] * scale)); + mse += cabsf((float)gold - (float) z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_prod_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_prod_cfc, + MALLOC(cf_t, x); + MALLOC(float, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_prod_cfc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_sc_prod_fff, + MALLOC(float, x); + MALLOC(float, z); + float y = RANDOM_F(); + + float gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_abs_cf, + MALLOC(cf_t, x); + MALLOC(float, z); + float gold; + + for 
(int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_abs_cf(x, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = sqrtf(crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i])); + mse += cabsf(gold - z[i])/block_size; + } + + free(x); + free(z); +) + +TEST(srslte_vec_abs_square_cf, + MALLOC(cf_t, x); + MALLOC(float, z); + float gold; + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_abs_square_cf(x, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i]); + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_cfc, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + cf_t gold; + float h = RANDOM_F(); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_cfc(x, h, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * h; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_div_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_div_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i]); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + + +TEST(srslte_vec_div_cfc, + MALLOC(cf_t, x); + MALLOC(float, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_div_cfc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i])/cabsf(gold); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + + +TEST(srslte_vec_div_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) 
{ + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_div_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i]); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_max_fi, + MALLOC(float, x); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + uint32_t max_index = 0; + TEST_CALL(max_index = srslte_vec_max_fi(x, block_size);) + + float gold_value = -INFINITY; + uint32_t gold_index = 0; + for (int i = 0; i < block_size; i++) { + if (gold_value < x[i]) { + gold_value = x[i]; + gold_index = i; + } + } + mse = (gold_index != max_index) ? 1:0; + + free(x); +) + +TEST(srslte_vec_max_abs_ci, + MALLOC(cf_t, x); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + uint32_t max_index = 0; + TEST_CALL(max_index = srslte_vec_max_abs_ci(x, block_size);) + + float gold_value = -INFINITY; + uint32_t gold_index = 0; + for (int i = 0; i < block_size; i++) { + cf_t a = x[i]; + float abs2 = __real__ a * __real__ a + __imag__ a * __imag__ a; + if (abs2 > gold_value) { + gold_value = abs2; + gold_index = (uint32_t)i; + } + } + mse = (gold_index != max_index) ? 
1:0; + + free(x); +) + +int main(int argc, char **argv) { + char func_names[MAX_FUNCTIONS][32]; + double timmings[MAX_FUNCTIONS][MAX_BLOCKS]; + uint32_t sizes[32]; + uint32_t size_count = 0; + uint32_t func_count = 0; + bool passed[MAX_FUNCTIONS][MAX_BLOCKS]; + bool all_passed = true; + + for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { + func_count = 0; + + passed[func_count][size_count] = test_srslte_vec_acc_ff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sum_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sub_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sum_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sub_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_dot_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_dot_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_convert_fi(func_names[func_count], 
&timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_ccc_split(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_abs_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_abs_square_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_fff(func_names[func_count], 
&timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + sizes[size_count] = block_size; + size_count++; + } + + char fname[68]; + FILE *f = NULL; + void * p = popen("(date +%g%m%d && hostname) | tr '\\r\\n' '__'", "r"); + if (p) { + fgets(fname, 64, p); + strncpy(fname + strnlen(fname, 64) - 1, ".tsv", 4); + f = fopen(fname, "w"); + if (f) printf("Saving benchmark results in '%s'\n", fname); + } + pclose(p); + + + printf("\n"); + printf("%32s |", "Subroutine/MSps"); + if (f) fprintf(f, "Subroutine/MSps Vs Vector size\t"); + for (int i = 0; i < size_count; i++) { + printf(" %7d", sizes[i]); + if (f) fprintf(f, "%d\t", sizes[i]); + } + printf(" |\n"); + if (f) fprintf(f, "\n"); + + for (int j = 0; j < 32; j++) { + printf("-"); + } + printf("-+-"); + for (int j = 0; j < size_count; j++) { + printf("--------"); + } + printf("-|\n"); + + for (int i = 0; i < func_count; i++) { + printf("%32s | ", func_names[i]); + if (f) fprintf(f, "%s\t", func_names[i]); + + for (int j = 0; j < size_count; j++) { + printf(" %s%7.1f\x1b[0m", (passed[i][j])?"":"\x1B[31m", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + if (f) fprintf(f, "%.1f\t", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + + all_passed &= passed[i][j]; + } + printf(" |\n"); + if (f) fprintf(f, "\n"); + } + + if (f) fclose(f); + + return (all_passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; +} diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index d78f5d707..3bb7fb08f 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -36,48 +36,10 @@ #include "srslte/phy/utils/bit.h" -#ifdef LV_HAVE_SSE -#include -#endif - -#ifdef LV_HAVE_AVX -#include -#endif - - 
-#ifdef HAVE_VOLK -#include "volk/volk.h" -#endif - -#ifdef DEBUG_MODE -#warning FIXME: Disabling SSE/AVX vector code -#undef LV_HAVE_SSE -#undef LV_HAVE_AVX -#endif - - -int srslte_vec_acc_ii(int *x, uint32_t len) { - int i; - int z=0; - for (i=0;im) { - m=x[i]; - p=i; - } - } - return p; -#endif -#endif -} - -int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_STAR_S_FUNCTION - int16_t target=0; - volk_16i_max_star_16i(&target,x,len); - return target; - -#else - uint32_t i; - int16_t m=-INT16_MIN; - for (i=0;im) { - m=x[i]; - } - } - return m; -#endif -} - -int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) { - uint32_t i; - int16_t m=-INT16_MIN; - for (i=0;im) { - m=abs(x[i]); - } - } - return m; + return srslte_vec_max_fi_simd(x, len); } -void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) { -#ifdef HAVE_VOLK_MAX_VEC_FUNCTION - volk_32f_x2_max_32f(z,x,y,len); -#else - uint32_t i; - for (i=0;i y[i]) { - z[i] = x[i]; - } else { - z[i] = y[i]; - } - } -#endif -} - - +// CP autocorr uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_32 - uint32_t target=0; - volk_32fc_index_max_32u(&target,x,len); - return target; -#else -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_16 - uint32_t target=0; - volk_32fc_index_max_16u(&target,x,len); - return target; -#else - uint32_t i; - float m=-FLT_MAX; - uint32_t p=0; - float tmp; - for (i=0;im) { - m=tmp; - p=i; - } - } - return p; -#endif -#endif + return srslte_vec_max_ci_simd(x, len); } void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) { @@ -863,23 +393,5 @@ void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int16_t offset, } void srs_vec_cf_cpy(cf_t *dst, cf_t *src, int len) { - int i = 0; - -#ifdef LV_HAVE_AVX - for (; i < len - 3; i += 4) { - _mm256_store_ps((float *) &dst[i], _mm256_load_ps((float *) &src[i])); - } -#endif /* LV_HAVE_AVX */ -#ifdef LV_HAVE_SSE - for (; i < 
len - 1; i += 2) { - _mm_store_ps((float *) &dst[i], _mm_load_ps((float *) &src[i])); - } - for (; i < len; i++) { - ((__m64*) dst)[i] = ((__m64*) src)[i]; - } -#else - for (; i < len; i++) { - dst[i] = src[i]; - } -#endif /* LV_HAVE_SSE */ + srslte_vec_cp_simd(dst, src, len); } diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 7be4afc44..ab281a653 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -25,475 +25,1080 @@ */ -#include #include #include #include #include +#include +#include +#include #include "srslte/phy/utils/vector_simd.h" +#include "srslte/phy/utils/simd.h" -#include -#include -#ifdef LV_HAVE_SSE -#include -#endif +int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) { + int i = 0; + int result = 0; +#if SRSLTE_SIMD_S_SIZE + simd_s_t simd_dotProdVal = srslte_simd_s_zero(); + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); -#ifdef LV_HAVE_AVX -#include -#endif + simd_s_t z = srslte_simd_s_mul(a, b); + simd_dotProdVal = srslte_simd_s_add(simd_dotProdVal, z); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); -int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len) -{ - int result = 0; -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; + simd_s_t z = srslte_simd_s_mul(a, b); - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - - __m128i dotProdVal = _mm_setzero_si128(); + simd_dotProdVal = srslte_simd_s_add(simd_dotProdVal, z); + } + } + __attribute__ ((aligned (SRSLTE_SIMD_S_SIZE*2))) short dotProdVector[SRSLTE_SIMD_S_SIZE]; + srslte_simd_s_store(dotProdVector, simd_dotProdVal); + for (int k = 0; k < 
SRSLTE_SIMD_S_SIZE; k++) { + result += dotProdVector[k]; + } +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + result += (x[i] * y[i]); + } - __m128i xVal, yVal, zVal; - for(;number < points; number++){ + return result; +} - xVal = _mm_load_si128(xPtr); - yVal = _mm_loadu_si128(yPtr); +void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); - zVal = _mm_mullo_epi16(xVal, yVal); + simd_s_t r = srslte_simd_s_add(a, b); + + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - dotProdVal = _mm_add_epi16(dotProdVal, zVal); + simd_s_t r = srslte_simd_s_add(a, b); - xPtr ++; - yPtr ++; + srslte_simd_s_storeu(&z[i], r); + } } - - short dotProdVector[8]; - _mm_store_si128((__m128i*) dotProdVector, dotProdVal); - for (int i=0;i<8;i++) { - result += dotProdVector[i]; +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] + y[i]; } +} + +void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); - number = points * 8; - for(;number < len; number++){ - result += (x[number] * y[number]); + simd_s_t r = srslte_simd_s_sub(a, b); + + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); + + 
simd_s_t r = srslte_simd_s_sub(a, b); + + srslte_simd_s_storeu(&z[i], r); + } + } +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] - y[i]; } - -#endif - return result; } +void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); -int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len) -{ - int result = 0; -#ifdef LV_HAVE_AVX2 - unsigned int number = 0; - const unsigned int points = len / 16; + simd_s_t r = srslte_simd_s_mul(a, b); - const __m256i* xPtr = (const __m256i*) x; - const __m256i* yPtr = (const __m256i*) y; - - __m256i dotProdVal = _mm256_setzero_si256(); + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - __m256i xVal, yVal, zVal; - for(;number < points; number++){ + simd_s_t r = srslte_simd_s_mul(a, b); - xVal = _mm256_load_si256(xPtr); - yVal = _mm256_loadu_si256(yPtr); - zVal = _mm256_mullo_epi16(xVal, yVal); - dotProdVal = _mm256_add_epi16(dotProdVal, zVal); - xPtr ++; - yPtr ++; + srslte_simd_s_storeu(&z[i], r); + } } - - __attribute__ ((aligned (256))) short dotProdVector[16]; - _mm256_store_si256((__m256i*) dotProdVector, dotProdVal); - for (int i=0;i<16;i++) { - result += dotProdVector[i]; +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] * y[i]; } +} - number = points * 16; - for(;number < len; number++){ - result += (x[number] * y[number]); +/* No improvement with AVX */ +void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, int len) { + int i = 0; +#ifdef LV_HAVE_SSE +#if CMAKE_BUILD_TYPE!=Debug + + if (SRSLTE_IS_ALIGNED(x) && 
SRSLTE_IS_ALIGNED(lut)) { + for (; i < len - 7; i += 8) { + __m128i xVal = _mm_load_si128((__m128i *) &x[i]); + __m128i lutVal = _mm_load_si128((__m128i *) &lut[i]); + + for (int k = 0; k < 8; k++) { + int16_t x = (int16_t) _mm_extract_epi16(xVal, k); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, k); + y[l] = (short) x; + } + } + } else { + for (; i < len - 7; i += 8) { + __m128i xVal = _mm_loadu_si128((__m128i *) &x[i]); + __m128i lutVal = _mm_loadu_si128((__m128i *) &lut[i]); + + for (int k = 0; k < 8; k++) { + int16_t x = (int16_t) _mm_extract_epi16(xVal, k); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, k); + y[l] = (short) x; + } + } } - #endif - return result; +#endif + + for (; i < len; i++) { + y[lut[i]] = x[i]; + } } +/* Modified from volk_32f_s32f_convert_16i_a_simd2. Removed clipping */ +void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, int len) { + int i = 0; +#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE + simd_f_t s = srslte_simd_f_set1(scale); + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_f_t a = srslte_simd_f_load(&x[i]); + simd_f_t b = srslte_simd_f_load(&x[i + SRSLTE_SIMD_F_SIZE]); -void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; + simd_f_t sa = srslte_simd_f_mul(a, s); + simd_f_t sb = srslte_simd_f_mul(b, s); + + simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb); + + srslte_simd_s_store(&z[i], i16); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_f_t a = srslte_simd_f_loadu(&x[i]); + simd_f_t b = srslte_simd_f_loadu(&x[i + SRSLTE_SIMD_F_SIZE]); - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - __m128i* zPtr = (__m128i*) z; + simd_f_t sa = srslte_simd_f_mul(a, s); + simd_f_t sb = srslte_simd_f_mul(b, s); - __m128i xVal, yVal, zVal; - for(;number 
< points; number++){ + simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb); - xVal = _mm_load_si128(xPtr); - yVal = _mm_load_si128(yPtr); + srslte_simd_s_storeu(&z[i], i16); + } + } +#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = (int16_t) (x[i] * scale); + } +} - zVal = _mm_add_epi16(xVal, yVal); +float srslte_vec_acc_ff_simd(float *x, int len) { + int i = 0; + float acc_sum = 0.0f; - _mm_store_si128(zPtr, zVal); +#if SRSLTE_SIMD_F_SIZE + simd_f_t simd_sum = srslte_simd_f_zero(); - xPtr ++; - yPtr ++; - zPtr ++; + if (SRSLTE_IS_ALIGNED(x)) { + for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) { + simd_f_t a = srslte_simd_f_load(&x[i]); + + simd_sum = srslte_simd_f_add(simd_sum, a); + } + } else { + for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) { + simd_f_t a = srslte_simd_f_loadu(&x[i]); + + simd_sum = srslte_simd_f_add(simd_sum, a); + } } - number = points * 8; - for(;number < len; number++){ - z[number] = x[number] + y[number]; + __attribute__((aligned(SRSLTE_SIMD_F_SIZE*4))) float sum[SRSLTE_SIMD_F_SIZE]; + srslte_simd_f_store(sum, simd_sum); + for (int k = 0; k < SRSLTE_SIMD_F_SIZE; k++) { + acc_sum += sum[k]; } #endif + for (; i max_value) { + max_value = values_buffer[k]; + max_index = (uint32_t) indexes_buffer[k]; + } + } +#endif /* SRSLTE_SIMD_I_SIZE */ + + for (; i < len; i++) { + if (x[i] > max_value) { + max_value = x[i]; + max_index = (uint32_t)i; + } + } + + return max_index; +} + +uint32_t srslte_vec_max_ci_simd(cf_t *x, int len) { + int i = 0; + + float max_value = -INFINITY; + uint32_t max_index = 0; + +#if SRSLTE_SIMD_I_SIZE + __attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(int)))) int indexes_buffer[SRSLTE_SIMD_I_SIZE] = {0}; + __attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(float)))) float values_buffer[SRSLTE_SIMD_I_SIZE] = {0}; + + for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) indexes_buffer[k] = k; + simd_i_t simd_inc = srslte_simd_i_set1(SRSLTE_SIMD_I_SIZE); + 
simd_i_t simd_indexes = srslte_simd_i_load(indexes_buffer); + simd_i_t simd_max_indexes = srslte_simd_i_set1(0); + + simd_f_t simd_max_values = srslte_simd_f_set1(-INFINITY); + + if (SRSLTE_IS_ALIGNED(x)) { + for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { + simd_f_t x1 = srslte_simd_f_load((float *) &x[i]); + simd_f_t x2 = srslte_simd_f_load((float *) &x[i + SRSLTE_SIMD_F_SIZE / 2]); + + simd_f_t mul1 = srslte_simd_f_mul(x1, x1); + simd_f_t mul2 = srslte_simd_f_mul(x2, x2); + + simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); + + simd_sel_t res = srslte_simd_f_max(z1, simd_max_values); + + simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); + simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); + simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); + } + } else { + for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { + simd_f_t x1 = srslte_simd_f_loadu((float *) &x[i]); + simd_f_t x2 = srslte_simd_f_loadu((float *) &x[i + SRSLTE_SIMD_F_SIZE / 2]); + + simd_f_t mul1 = srslte_simd_f_mul(x1, x1); + simd_f_t mul2 = srslte_simd_f_mul(x2, x2); + + simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); + + simd_sel_t res = srslte_simd_f_max(z1, simd_max_values); + + simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); + simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); + simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); + } + } + + srslte_simd_i_store(indexes_buffer, simd_max_indexes); + srslte_simd_f_store(values_buffer, simd_max_values); + + for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) { + if (values_buffer[k] > max_value) { + max_value = values_buffer[k]; + max_index = (uint32_t) indexes_buffer[k]; + } + } +#endif /* SRSLTE_SIMD_I_SIZE */ + + for (; i < len; i++) { + cf_t a = x[i]; + float abs2 = __real__ a * __real__ a + __imag__ a * __imag__ a; + if (abs2 > max_value) { + max_value = 
abs2; + max_index = (uint32_t)i; + } + } + + return max_index; } diff --git a/lib/src/upper/rlc_um.cc b/lib/src/upper/rlc_um.cc index b2697178c..d95e186e7 100644 --- a/lib/src/upper/rlc_um.cc +++ b/lib/src/upper/rlc_um.cc @@ -277,7 +277,7 @@ int rlc_um::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) int head_len = rlc_um_packed_length(&header); int pdu_space = nof_bytes; - if(pdu_space <= head_len) + if(pdu_space <= head_len + 1) { log->warning("%s Cannot build a PDU - %d bytes available, %d bytes required for header\n", rrc->get_rb_name(lcid).c_str(), nof_bytes, head_len); @@ -309,7 +309,7 @@ int rlc_um::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) } // Pull SDUs from queue - while(pdu_space > head_len && tx_sdu_queue.size() > 0) + while(pdu_space > head_len + 1 && tx_sdu_queue.size() > 0) { log->debug("pdu_space=%d, head_len=%d\n", pdu_space, head_len); if(last_li > 0) diff --git a/lib/test/upper/rlc_um_test.cc b/lib/test/upper/rlc_um_test.cc index 0894a2a8b..8abcfae3c 100644 --- a/lib/test/upper/rlc_um_test.cc +++ b/lib/test/upper/rlc_um_test.cc @@ -123,7 +123,7 @@ void basic_test() byte_buffer_t pdu_bufs[NBUFS]; for(int i=0;i pending_ack; diff --git a/srsenb/hdr/phy/phch_worker.h b/srsenb/hdr/phy/phch_worker.h index 906e8b9d0..dd95aaa93 100644 --- a/srsenb/hdr/phy/phch_worker.h +++ b/srsenb/hdr/phy/phch_worker.h @@ -73,12 +73,12 @@ private: void work_imp(); - int encode_pdsch(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants, uint32_t sf_idx); - int decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, uint32_t tti_rx); - int encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks, uint32_t sf_idx); - int encode_pdcch_dl(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants, uint32_t sf_idx); - int encode_pdcch_ul(srslte_enb_ul_pusch_t *grants, uint32_t nof_grants, uint32_t sf_idx); - int decode_pucch(uint32_t tti_rx); + int encode_pdsch(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants); + int decode_pusch(srslte_enb_ul_pusch_t 
*grants, uint32_t nof_pusch); + int encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks); + int encode_pdcch_dl(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants); + int encode_pdcch_ul(srslte_enb_ul_pusch_t *grants, uint32_t nof_grants); + int decode_pucch(); /* Common objects */ @@ -87,10 +87,11 @@ private: bool initiated; bool running; - cf_t *signal_buffer_rx; - cf_t *signal_buffer_tx; - uint32_t tti_rx, tti_tx, tti_sched_ul, sf_rx, sf_tx, sf_sched_ul, tx_mutex_cnt; - + cf_t *signal_buffer_rx; + cf_t *signal_buffer_tx[SRSLTE_MAX_PORTS]; + uint32_t tti_rx, tti_tx_dl, tti_tx_ul; + uint32_t sf_rx, sf_tx, tx_mutex_cnt; + uint32_t t_rx, t_tx_dl, t_tx_ul; srslte_enb_dl_t enb_dl; srslte_enb_ul_t enb_ul; diff --git a/srsenb/src/mac/mac.cc b/srsenb/src/mac/mac.cc index 6c8458540..03e192034 100644 --- a/srsenb/src/mac/mac.cc +++ b/srsenb/src/mac/mac.cc @@ -406,7 +406,7 @@ int mac::get_dl_sched(uint32_t tti, dl_sched_t *dl_sched_res) log_h->step(tti); if (!started) { - return 0; + return 0; } if (!dl_sched_res) { @@ -601,7 +601,7 @@ int mac::get_ul_sched(uint32_t tti, ul_sched_t *ul_sched_res) ul_sched_res->nof_phich = sched_result.nof_phich_elems; return SRSLTE_SUCCESS; } - + void mac::tti_clock() { timers_thread.tti_clock(); diff --git a/srsenb/src/mac/scheduler.cc b/srsenb/src/mac/scheduler.cc index 801b68d38..a7e3d12d6 100644 --- a/srsenb/src/mac/scheduler.cc +++ b/srsenb/src/mac/scheduler.cc @@ -541,7 +541,7 @@ int sched::dl_sched_rar(dl_sched_rar_t rar[MAX_RAR_LIST]) pending_rar[j].rar_tti = 0; // Save UL resources - uint32_t pending_tti=(current_tti+6)%10; + uint32_t pending_tti=(current_tti+MSG3_DELAY_MS+HARQ_DELAY_MS)%10; pending_msg3[pending_tti].enabled = true; pending_msg3[pending_tti].rnti = pending_rar[j].rnti; pending_msg3[pending_tti].L = L_prb; @@ -678,17 +678,17 @@ int sched::ul_sched(uint32_t tti, srsenb::sched_interface::ul_sched_res_t* sched pthread_mutex_lock(&mutex); /* If dl_sched() not yet called this tti (this tti is +4ms advanced), 
reset CCE state */ - if ((current_tti+4)%10240 != tti) { + if (TTI_TX(current_tti) != tti) { bzero(used_cce, MAX_CCE*sizeof(bool)); } /* Initialize variables */ current_tti = tti; sfn = tti/10; - if (tti > 4) { - sf_idx = (tti-4)%10; + if (tti > HARQ_DELAY_MS) { + sf_idx = (tti-HARQ_DELAY_MS)%10; } else { - sf_idx = (tti+10240-4)%10; + sf_idx = (tti+10240-HARQ_DELAY_MS)%10; } int nof_dci_elems = 0; int nof_phich_elems = 0; diff --git a/srsenb/src/mac/scheduler_harq.cc b/srsenb/src/mac/scheduler_harq.cc index a6ae70d19..f5209b374 100644 --- a/srsenb/src/mac/scheduler_harq.cc +++ b/srsenb/src/mac/scheduler_harq.cc @@ -177,7 +177,7 @@ void dl_harq_proc::set_rbgmask(uint32_t new_mask) bool dl_harq_proc::has_pending_retx(uint32_t current_tti) { - return srslte_tti_interval(current_tti, tti) >= 8 && has_pending_retx_common(); + return srslte_tti_interval(current_tti, tti) >= (2*HARQ_DELAY_MS) && has_pending_retx_common(); } int dl_harq_proc::get_tbs() diff --git a/srsenb/src/mac/scheduler_metric.cc b/srsenb/src/mac/scheduler_metric.cc index 708ab2dd8..6c50009f7 100644 --- a/srsenb/src/mac/scheduler_metric.cc +++ b/srsenb/src/mac/scheduler_metric.cc @@ -142,8 +142,12 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) dl_harq_proc *h = user->get_pending_dl_harq(current_tti); // Time-domain RR scheduling +#if ASYNC_DL_SCHED if (pending_data || h) { - if (nof_users_with_data) { +#else + if (pending_data || (h && !h->is_empty())) { +#endif + if (nof_users_with_data) { if (nof_users_with_data == 2) { } if ((current_tti%nof_users_with_data) != user->ue_idx) { @@ -153,7 +157,11 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) } // Schedule retx if we have space +#if ASYNC_DL_SCHED if (h) { +#else + if (h && !h->is_empty()) { +#endif uint32_t retx_mask = h->get_rbgmask(); // If can schedule the same mask, do it if (!allocation_is_valid(retx_mask)) { @@ -170,10 +178,14 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) } } } - // 
If could not schedule the reTx, or there wasn't any pending retx, find an empty PID + // If could not schedule the reTx, or there wasn't any pending retx, find an empty PID +#if ASYNC_DL_SCHED h = user->get_empty_dl_harq(); if (h) { - // Allocate resources based on pending data +#else + if (h && h->is_empty()) { +#endif + // Allocate resources based on pending data if (pending_data) { uint32_t pending_rb = user->get_required_prb_dl(pending_data, nof_ctrl_symbols); uint32_t newtx_mask = 0; diff --git a/srsenb/src/mac/scheduler_ue.cc b/srsenb/src/mac/scheduler_ue.cc index 004808976..94056b990 100644 --- a/srsenb/src/mac/scheduler_ue.cc +++ b/srsenb/src/mac/scheduler_ue.cc @@ -249,7 +249,7 @@ bool sched_ue::get_pucch_sched(uint32_t current_tti, uint32_t prb_idx[2]) // First check if it has pending ACKs for (int i=0;icell.nof_prb)*sizeof(cf_t)); - if (!signal_buffer_tx) { + bzero(&signal_buffer_tx, sizeof(cf_t *) * SRSLTE_MAX_PORTS); + signal_buffer_tx[0] = (cf_t*) srslte_vec_malloc(2*SRSLTE_SF_LEN_PRB(phy->cell.nof_prb)*sizeof(cf_t)); + if (!signal_buffer_tx[0]) { fprintf(stderr, "Error allocating memory\n"); return; } - if (srslte_enb_dl_init(&enb_dl, phy->cell.nof_prb)) { + if (srslte_enb_dl_init(&enb_dl, signal_buffer_tx, phy->cell.nof_prb)) { fprintf(stderr, "Error initiating ENB DL\n"); return; } @@ -106,7 +107,7 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_) fprintf(stderr, "Error initiating ENB DL\n"); return; } - if (srslte_enb_ul_init(&enb_ul, phy->cell.nof_prb)) { + if (srslte_enb_ul_init(&enb_ul, signal_buffer_rx, phy->cell.nof_prb)) { fprintf(stderr, "Error initiating ENB UL\n"); return; } @@ -156,8 +157,10 @@ void phch_worker::stop() if (signal_buffer_rx) { free(signal_buffer_rx); } - if (signal_buffer_tx) { - free(signal_buffer_tx); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + if (signal_buffer_tx[i]) { + free(signal_buffer_tx[i]); + } } pthread_mutex_unlock(&mutex); pthread_mutex_destroy(&mutex); @@ -176,11 +179,16 @@ cf_t* 
phch_worker::get_buffer_rx() void phch_worker::set_time(uint32_t tti_, uint32_t tx_mutex_cnt_, srslte_timestamp_t tx_time_) { tti_rx = tti_; - tti_tx = (tti_ + 4)%10240; - tti_sched_ul = (tti_ + 8)%10240; + tti_tx_dl = TTI_TX(tti_rx); + tti_tx_ul = TTI_RX_ACK(tti_rx); + sf_rx = tti_rx%10; - sf_tx = tti_tx%10; - sf_sched_ul = tti_sched_ul%10; + sf_tx = tti_tx_dl%10; + + t_tx_dl = TTIMOD(tti_tx_dl); + t_rx = TTIMOD(tti_rx); + t_tx_ul = TTIMOD(tti_tx_ul); + tx_mutex_cnt = tx_mutex_cnt_; memcpy(&tx_time, &tx_time_, sizeof(srslte_timestamp_t)); } @@ -189,16 +197,16 @@ int phch_worker::add_rnti(uint16_t rnti) { if (srslte_enb_dl_add_rnti(&enb_dl, rnti)) { - return -1; + return -1; } if (srslte_enb_ul_add_rnti(&enb_ul, rnti)) { - return -1; + return -1; } - // Create user - ue_db[rnti].rnti = rnti; - - return SRSLTE_SUCCESS; + // Create user + ue_db[rnti].rnti = rnti; + + return SRSLTE_SUCCESS; } @@ -206,174 +214,171 @@ uint32_t phch_worker::get_nof_rnti() { return ue_db.size(); } -void phch_worker::set_config_dedicated(uint16_t rnti, - srslte_uci_cfg_t *uci_cfg, +void phch_worker::set_config_dedicated(uint16_t rnti, + srslte_uci_cfg_t *uci_cfg, srslte_pucch_sched_t *pucch_sched, - srslte_refsignal_srs_cfg_t *srs_cfg, + srslte_refsignal_srs_cfg_t *srs_cfg, uint32_t I_sr, bool pucch_cqi, uint32_t pmi_idx, bool pucch_cqi_ack) { - pthread_mutex_lock(&mutex); + pthread_mutex_lock(&mutex); if (ue_db.count(rnti)) { pucch_sched->N_pucch_1 = phy->pucch_cfg.n1_pucch_an; srslte_enb_ul_cfg_ue(&enb_ul, rnti, uci_cfg, pucch_sched, srs_cfg); - - ue_db[rnti].I_sr = I_sr; + + ue_db[rnti].I_sr = I_sr; ue_db[rnti].I_sr_en = true; if (pucch_cqi) { - ue_db[rnti].pmi_idx = pmi_idx; - ue_db[rnti].cqi_en = true; - ue_db[rnti].pucch_cqi_ack = pucch_cqi_ack; + ue_db[rnti].pmi_idx = pmi_idx; + ue_db[rnti].cqi_en = true; + ue_db[rnti].pucch_cqi_ack = pucch_cqi_ack; } else { - ue_db[rnti].pmi_idx = 0; - ue_db[rnti].cqi_en = false; + ue_db[rnti].pmi_idx = 0; + ue_db[rnti].cqi_en = false; } - + } else 
{ Error("Setting config dedicated: rnti=0x%x does not exist\n"); } - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } void phch_worker::rem_rnti(uint16_t rnti) { - pthread_mutex_lock(&mutex); + pthread_mutex_lock(&mutex); if (ue_db.count(rnti)) { ue_db.erase(rnti); - - srslte_enb_dl_rem_rnti(&enb_dl, rnti); + + srslte_enb_dl_rem_rnti(&enb_dl, rnti); srslte_enb_ul_rem_rnti(&enb_ul, rnti); - - // remove any pending grant for each subframe - for (uint32_t i=0;i<10;i++) { + + // remove any pending grant for each subframe + for (uint32_t i=0;iul_grants[i].nof_grants;j++) { if (phy->ul_grants[i].sched_grants[j].rnti == rnti) { - phy->ul_grants[i].sched_grants[j].rnti = 0; + phy->ul_grants[i].sched_grants[j].rnti = 0; } } for (uint32_t j=0;jdl_grants[i].nof_grants;j++) { if (phy->dl_grants[i].sched_grants[j].rnti == rnti) { - phy->dl_grants[i].sched_grants[j].rnti = 0; + phy->dl_grants[i].sched_grants[j].rnti = 0; } } } } else { Error("Removing user: rnti=0x%x does not exist\n", rnti); } - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } void phch_worker::work_imp() { - uint32_t sf_ack; - if (!running) { return; } pthread_mutex_lock(&mutex); - + mac_interface_phy::ul_sched_t *ul_grants = phy->ul_grants; - mac_interface_phy::dl_sched_t *dl_grants = phy->dl_grants; - mac_interface_phy *mac = phy->mac; - + mac_interface_phy::dl_sched_t *dl_grants = phy->dl_grants; + mac_interface_phy *mac = phy->mac; + log_h->step(tti_rx); - + Debug("Worker %d running\n", get_id()); - + for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; - ue_db[rnti].has_grant_tti = -1; + ue_db[rnti].has_grant_tti = -1; } // Process UL signal srslte_enb_ul_fft(&enb_ul, signal_buffer_rx); // Decode pending UL grants for the tti they were scheduled - decode_pusch(ul_grants[sf_rx].sched_grants, ul_grants[sf_rx].nof_grants, sf_rx); - + decode_pusch(ul_grants[t_rx].sched_grants, ul_grants[t_rx].nof_grants); + // Decode 
remaining PUCCH ACKs not associated with PUSCH transmission and SR signals - decode_pucch(tti_rx); - + decode_pucch(); + // Get DL scheduling for the TX TTI from MAC - if (mac->get_dl_sched(tti_tx, &dl_grants[sf_tx]) < 0) { + if (mac->get_dl_sched(tti_tx_dl, &dl_grants[t_tx_dl]) < 0) { Error("Getting DL scheduling from MAC\n"); goto unlock; - } - - if (dl_grants[sf_tx].cfi < 1 || dl_grants[sf_tx].cfi > 3) { - Error("Invalid CFI=%d\n", dl_grants[sf_tx].cfi); + } + + if (dl_grants[t_tx_dl].cfi < 1 || dl_grants[t_tx_dl].cfi > 3) { + Error("Invalid CFI=%d\n", dl_grants[t_tx_dl].cfi); goto unlock; } - + // Get UL scheduling for the TX TTI from MAC - if (mac->get_ul_sched(tti_sched_ul, &ul_grants[sf_sched_ul]) < 0) { + if (mac->get_ul_sched(tti_tx_ul, &ul_grants[t_tx_ul]) < 0) { Error("Getting UL scheduling from MAC\n"); goto unlock; - } - + } + // Put base signals (references, PBCH, PCFICH and PSS/SSS) into the resource grid srslte_enb_dl_clear_sf(&enb_dl); - srslte_enb_dl_set_cfi(&enb_dl, dl_grants[sf_tx].cfi); - srslte_enb_dl_put_base(&enb_dl, tti_tx); + srslte_enb_dl_set_cfi(&enb_dl, dl_grants[t_tx_dl].cfi); + srslte_enb_dl_put_base(&enb_dl, tti_tx_dl); + + // Put UL/DL grants to resource grid. PDSCH data will be encoded as well. + encode_pdcch_dl(dl_grants[t_tx_dl].sched_grants, dl_grants[t_tx_dl].nof_grants); + encode_pdcch_ul(ul_grants[t_tx_ul].sched_grants, ul_grants[t_tx_ul].nof_grants); + encode_pdsch(dl_grants[t_tx_dl].sched_grants, dl_grants[t_tx_dl].nof_grants); - // Put UL/DL grants to resource grid. PDSCH data will be encoded as well. 
- encode_pdcch_dl(dl_grants[sf_tx].sched_grants, dl_grants[sf_tx].nof_grants, sf_tx); - encode_pdcch_ul(ul_grants[sf_sched_ul].sched_grants, ul_grants[sf_sched_ul].nof_grants, sf_tx); - encode_pdsch(dl_grants[sf_tx].sched_grants, dl_grants[sf_tx].nof_grants, sf_tx); - // Put pending PHICH HARQ ACK/NACK indications into subframe - encode_phich(ul_grants[sf_sched_ul].phich, ul_grants[sf_sched_ul].nof_phich, sf_tx); - - // Prepare for receive ACK for DL grants in sf_tx+4 - sf_ack = (sf_tx+4)%10; - phy->ack_clear(sf_ack); - for (uint32_t i=0;iack_clear(TTIMOD(TTI_TX(t_tx_dl))); + for (uint32_t i=0;i= SRSLTE_CRNTI_START && dl_grants[sf_tx].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { - phy->ack_set_pending(sf_ack, dl_grants[sf_tx].sched_grants[i].rnti, dl_grants[sf_tx].sched_grants[i].location.ncce); + if (dl_grants[t_tx_dl].sched_grants[i].rnti >= SRSLTE_CRNTI_START && dl_grants[t_tx_dl].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { + phy->ack_set_pending(TTIMOD(TTI_TX(t_tx_dl)), dl_grants[t_tx_dl].sched_grants[i].rnti, dl_grants[t_tx_dl].sched_grants[i].location.ncce); } } - + // Generate signal and transmit - srslte_enb_dl_gen_signal(&enb_dl, signal_buffer_tx); + srslte_enb_dl_gen_signal(&enb_dl); Debug("Sending to radio\n"); - phy->worker_end(tx_mutex_cnt, signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); + phy->worker_end(tx_mutex_cnt, signal_buffer_tx[0], SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); #ifdef DEBUG_WRITE_FILE fwrite(signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb)*sizeof(cf_t), 1, f); #endif #ifdef DEBUG_WRITE_FILE - if (tti_tx == 10) { + if (tti_tx_dl == 10) { fclose(f); exit(-1); } -#endif - +#endif + /* Tell the plotting thread to draw the plots */ #ifdef ENABLE_GUI if ((int) get_id() == plot_worker_id) { - sem_post(&plot_sem); + sem_post(&plot_sem); } #endif unlock: - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } -int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, uint32_t tti) 
+int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) { - srslte_uci_data_t uci_data; + srslte_uci_data_t uci_data; bzero(&uci_data, sizeof(srslte_uci_data_t)); - - uint32_t wideband_cqi_value = 0; - - uint32_t n_rb_ho = 0; + + uint32_t wideband_cqi_value = 0; + + uint32_t n_rb_ho = 0; for (uint32_t i=0;iack_is_pending(sf_rx, rnti)) { - uci_data.uci_ack_len = 1; + if (phy->ack_is_pending(t_rx, rnti)) { + uci_data.uci_ack_len = 1; } - // Configure PUSCH CQI channel + // Configure PUSCH CQI channel srslte_cqi_value_t cqi_value; - bool cqi_enabled = false; + bool cqi_enabled = false; if (ue_db[rnti].cqi_en && srslte_cqi_send(ue_db[rnti].pmi_idx, tti_rx)) { cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; - cqi_enabled = true; + cqi_enabled = true; } else if (grants[i].grant.cqi_request) { cqi_value.type = SRSLTE_CQI_TYPE_SUBBAND_HL; cqi_value.subband_hl.N = (phy->cell.nof_prb > 7) ? srslte_cqi_hl_get_no_subbands(phy->cell.nof_prb) : 0; - cqi_enabled = true; + cqi_enabled = true; } if (cqi_enabled) { uci_data.uci_cqi_len = srslte_cqi_size(&cqi_value); } - - // mark this tti as having an ul grant to avoid pucch - ue_db[rnti].has_grant_tti = tti_rx; - - srslte_ra_ul_grant_t phy_grant; + + // mark this tti as having an ul grant to avoid pucch + ue_db[rnti].has_grant_tti = tti_rx; + + srslte_ra_ul_grant_t phy_grant; int res = -1; - if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant, tti%8)) { + if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant)) { if (phy_grant.mcs.mod == SRSLTE_MOD_64QAM) { phy_grant.mcs.mod = SRSLTE_MOD_16QAM; } phy_grant.Qm = SRSLTE_MIN(phy_grant.Qm, 4); res = srslte_enb_ul_get_pusch(&enb_ul, &phy_grant, grants[i].softbuffer, - rnti, grants[i].rv_idx, - grants[i].current_tx_nb, - grants[i].data, - &uci_data, - tti); + rnti, grants[i].rv_idx, + grants[i].current_tx_nb, + grants[i].data, + &uci_data, + sf_rx); } else { Error("Computing PUSCH grant\n"); - 
return SRSLTE_ERROR; + return SRSLTE_ERROR; } - + #ifdef LOG_EXECTIME gettimeofday(&t[2], NULL); get_time_interval(t); snprintf(timestr, 64, ", dec_time=%4d us", (int) t[0].tv_usec); #endif - - bool crc_res = (res == 0); - + + bool crc_res = (res == 0); + // Save PHICH scheduling for this user. Each user can have just 1 PUSCH grant per TTI - ue_db[rnti].phich_info.n_prb_lowest = enb_ul.pusch_cfg.grant.n_prb_tilde[0]; - ue_db[rnti].phich_info.n_dmrs = phy_grant.ncs_dmrs; + ue_db[rnti].phich_info.n_prb_lowest = enb_ul.pusch_cfg.grant.n_prb_tilde[0]; + ue_db[rnti].phich_info.n_dmrs = phy_grant.ncs_dmrs; char cqi_str[64]; if (cqi_enabled) { @@ -444,8 +449,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, } snprintf(cqi_str, 64, ", cqi=%d", wideband_cqi_value); } - - float snr_db = 10*log10(srslte_chest_ul_get_snr(&enb_ul.chest)); + + float snr_db = 10*log10(srslte_chest_ul_get_snr(&enb_ul.chest)); /* if (!crc_res && enb_ul.pusch_cfg.grant.L_prb == 1 && enb_ul.pusch_cfg.grant.n_prb[0] == 0 && snr_db > 5) { @@ -454,8 +459,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, srslte_vec_save_file("d", enb_ul.pusch.d, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); srslte_vec_save_file("ce2", enb_ul.pusch.ce, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); srslte_vec_save_file("z", enb_ul.pusch.z, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); - printf("saved sf_idx=%d, mcs=%d, tbs=%d, rnti=%d, rv=%d, snr=%.1f\n", tti%10, - grants[i].grant.mcs_idx, enb_ul.pusch_cfg.cb_segm.tbs, rnti, grants[i].rv_idx, snr_db); + printf("saved sf_idx=%d, mcs=%d, tbs=%d, rnti=%d, rv=%d, snr=%.1f\n", tti%10, + grants[i].grant.mcs_idx, enb_ul.pusch_cfg.cb_segm.tbs, rnti, grants[i].rv_idx, snr_db); exit(-1); } */ @@ -463,204 +468,204 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, "PUSCH: rnti=0x%x, prb=(%d,%d), tbs=%d, mcs=%d, rv=%d, snr=%.1f dB, n_iter=%d, crc=%s%s%s%s\n", rnti, phy_grant.n_prb[0], 
phy_grant.n_prb[0]+phy_grant.L_prb, phy_grant.mcs.tbs/8, phy_grant.mcs.idx, grants[i].grant.rv_idx, - snr_db, + snr_db, srslte_pusch_last_noi(&enb_ul.pusch), crc_res?"OK":"KO", uci_data.uci_ack_len>0?(uci_data.uci_ack?", ack=1":", ack=0"):"", - uci_data.uci_cqi_len>0?cqi_str:"", - timestr); - - // Notify MAC of RL status + uci_data.uci_cqi_len>0?cqi_str:"", + timestr); + + // Notify MAC of RL status if (grants[i].grant.rv_idx == 0) { if (res && snr_db < PUSCH_RL_SNR_DB_TH) { Debug("PUSCH: Radio-Link failure snr=%.1f dB\n", snr_db); phy->mac->rl_failure(rnti); } else { phy->mac->rl_ok(rnti); - } + } } - + // Notify MAC new received data and HARQ Indication value - phy->mac->crc_info(tti_rx, rnti, phy_grant.mcs.tbs/8, crc_res); + phy->mac->crc_info(tti_rx, rnti, phy_grant.mcs.tbs/8, crc_res); if (uci_data.uci_ack_len) { phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (crc_res || snr_db > PUSCH_RL_SNR_DB_TH)); } - - // Notify MAC of UL SNR and DL CQI + + // Notify MAC of UL SNR and DL CQI if (snr_db >= PUSCH_RL_SNR_DB_TH) { phy->mac->snr_info(tti_rx, rnti, snr_db); } if (uci_data.uci_cqi_len>0 && crc_res) { phy->mac->cqi_info(tti_rx, rnti, wideband_cqi_value); } - - // Save metrics stats + + // Save metrics stats ue_db[rnti].metrics_ul(phy_grant.mcs.idx, 0, snr_db, srslte_pusch_last_noi(&enb_ul.pusch)); - } + } } - return SRSLTE_SUCCESS; + return SRSLTE_SUCCESS; } -int phch_worker::decode_pucch(uint32_t tti_rx) +int phch_worker::decode_pucch() { - uint32_t sf_rx = tti_rx%10; - srslte_uci_data_t uci_data; - + srslte_uci_data_t uci_data; + for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; if (rnti >= SRSLTE_CRNTI_START && rnti <= SRSLTE_CRNTI_END && ue_db[rnti].has_grant_tti != (int) tti_rx) { - // Check if user needs to receive PUCCH - bool needs_pucch = false, needs_ack=false, needs_sr=false, needs_cqi=false; + // Check if user needs to receive PUCCH + bool needs_pucch = false, needs_ack=false, 
needs_sr=false, needs_cqi=false; uint32_t last_n_pdcch = 0; bzero(&uci_data, sizeof(srslte_uci_data_t)); - + if (ue_db[rnti].I_sr_en) { if (srslte_ue_ul_sr_send_tti(ue_db[rnti].I_sr, tti_rx)) { - needs_pucch = true; - needs_sr = true; - uci_data.scheduling_request = true; + needs_pucch = true; + needs_sr = true; + uci_data.scheduling_request = true; } - } - if (phy->ack_is_pending(sf_rx, rnti, &last_n_pdcch)) { - needs_pucch = true; - needs_ack = true; - uci_data.uci_ack_len = 1; + } + + if (phy->ack_is_pending(t_rx, rnti, &last_n_pdcch)) { + needs_pucch = true; + needs_ack = true; + uci_data.uci_ack_len = 1; } srslte_cqi_value_t cqi_value; if (ue_db[rnti].cqi_en && (ue_db[rnti].pucch_cqi_ack || !needs_ack)) { if (srslte_cqi_send(ue_db[rnti].pmi_idx, tti_rx)) { - needs_pucch = true; - needs_cqi = true; - cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; + needs_pucch = true; + needs_cqi = true; + cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; uci_data.uci_cqi_len = srslte_cqi_size(&cqi_value); } } - + if (needs_pucch) { if (srslte_enb_ul_get_pucch(&enb_ul, rnti, last_n_pdcch, sf_rx, &uci_data)) { fprintf(stderr, "Error getting PUCCH\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } if (uci_data.uci_ack_len > 0) { - phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (srslte_pucch_get_last_corr(&enb_ul.pucch) >= PUCCH_RL_CORR_TH)); + phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (srslte_pucch_get_last_corr(&enb_ul.pucch) >= PUCCH_RL_CORR_TH)); } if (uci_data.scheduling_request) { - phy->mac->sr_detected(tti_rx, rnti); + phy->mac->sr_detected(tti_rx, rnti); } - + char cqi_str[64]; if (uci_data.uci_cqi_len) { srslte_cqi_value_unpack(uci_data.uci_cqi, &cqi_value); phy->mac->cqi_info(tti_rx, rnti, cqi_value.wideband.wideband_cqi); sprintf(cqi_str, ", cqi=%d", cqi_value.wideband.wideband_cqi); } - log_h->info("PUCCH: rnti=0x%x, corr=%.2f, n_pucch=%d, n_prb=%d%s%s%s\n", - rnti, + log_h->info("PUCCH: rnti=0x%x, corr=%.2f, n_pucch=%d, n_prb=%d%s%s%s\n", + rnti, 
srslte_pucch_get_last_corr(&enb_ul.pucch), enb_ul.pucch.last_n_pucch, enb_ul.pucch.last_n_prb, - needs_ack?(uci_data.uci_ack?", ack=1":", ack=0"):"", - needs_sr?(uci_data.scheduling_request?", sr=yes":", sr=no"):"", - needs_cqi?cqi_str:""); + needs_ack?(uci_data.uci_ack?", ack=1":", ack=0"):"", + needs_sr?(uci_data.scheduling_request?", sr=yes":", sr=no"):"", + needs_cqi?cqi_str:""); - // Notify MAC of RL status + // Notify MAC of RL status if (!needs_sr) { if (srslte_pucch_get_last_corr(&enb_ul.pucch) < PUCCH_RL_CORR_TH) { Debug("PUCCH: Radio-Link failure corr=%.1f\n", srslte_pucch_get_last_corr(&enb_ul.pucch)); phy->mac->rl_failure(rnti); } else { phy->mac->rl_ok(rnti); - } - } + } + } } } - } - return 0; + } + return 0; } -int phch_worker::encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks, uint32_t sf_idx) +int phch_worker::encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks) { for (uint32_t i=0;iinfo_hex(ptr, len, - "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx=%d\n", - rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, - phy_grant.mcs[0].tbs/8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx); + "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx_dl=%d\n", + rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, + phy_grant.mcs[0].tbs/8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx_dl); } srslte_softbuffer_tx_t *sb[SRSLTE_MAX_CODEWORDS] = {grants[i].softbuffer, NULL}; @@ -693,7 +698,7 @@ int phch_worker::encode_pdsch(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants int rv[SRSLTE_MAX_CODEWORDS] = {grants[i].grant.rv_idx, 0}; - if (srslte_enb_dl_put_pdsch(&enb_dl, &phy_grant, sb, rnti, rv, sf_idx, d, SRSLTE_MIMO_TYPE_SINGLE_ANTENNA, 0)) + if (srslte_enb_dl_put_pdsch(&enb_dl, &phy_grant, sb, rnti, rv, sf_tx, d, SRSLTE_MIMO_TYPE_SINGLE_ANTENNA, 0)) { fprintf(stderr, "Error putting PDSCH %d\n",i); return SRSLTE_ERROR; diff --git a/srsenb/src/phy/txrx.cc 
b/srsenb/src/phy/txrx.cc index 9427e3459..fa14b0b82 100644 --- a/srsenb/src/phy/txrx.cc +++ b/srsenb/src/phy/txrx.cc @@ -115,7 +115,7 @@ void txrx::run_thread() /* Compute TX time: Any transmission happens in TTI+4 thus advance 4 ms the reception time */ srslte_timestamp_copy(&tx_time, &rx_time); - srslte_timestamp_add(&tx_time, 0, 4e-3); + srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3); Debug("Settting TTI=%d, tx_mutex=%d, tx_time=%d:%f to worker %d\n", tti, tx_mutex_cnt, diff --git a/srsue/hdr/mac/dl_harq.h b/srsue/hdr/mac/dl_harq.h index 278dbc6dc..521018d73 100644 --- a/srsue/hdr/mac/dl_harq.h +++ b/srsue/hdr/mac/dl_harq.h @@ -259,7 +259,7 @@ private: memcpy(&cur_grant, &grant, sizeof(Tgrant)); // If data has not yet been successfully decoded - if (!ack) { + if (!ack || (grant.rv[tid]==0 && grant.phy_grant.dl.mcs[tid].idx < 29)) { // Instruct the PHY To combine the received data and attempt to decode it if (pid == HARQ_BCCH_PID) { @@ -347,9 +347,10 @@ private: // Determine if it's a new transmission 5.3.2.2 bool calc_is_new_transmission(Tgrant grant) { - if ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) - (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) - is_first_tb) // 3rd condition (first TB) + if (grant.phy_grant.dl.mcs[tid].idx <= 28 && // mcs 29,30,31 always retx regardless of rest + ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) + (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) + is_first_tb)) { is_first_tb = false; is_new_transmission = true; diff --git a/srsue/hdr/mac/mac.h b/srsue/hdr/mac/mac.h index a306af187..d19f668bf 100644 --- a/srsue/hdr/mac/mac.h +++ b/srsue/hdr/mac/mac.h @@ -109,7 +109,7 @@ private: static const int MAC_MAIN_THREAD_PRIO = 5; static const int MAC_PDU_THREAD_PRIO = 6; - static const int MAC_NOF_HARQ_PROC = 8; + static const int MAC_NOF_HARQ_PROC = 2*HARQ_DELAY_MS; 
// Interaction with PHY srslte::tti_sync_cv ttisync; diff --git a/srsue/hdr/mac/mux.h b/srsue/hdr/mac/mux.h index 0d04c99cb..ebc61b5c6 100644 --- a/srsue/hdr/mac/mux.h +++ b/srsue/hdr/mac/mux.h @@ -82,8 +82,7 @@ private: const static int MIN_RLC_SDU_LEN = 0; const static int MAX_NOF_SUBHEADERS = 20; - const static int MAX_HARQ_PROC = 8; - + std::vector lch; // Keep track of the PIDs that transmitted BSR reports diff --git a/srsue/hdr/phy/phch_common.h b/srsue/hdr/phy/phch_common.h index aa64fe9ea..39e9e9685 100644 --- a/srsue/hdr/phy/phch_common.h +++ b/srsue/hdr/phy/phch_common.h @@ -138,7 +138,7 @@ namespace srsue { uint32_t I_lowest; uint32_t n_dmrs; } pending_ack_t; - pending_ack_t pending_ack[10]; + pending_ack_t pending_ack[TTIMOD_SZ]; bool is_first_tx; diff --git a/srsue/hdr/phy/phch_recv.h b/srsue/hdr/phy/phch_recv.h index 044960760..01a296094 100644 --- a/srsue/hdr/phy/phch_recv.h +++ b/srsue/hdr/phy/phch_recv.h @@ -53,6 +53,7 @@ public: void set_agc_enable(bool enable); void set_earfcn(std::vector earfcn); + void force_freq(float dl_freq, float ul_freq); void reset_sync(); void cell_search_start(); @@ -157,7 +158,7 @@ private: uint32_t current_earfcn; uint32_t sync_sfn_cnt; - const static uint32_t SYNC_SFN_TIMEOUT = 200; + const static uint32_t SYNC_SFN_TIMEOUT = 1000; float ul_dl_factor; int cur_earfcn_index; bool cell_search_in_progress; @@ -165,12 +166,16 @@ private: float measure_rsrp; srslte_ue_dl_t ue_dl_measure; - const static int RSRP_MEASURE_NOF_FRAMES = 5; + const static int RSRP_MEASURE_NOF_FRAMES = 10; int cell_sync_sfn(); int cell_meas_rsrp(); int cell_search(int force_N_id_2 = -1); bool set_cell(); + + float dl_freq; + float ul_freq; + }; } // namespace srsue diff --git a/srsue/hdr/phy/phch_worker.h b/srsue/hdr/phy/phch_worker.h index 0811723e0..c26966c17 100644 --- a/srsue/hdr/phy/phch_worker.h +++ b/srsue/hdr/phy/phch_worker.h @@ -149,7 +149,12 @@ private: uint32_t I_sr; float cfo; bool rar_cqi_request; - + + // Save last TBS for mcs>28 
cases + int last_dl_tbs[2*HARQ_DELAY_MS][SRSLTE_MAX_CODEWORDS]; + int last_ul_tbs[2*HARQ_DELAY_MS]; + srslte_mod_t last_ul_mod[2*HARQ_DELAY_MS]; + // Metrics dl_metrics_t dl_metrics; ul_metrics_t ul_metrics; diff --git a/srsue/hdr/phy/phy.h b/srsue/hdr/phy/phy.h index 0c77360b2..07a2713fa 100644 --- a/srsue/hdr/phy/phy.h +++ b/srsue/hdr/phy/phy.h @@ -76,6 +76,7 @@ public: void write_trace(std::string filename); void set_earfcn(std::vector earfcns); + void force_freq(float dl_freq, float ul_freq); /********** RRC INTERFACE ********************/ void reset(); @@ -167,7 +168,7 @@ private: /* Current time advance */ uint32_t n_ta; - + bool init_(srslte::radio *radio_handler, mac_interface_phy *mac, srslte::log *log_h, bool do_agc, uint32_t nof_workers); void set_default_args(phy_args_t *args); bool check_args(phy_args_t *args); diff --git a/srsue/hdr/ue_base.h b/srsue/hdr/ue_base.h index 411896f70..b547945a9 100644 --- a/srsue/hdr/ue_base.h +++ b/srsue/hdr/ue_base.h @@ -103,6 +103,7 @@ typedef struct { }gui_args_t; typedef struct { + std::string ip_netmask; phy_args_t phy; float metrics_period_secs; bool pregenerate_signals; diff --git a/srsue/hdr/upper/gw.h b/srsue/hdr/upper/gw.h index 800b31624..b97ceb6c5 100644 --- a/srsue/hdr/upper/gw.h +++ b/srsue/hdr/upper/gw.h @@ -57,8 +57,13 @@ public: // NAS interface srslte::error_t setup_if_addr(uint32_t ip_addr, char *err_str); + void set_netmask(std::string netmask); + private: + bool default_netmask; + std::string netmask; + static const int GW_THREAD_PRIO = 7; pdcp_interface_gw *pdcp; diff --git a/srsue/hdr/upper/rrc.h b/srsue/hdr/upper/rrc.h index 3643f76c3..3e2fb70dd 100644 --- a/srsue/hdr/upper/rrc.h +++ b/srsue/hdr/upper/rrc.h @@ -98,6 +98,8 @@ private: uint8_t transaction_id; bool drb_up; + bool reestablishment_in_progress; + // timeouts in ms uint32_t connecting_timeout; @@ -244,7 +246,8 @@ private: // Helpers void rrc_connection_release(); - void radio_link_failure(); + void con_restablish_cell_reselected(); + 
void radio_link_failure(); static void* start_sib_thread(void *rrc_); void sib_search(); void apply_sib2_configs(LIBLTE_RRC_SYS_INFO_BLOCK_TYPE_2_STRUCT *sib2); diff --git a/srsue/src/mac/demux.cc b/srsue/src/mac/demux.cc index 14c524165..171e7bf45 100644 --- a/srsue/src/mac/demux.cc +++ b/srsue/src/mac/demux.cc @@ -36,7 +36,7 @@ namespace srsue { -demux::demux() : mac_msg(20), pending_mac_msg(20) +demux::demux() : mac_msg(20), pending_mac_msg(20), rlc(NULL) { } diff --git a/srsue/src/mac/mac.cc b/srsue/src/mac/mac.cc index 1a2c909ae..f8d10bb34 100644 --- a/srsue/src/mac/mac.cc +++ b/srsue/src/mac/mac.cc @@ -117,7 +117,8 @@ void mac::reset() Info("Resetting MAC\n"); - timers.stop_all(); + timers.get(timer_alignment)->stop(); + timers.get(contention_resolution_timer)->stop(); ul_harq.reset_ndi(); diff --git a/srsue/src/mac/proc_bsr.cc b/srsue/src/mac/proc_bsr.cc index 898943ab9..43694c1bc 100644 --- a/srsue/src/mac/proc_bsr.cc +++ b/srsue/src/mac/proc_bsr.cc @@ -368,7 +368,7 @@ bool bsr_proc::need_to_reset_sr() { bool bsr_proc::need_to_send_sr(uint32_t tti) { if (!sr_is_sent && triggered_bsr_type == REGULAR) { - if (srslte_tti_interval(tti,next_tx_tti)>0 && srslte_tti_interval(tti,next_tx_tti) < 10240-4) { + if (srslte_tti_interval(tti,next_tx_tti)>0 && srslte_tti_interval(tti,next_tx_tti) < 10240-HARQ_DELAY_MS) { reset_sr = false; sr_is_sent = true; Debug("BSR: Need to send sr: sr_is_sent=true, reset_sr=false, tti=%d, next_tx_tti=%d\n", tti, next_tx_tti); diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 569083998..b933b66f3 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -65,6 +65,8 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { common.add_options() ("rf.dl_earfcn", bpo::value(&args->rf.dl_earfcn)->default_value(3400), "Downlink EARFCN") ("rf.freq_offset", bpo::value(&args->rf.freq_offset)->default_value(0), "(optional) Frequency offset") + ("rf.dl_freq", bpo::value(&args->rf.dl_freq)->default_value(-1), "Downlink Frequency (if 
positive overrides EARFCN)") + ("rf.ul_freq", bpo::value(&args->rf.ul_freq)->default_value(-1), "Uplink Frequency (if positive overrides EARFCN)") ("rf.rx_gain", bpo::value(&args->rf.rx_gain)->default_value(-1), "Front-end receiver gain") ("rf.tx_gain", bpo::value(&args->rf.tx_gain)->default_value(-1), "Front-end transmitter gain") ("rf.nof_rx_ant", bpo::value(&args->rf.nof_rx_ant)->default_value(1), "Number of RX antennas") @@ -120,6 +122,10 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { /* Expert section */ + ("expert.ip_netmask", + bpo::value(&args->expert.ip_netmask)->default_value("255.255.255.0"), + "Netmask of the tun_srsue device") + ("expert.phy.worker_cpu_mask", bpo::value(&args->expert.phy.worker_cpu_mask)->default_value(-1), "cpu bit mask (eg 255 = 1111 1111)") @@ -196,6 +202,11 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { bpo::value(&args->expert.phy.cfo_correct_tol_hz)->default_value(50.0), "Tolerance (in Hz) for digial CFO compensation.") + ("expert.cfo_ema", + bpo::value(&args->expert.phy.cfo_ema)->default_value(0.4), + "CFO Exponential Moving Average coefficient. 
Lower makes it more robust to noise " + "but vulnerable to periodic interruptions due to VCO corrections.") + ("expert.time_correct_period", bpo::value(&args->expert.phy.time_correct_period)->default_value(5), "Period for sampling time offset correction.") diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index d49b1ced2..d956ddd15 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -136,12 +136,14 @@ srslte::radio* phch_common::get_radio() void phch_common::set_rar_grant(uint32_t tti, uint8_t grant_payload[SRSLTE_RAR_GRANT_LEN]) { srslte_dci_rar_grant_unpack(&rar_grant, grant_payload); - rar_grant_pending = true; - // PUSCH is at n+6 or n+7 and phch_worker assumes default delay of 4 ttis + rar_grant_pending = true; + if (MSG3_DELAY_MS < 0) { + fprintf(stderr, "Error MSG3_DELAY_MS can't be negative\n"); + } if (rar_grant.ul_delay) { - rar_grant_tti = (tti + 3) % 10240; + rar_grant_tti = (tti + MSG3_DELAY_MS + 1) % 10240; } else { - rar_grant_tti = (tti + 2) % 10240; + rar_grant_tti = (tti + MSG3_DELAY_MS) % 10240; } } @@ -195,13 +197,13 @@ void phch_common::set_dl_rnti(srslte_rnti_type_t type, uint16_t rnti_value, int } void phch_common::reset_pending_ack(uint32_t tti) { - pending_ack[tti%10].enabled = false; + pending_ack[TTIMOD(tti)].enabled = false; } void phch_common::set_pending_ack(uint32_t tti, uint32_t I_lowest, uint32_t n_dmrs) { - pending_ack[tti%10].enabled = true; - pending_ack[tti%10].I_lowest = I_lowest; - pending_ack[tti%10].n_dmrs = n_dmrs; + pending_ack[TTIMOD(tti)].enabled = true; + pending_ack[TTIMOD(tti)].I_lowest = I_lowest; + pending_ack[TTIMOD(tti)].n_dmrs = n_dmrs; Debug("Set pending ACK for tti=%d I_lowest=%d, n_dmrs=%d\n", tti, I_lowest, n_dmrs); } @@ -211,12 +213,12 @@ bool phch_common::get_pending_ack(uint32_t tti) { bool phch_common::get_pending_ack(uint32_t tti, uint32_t *I_lowest, uint32_t *n_dmrs) { if (I_lowest) { - *I_lowest = pending_ack[tti%10].I_lowest; + *I_lowest = 
pending_ack[TTIMOD(tti)].I_lowest; } if (n_dmrs) { - *n_dmrs = pending_ack[tti%10].n_dmrs; + *n_dmrs = pending_ack[TTIMOD(tti)].n_dmrs; } - return pending_ack[tti%10].enabled; + return pending_ack[TTIMOD(tti)].enabled; } /* The transmisison of UL subframes must be in sequence. Each worker uses this function to indicate @@ -334,6 +336,7 @@ void phch_common::reset_ul() pthread_mutex_trylock(&tx_mutex[i]); pthread_mutex_unlock(&tx_mutex[i]); } + radio_h->tx_end(); } } diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index c14141257..8a69a7f76 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -63,6 +63,8 @@ double callback_set_rx_gain(void *h, double gain) { phch_recv::phch_recv() { + dl_freq = -1; + ul_freq = -1; bzero(&cell, sizeof(srslte_cell_t)); running = false; } @@ -101,13 +103,13 @@ void phch_recv:: init(srslte::radio_multi *_radio_handler, mac_interface_phy *_ if (do_agc) { srslte_ue_sync_start_agc(&cs.ue_sync, callback_set_rx_gain, last_gain); } - - if (srslte_ue_dl_init(&ue_dl_measure, SRSLTE_MAX_PRB, nof_rx_antennas)) { + + if (srslte_ue_dl_init(&ue_dl_measure, sf_buffer, SRSLTE_MAX_PRB, nof_rx_antennas)) { Error("SYNC: Initiating ue_dl_measure\n"); return; } - if (srslte_ue_mib_init(&ue_mib, SRSLTE_MAX_PRB)) { + if (srslte_ue_mib_init(&ue_mib, sf_buffer, SRSLTE_MAX_PRB)) { Error("SYNC: Initiating UE MIB decoder\n"); return; } @@ -207,6 +209,7 @@ void phch_recv::set_ue_sync_opts(srslte_ue_sync_t *q) { srslte_ue_sync_cfo_i_detec_en(q, true); } + srslte_ue_sync_set_cfo_ema(q, worker_com->args->cfo_ema); srslte_ue_sync_set_cfo_tol(q, worker_com->args->cfo_correct_tol_hz); int time_correct_period = worker_com->args->time_correct_period; @@ -374,7 +377,7 @@ int phch_recv::cell_sync_sfn(void) { int sfn_offset = 0; Info("SYNC: Trying to decode MIB... 
SNR=%.1f dB\n", 10*log10(srslte_chest_dl_get_snr(&ue_mib.chest))); - int n = srslte_ue_mib_decode(&ue_mib, sf_buffer[0], bch_payload, NULL, &sfn_offset); + int n = srslte_ue_mib_decode(&ue_mib, bch_payload, NULL, &sfn_offset); if (n < 0) { Error("SYNC: Error decoding MIB while synchronising SFN"); return -1; @@ -445,6 +448,11 @@ void phch_recv::set_earfcn(std::vector earfcn) { this->earfcn = earfcn; } +void phch_recv::force_freq(float dl_freq, float ul_freq) { + this->dl_freq = dl_freq; + this->ul_freq = ul_freq; +} + bool phch_recv::stop_sync() { wait_radio_reset(); @@ -568,17 +576,25 @@ bool phch_recv::cell_select(uint32_t earfcn, srslte_cell_t cell) { bool phch_recv::set_frequency() { - double dl_freq = 1e6*srslte_band_fd(current_earfcn); - double ul_freq = 1e6*srslte_band_fu(srslte_band_ul_earfcn(current_earfcn)); - if (dl_freq > 0 && ul_freq > 0) { + double set_dl_freq = 0; + double set_ul_freq = 0; + + if (this->dl_freq > 0 && this->ul_freq > 0) { + set_dl_freq = this->dl_freq; + set_ul_freq = this->ul_freq; + } else { + set_dl_freq = 1e6*srslte_band_fd(current_earfcn); + set_ul_freq = 1e6*srslte_band_fu(srslte_band_ul_earfcn(current_earfcn)); + } + if (set_dl_freq > 0 && set_ul_freq > 0) { log_h->info("SYNC: Set DL EARFCN=%d, f_dl=%.1f MHz, f_ul=%.1f MHz\n", - current_earfcn, dl_freq / 1e6, ul_freq / 1e6); + current_earfcn, set_dl_freq / 1e6, set_ul_freq / 1e6); log_h->console("Searching cell in DL EARFCN=%d, f_dl=%.1f MHz, f_ul=%.1f MHz\n", - current_earfcn, dl_freq / 1e6, ul_freq / 1e6); + current_earfcn, set_dl_freq / 1e6, set_ul_freq / 1e6); - radio_h->set_rx_freq(dl_freq); - radio_h->set_tx_freq(ul_freq); + radio_h->set_rx_freq(set_dl_freq); + radio_h->set_tx_freq(set_ul_freq); ul_dl_factor = radio_h->get_tx_freq()/radio_h->get_rx_freq(); srslte_ue_sync_reset(&ue_sync); @@ -717,11 +733,11 @@ void phch_recv::run_thread() { worker->set_sample_offset(srslte_ue_sync_get_sfo(&ue_sync)/1000); - /* Compute TX time: Any transmission happens in TTI4 thus advance 
4 ms the reception time */ + /* Compute TX time: Any transmission happens in TTI+4 thus advance 4 ms the reception time */ srslte_timestamp_t rx_time, tx_time, tx_time_prach; srslte_ue_sync_get_last_timestamp(&ue_sync, &rx_time); srslte_timestamp_copy(&tx_time, &rx_time); - srslte_timestamp_add(&tx_time, 0, 4e-3 - time_adv_sec); + srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3 - time_adv_sec); worker->set_tx_time(tx_time, next_offset); next_offset = 0; diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index c0fec2ba2..33795a69f 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -65,8 +65,7 @@ phch_worker::phch_worker() : tr_exec(10240) cell_initiated = false; pregen_enabled = false; trace_enabled = false; - - reset(); + reset(); } @@ -97,7 +96,7 @@ void phch_worker::reset() bzero(&period_cqi, sizeof(srslte_cqi_periodic_cfg_t)); I_sr = 0; rnti_is_set = false; - rar_cqi_request = false; + rar_cqi_request = false; cfi = 0; } @@ -118,12 +117,12 @@ bool phch_worker::init(uint32_t max_prb, srslte::log *log_h) } } - if (srslte_ue_dl_init(&ue_dl, max_prb, phy->args->nof_rx_ant)) { + if (srslte_ue_dl_init(&ue_dl, signal_buffer, max_prb, phy->args->nof_rx_ant)) { Error("Initiating UE DL\n"); return false; } - if (srslte_ue_ul_init(&ue_ul, max_prb)) { + if (srslte_ue_ul_init(&ue_ul, signal_buffer[0], max_prb)) { Error("Initiating UE UL\n"); return false; } @@ -292,6 +291,13 @@ void phch_worker::work_imp() } } } + + // Process RAR before UL to enable zero-delay Msg3 + bool rar_delivered = false; + if (HARQ_DELAY_MS == MSG3_DELAY_MS && dl_mac_grant.rnti_type == SRSLTE_RNTI_RAR) { + rar_delivered = true; + phy->mac->tb_decoded(dl_ack[0], 0, dl_mac_grant.rnti_type, dl_mac_grant.pid); + } // Decode PHICH bool ul_ack = false; @@ -313,8 +319,8 @@ void phch_worker::work_imp() set_uci_periodic_cqi(); } - /* TTI offset for UL is always 4 for LTE */ - ul_action.tti_offset = 4; + /* TTI offset for UL */ + ul_action.tti_offset = 
HARQ_DELAY_MS; /* Send UL grant or HARQ information (from PHICH) to MAC */ if (ul_grant_available && ul_ack_available) { @@ -335,7 +341,7 @@ void phch_worker::work_imp() &ul_action.softbuffers[0], ul_action.rv[0], ul_action.rnti, ul_mac_grant.is_from_rar); signal_ready = true; if (ul_action.expect_ack) { - phy->set_pending_ack(tti + 8, ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); + phy->set_pending_ack(TTI_RX_ACK(tti), ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); } } else if (dl_action.generate_ack || uci_data.scheduling_request || uci_data.uci_cqi_len > 0) { @@ -357,7 +363,7 @@ void phch_worker::work_imp() if (!dl_action.generate_ack_callback) { if (dl_mac_grant.rnti_type == SRSLTE_RNTI_PCH && dl_action.decode_enabled[0]) { phy->mac->pch_decoded_ok(dl_mac_grant.n_bytes[0]); - } else { + } else if (!rar_delivered) { for (uint32_t tb = 0; tb < SRSLTE_MAX_TB; tb++) { if (dl_action.decode_enabled[tb]) { phy->mac->tb_decoded(dl_ack[tb], tb, dl_mac_grant.rnti_type, dl_mac_grant.pid); @@ -472,10 +478,20 @@ bool phch_worker::decode_pdcch_dl(srsue::mac_interface_phy::mac_grant_t* grant) return false; } + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); + + // Set last TBS for this TB (pid) in case of mcs>29 (7.1.7.2 of 36.213) + for (int i=0;iphy_grant.dl.mcs[i].tbs < 0) { + grant->phy_grant.dl.mcs[i].tbs = last_dl_tbs[grant->pid%(2*HARQ_DELAY_MS)][i]; + } + // save it + last_dl_tbs[grant->pid%(2*HARQ_DELAY_MS)][i] = grant->phy_grant.dl.mcs[i].tbs; + } + /* Fill MAC grant structure */ grant->ndi[0] = dci_unpacked.ndi; grant->ndi[1] = dci_unpacked.ndi_1; - grant->pid = dci_unpacked.harq_process; grant->n_bytes[0] = grant->phy_grant.dl.mcs[0].tbs / (uint32_t) 8; grant->n_bytes[1] = grant->phy_grant.dl.mcs[1].tbs / (uint32_t) 8; grant->tti = tti; @@ -663,7 +679,7 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) char timestr[64]; timestr[0]='\0'; - 
phy->reset_pending_ack(tti + 8); + phy->reset_pending_ack(TTI_RX_ACK(tti)); srslte_dci_msg_t dci_msg; srslte_ra_ul_dci_t dci_unpacked; @@ -712,14 +728,29 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) ue_dl.last_location_ul.ncce, (1<phy_grant.ul.mcs.tbs==0) { - srslte_vec_fprint_hex(stdout, dci_msg.data, dci_msg.nof_bits); + Info("Received PUSCH grant with empty data\n"); } } } - + + if (ret) { + + // Use last TBS for this TB in case of mcs>28 + if (grant->phy_grant.ul.mcs.tbs < 0) { + grant->phy_grant.ul.mcs.tbs = last_ul_tbs[tti%(2*HARQ_DELAY_MS)]; + } + last_ul_tbs[tti%(2*HARQ_DELAY_MS)] = grant->phy_grant.ul.mcs.tbs; + + if (grant->phy_grant.ul.mcs.mod == SRSLTE_MOD_LAST) { + grant->phy_grant.ul.mcs.mod = last_ul_mod[tti%(2*HARQ_DELAY_MS)]; + grant->phy_grant.ul.Qm = srslte_mod_bits_x_symbol(grant->phy_grant.ul.mcs.mod); + } + last_ul_mod[tti%(2*HARQ_DELAY_MS)] = grant->phy_grant.ul.mcs.mod; + } + /* Limit UL modulation if not supported by the UE or disabled by higher layers */ if (!phy->config->enable_64qam) { - if (grant->phy_grant.ul.mcs.mod == SRSLTE_MOD_64QAM) { + if (grant->phy_grant.ul.mcs.mod >= SRSLTE_MOD_64QAM) { grant->phy_grant.ul.mcs.mod = SRSLTE_MOD_16QAM; grant->phy_grant.ul.Qm = 4; } @@ -776,7 +807,7 @@ void phch_worker::set_uci_sr() { uci_data.scheduling_request = false; if (phy->sr_enabled) { - uint32_t sr_tx_tti = (tti+4)%10240; + uint32_t sr_tx_tti = TTI_TX(tti); // Get I_sr parameter if (srslte_ue_ul_sr_send_tti(I_sr, sr_tx_tti)) { Info("PUCCH: SR transmission at TTI=%d, I_sr=%d\n", sr_tx_tti, I_sr); @@ -793,7 +824,7 @@ void phch_worker::set_uci_periodic_cqi() int cqi_max = phy->args->cqi_max; if (period_cqi.configured && rnti_is_set) { - if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, (tti+4)%10240)) { + if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, TTI_TX(tti))) { if (uci_data.uci_ri_len) { uci_data.uci_cqi[0] = uci_data.uci_ri; 
uci_data.uci_cqi_len = uci_data.uci_ri_len; @@ -802,7 +833,7 @@ void phch_worker::set_uci_periodic_cqi() uci_data.uci_pmi_len = 0; Info("PUCCH: Periodic RI=%d\n", uci_data.uci_cqi[0]); } - } else if (srslte_cqi_send(period_cqi.pmi_idx, (tti+4)%10240)) { + } else if (srslte_cqi_send(period_cqi.pmi_idx, TTI_TX(tti))) { srslte_cqi_value_t cqi_report; if (period_cqi.format_is_subband) { // TODO: Implement subband periodic reports @@ -868,8 +899,8 @@ void phch_worker::set_uci_aperiodic_cqi() bool phch_worker::srs_is_ready_to_send() { if (srs_cfg.configured) { - if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, (tti+4)%10) == 1 && - srslte_refsignal_srs_send_ue(srs_cfg.I_srs, (tti+4)%10240) == 1) + if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, TTI_TX(tti)%10) == 1 && + srslte_refsignal_srs_send_ue(srs_cfg.I_srs, TTI_TX(tti)) == 1) { return true; } @@ -889,10 +920,10 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_cfg_grant(&ue_ul, grant, (tti+4)%10240, rv, current_tx_nb)) { + if (srslte_ue_ul_cfg_grant(&ue_ul, grant, TTI_TX(tti), rv, current_tx_nb)) { Error("Configuring UL grant\n"); } - + if (srslte_ue_ul_pusch_encode_rnti_softbuffer(&ue_ul, payload, uci_data, softbuffer, @@ -919,12 +950,12 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui #endif Info("PUSCH: tti_tx=%d, alloc=(%d,%d), tbs=%d, mcs=%d, rv=%d, ack=%s, ri=%s, cfo=%.1f KHz%s\n", - (tti+4)%10240, - grant->n_prb[0], grant->n_prb[0]+grant->L_prb, - grant->mcs.tbs/8, grant->mcs.idx, rv, - uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", - uci_data.uci_ri_len>0?(uci_data.uci_ri?"1":"0"):"no", - cfo*15, timestr); + (tti+HARQ_DELAY_MS)%10240, + grant->n_prb[0], grant->n_prb[0]+grant->L_prb, + grant->mcs.tbs/8, grant->mcs.idx, rv, + uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", + uci_data.uci_ri_len>0?(uci_data.uci_ri?"1":"0"):"no", + cfo*15, timestr); // 
Store metrics ul_metrics.mcs = grant->mcs.idx; @@ -950,7 +981,7 @@ void phch_worker::encode_pucch() gettimeofday(&t[1], NULL); #endif - if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, (tti+4)%10240, signal_buffer[0])) { + if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, TTI_TX(tti), signal_buffer[0])) { Error("Encoding PUCCH\n"); } @@ -975,7 +1006,7 @@ void phch_worker::encode_pucch() uci_data.uci_pmi_len>0?(uci_data.uci_pmi[0]?"1":"0"):"", uci_data.scheduling_request?"yes":"no", cfo*15, timestr); - } + } if (uci_data.scheduling_request) { phy->sr_enabled = false; @@ -987,7 +1018,7 @@ void phch_worker::encode_srs() char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_srs_encode(&ue_ul, (tti+4)%10240, signal_buffer[0])) + if (srslte_ue_ul_srs_encode(&ue_ul, TTI_TX(tti), signal_buffer[0])) { Error("Encoding SRS\n"); } @@ -1002,7 +1033,7 @@ void phch_worker::encode_srs() float gain = set_power(tx_power); uint32_t fi = srslte_vec_max_fi((float*) signal_buffer, SRSLTE_SF_LEN_PRB(cell.nof_prb)); float *f = (float*) signal_buffer; - Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, (tti+4)%10240, timestr); + Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, TTI_TX(tti), timestr); } diff --git a/srsue/src/phy/phy.cc b/srsue/src/phy/phy.cc index e74107661..9760ab374 100644 --- a/srsue/src/phy/phy.cc +++ b/srsue/src/phy/phy.cc @@ -200,8 +200,8 @@ void phy::set_timeadv_rar(uint32_t ta_cmd) { void phy::set_timeadv(uint32_t ta_cmd) { n_ta = srslte_N_ta_new(n_ta, ta_cmd); - //sf_recv.set_time_adv_sec(((float) n_ta)*SRSLTE_LTE_TS); - Warning("Not supported: Set TA: ta_cmd: %d, n_ta: %d, ta_usec: %.1f\n", ta_cmd, n_ta, ((float) n_ta)*SRSLTE_LTE_TS*1e6); + sf_recv.set_time_adv_sec(((float) n_ta)*SRSLTE_LTE_TS); + //Warning("Not supported: Set TA: ta_cmd: %d, n_ta: %d, ta_usec: %.1f\n", ta_cmd, n_ta, ((float) n_ta)*SRSLTE_LTE_TS*1e6); } void phy::configure_prach_params() @@ -308,7 +308,8 @@ void phy::reset() pdcch_dl_search_reset(); 
for(uint32_t i=0;i earfcns) sf_recv.set_earfcn(earfcns); } +void phy::force_freq(float dl_freq, float ul_freq) +{ + sf_recv.force_freq(dl_freq, ul_freq); +} + bool phy::sync_status() { return sf_recv.status_is_sync(); diff --git a/srsue/src/ue.cc b/srsue/src/ue.cc index 92adaf8dd..a99281e6f 100644 --- a/srsue/src/ue.cc +++ b/srsue/src/ue.cc @@ -185,6 +185,8 @@ bool ue::init(all_args_t *args_) nas.init(&usim, &rrc, &gw, &nas_log, 1 /* RB_ID_SRB1 */); gw.init(&pdcp, &nas, &gw_log, 3 /* RB_ID_DRB1 */); + gw.set_netmask(args->expert.ip_netmask); + rrc.init(&phy, &mac, &rlc, &pdcp, &nas, &usim, &mac, &rrc_log); rrc.set_ue_category(atoi(args->expert.ue_cateogry.c_str())); @@ -193,6 +195,10 @@ bool ue::init(all_args_t *args_) earfcn_list.push_back(args->rf.dl_earfcn); phy.set_earfcn(earfcn_list); + if (args->rf.dl_freq > 0 && args->rf.ul_freq > 0) { + phy.force_freq(args->rf.dl_freq, args->rf.ul_freq); + } + printf("Waiting PHY to initialize...\n"); phy.wait_initialize(); phy.configure_ul_params(); diff --git a/srsue/src/upper/gw.cc b/srsue/src/upper/gw.cc index 07ac36989..1e3e81999 100644 --- a/srsue/src/upper/gw.cc +++ b/srsue/src/upper/gw.cc @@ -44,6 +44,7 @@ gw::gw() :if_up(false) { current_ip_addr = 0; + default_netmask = true; } void gw::init(pdcp_interface_gw *pdcp_, nas_interface_gw *nas_, srslte::log *gw_log_, uint32_t lcid_) @@ -104,6 +105,12 @@ void gw::get_metrics(gw_metrics_t &m) ul_tput_bytes = 0; } +void gw::set_netmask(std::string netmask) { + default_netmask = false; + this->netmask = netmask; +} + + /******************************************************************************* PDCP interface *******************************************************************************/ @@ -152,7 +159,11 @@ srslte::error_t gw::setup_if_addr(uint32_t ip_addr, char *err_str) return(srslte::ERROR_CANT_START); } ifr.ifr_netmask.sa_family = AF_INET; - ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr = inet_addr("255.255.255.0"); + const char *mask = 
"255.255.255.0"; + if (!default_netmask) { + mask = netmask.c_str(); + } + ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr = inet_addr(mask); if(0 > ioctl(sock, SIOCSIFNETMASK, &ifr)) { err_str = strerror(errno); diff --git a/srsue/src/upper/nas.cc b/srsue/src/upper/nas.cc index f0fd8cf54..110c8c78e 100644 --- a/srsue/src/upper/nas.cc +++ b/srsue/src/upper/nas.cc @@ -281,7 +281,6 @@ void nas::parse_attach_accept(uint32_t lcid, byte_buffer_t *pdu) { LIBLTE_MME_ACTIVATE_DEFAULT_EPS_BEARER_CONTEXT_ACCEPT_MSG_STRUCT act_def_eps_bearer_context_accept; nas_log->info("Received Attach Accept\n"); - count_dl++; liblte_mme_unpack_attach_accept_msg((LIBLTE_BYTE_MSG_STRUCT *) pdu, &attach_accept); @@ -359,6 +358,8 @@ void nas::parse_attach_accept(uint32_t lcid, byte_buffer_t *pdu) { state = EMM_STATE_REGISTERED; current_plmn = selecting_plmn; + count_dl++; + // Send EPS bearer context accept and attach complete count_ul++; act_def_eps_bearer_context_accept.eps_bearer_id = eps_bearer_id; @@ -437,6 +438,9 @@ void nas::parse_authentication_request(uint32_t lcid, byte_buffer_t *pdu) { nas_log->console("Warning: Network authentication failure\n"); pool->deallocate(pdu); } + + // Reset DL counter (as per 24.301 5.4.3.2) + count_dl = 0; } void nas::parse_authentication_reject(uint32_t lcid, byte_buffer_t *pdu) { @@ -539,6 +543,8 @@ void nas::parse_security_mode_command(uint32_t lcid, byte_buffer_t *pdu) { } } + count_dl++; + if (!success) { // Reuse pdu for response pdu->reset(); diff --git a/srsue/src/upper/rrc.cc b/srsue/src/upper/rrc.cc index 94c2e449a..10373dcf2 100644 --- a/srsue/src/upper/rrc.cc +++ b/srsue/src/upper/rrc.cc @@ -35,8 +35,6 @@ #include "srslte/common/security.h" #include "srslte/common/bcd_helpers.h" -#define TIMEOUT_RESYNC_REESTABLISH 100 - using namespace srslte; namespace srsue { @@ -92,6 +90,8 @@ void rrc::init(phy_interface_rrc *phy_, pthread_mutex_init(&mutex, NULL); + reestablishment_in_progress = false; + ue_category = SRSLTE_UE_CATEGORY; t301 = 
mac_timers->timer_get_unique_id(); t310 = mac_timers->timer_get_unique_id(); @@ -207,7 +207,11 @@ void rrc::run_thread() { break; case RRC_STATE_CELL_SELECTED: rrc_log->info("RRC Cell Selected: Sending connection request...\n"); - send_con_request(); + if (reestablishment_in_progress) { + con_restablish_cell_reselected(); + } else { + send_con_request(); + } state = RRC_STATE_CONNECTING; connecting_timeout = 0; break; @@ -226,6 +230,7 @@ void rrc::run_thread() { usleep(60000); rrc_log->info("Leaving RRC_CONNECTED state\n"); drb_up = false; + reestablishment_in_progress = false; pdcp->reset(); rlc->reset(); phy->reset(); @@ -484,12 +489,12 @@ void rrc::earfcn_end() { // Detection of physical layer problems (5.3.11.1) void rrc::out_of_sync() { - current_cell->in_sync = false; + current_cell->in_sync = false; if (!mac_timers->timer_get(t311)->is_running() && !mac_timers->timer_get(t310)->is_running()) { n310_cnt++; if (n310_cnt == N310) { // attempt resync - phy->sync_reset(); + //phy->sync_reset(); mac_timers->timer_get(t310)->reset(); mac_timers->timer_get(t310)->run(); @@ -663,6 +668,8 @@ void rrc::send_con_restablish_request() { ul_ccch_msg.msg.rrc_con_reest_req.cause = LIBLTE_RRC_CON_REEST_REQ_CAUSE_OTHER_FAILURE; liblte_rrc_pack_ul_ccch_msg(&ul_ccch_msg, (LIBLTE_BIT_MSG_STRUCT *) &bit_buf); + reestablishment_in_progress = true; + rrc_log->info("Initiating RRC Connection Reestablishment Procedure\n"); rrc_log->console("RRC Connection Reestablishment\n"); mac_timers->timer_get(t310)->stop(); @@ -673,19 +680,16 @@ void rrc::send_con_restablish_request() { set_phy_default(); mac->reset(); set_mac_default(); +} - // FIXME: Cell selection should be different?? 
- - // Wait for cell re-synchronization - uint32_t timeout_cnt = 0; - while (!phy->sync_status() && timeout_cnt < TIMEOUT_RESYNC_REESTABLISH) { - usleep(10000); - timeout_cnt++; - } +// Actions following cell reselection 5.3.7.3 +void rrc::con_restablish_cell_reselected() +{ + reestablishment_in_progress = false; + rrc_log->info("Cell Selection finished. Initiating transmission of RRC Connection Reestablishment Request\n"); mac_timers->timer_get(t301)->reset(); mac_timers->timer_get(t301)->run(); mac_timers->timer_get(t311)->stop(); - rrc_log->info("Cell Selection finished. Initiating transmission of RRC Connection Reestablishment Request\n"); // Byte align and pack the message bits for PDCP if ((bit_buf.N_bits % 8) != 0) { diff --git a/srsue/ue.conf.example b/srsue/ue.conf.example index 30c05069a..5b3559ae9 100644 --- a/srsue/ue.conf.example +++ b/srsue/ue.conf.example @@ -9,6 +9,8 @@ # rx_gain: Optional receive gain (dB). If disabled, AGC if enabled # # Optional parameters: +# dl_freq: Override DL frequency corresponding to dl_earfcn +# ul_freq: Override UL frequency corresponding to dl_earfcn # nof_rx_ant: Number of RX antennas (Default 1, supported 1 or 2) # device_name: Device driver family. Supported options: "auto" (uses first found), "UHD" or "bladeRF" # device_args: Arguments for the device driver. Options are "auto" or any string. @@ -96,7 +98,11 @@ enable = false ##################################################################### # Expert configuration options # -# ue_category: Sets UE category (range 1-5). Default: 4 +# ue_category: Sets UE category (range 1-5). Default: 4 +# ip_netmask: Netmask of the tun_srsue device. Default: 255.255.255.0 +# rssi_sensor_enabled: Enable or disable RF frontend RSSI sensor. Required for RSRP metrics but +# can cause UHD instability for long-duration testing. Default true. +# ue_category: Sets UE category (range 1-5). Default: 4 # # prach_gain: PRACH gain (dB). 
If defined, forces a gain for the tranmsission of PRACH only., # Default is to use tx_gain in [rf] section. @@ -113,7 +119,9 @@ enable = false # nof_phy_threads: Selects the number of PHY threads (maximum 4, minimum 1, default 2) # equalizer_mode: Selects equalizer mode. Valid modes are: "mmse", "zf" or any # non-negative real number to indicate a regularized zf coefficient. -# Default is MMSE. +# Default is MMSE. +# cfo_ema: CFO Exponential Moving Average coefficient. Lower makes it more robust to noise +# but vulnerable to periodic interruptions due to VCO corrections. # cfo_integer_enabled: Enables integer CFO estimation and correction. This needs improvement # and may lead to incorrect synchronization. Use with caution. # cfo_correct_tol_hz: Tolerance (in Hz) for digial CFO compensation. Lower tolerance means that @@ -136,6 +144,8 @@ enable = false # ##################################################################### [expert] +#ip_netmask = 255.255.255.0 +#rssi_sensor_enabled = false #ue_category = 4 #prach_gain = 30 #cqi_max = 15 @@ -146,6 +156,7 @@ enable = false #attach_enable_64qam = false #nof_phy_threads = 2 #equalizer_mode = mmse +#cfo_ema = 0.4 #cfo_integer_enabled = false #cfo_correct_tol_hz = 50 #time_correct_period = 5