diff --git a/CMakeLists.txt b/CMakeLists.txt index d72bb5fef..efaa1973a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "") option(ENABLE_SRSUE "Build srsUE application" ON) option(ENABLE_SRSENB "Build srsENB application" ON) -option(ENABLE_VOLK "Enable use of VOLK SIMD library" ON) +option(ENABLE_VOLK "Enable use of VOLK SIMD library" OFF) option(ENABLE_GUI "Enable GUI (using srsGUI)" ON) option(ENABLE_BLADERF "Enable BladeRF" ON) @@ -282,6 +282,11 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif(HAVE_AVX) endif (HAVE_AVX2) + if (HAVE_AVX512) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") + endif(HAVE_AVX512) + if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") if(HAVE_SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Ofast -funroll-loops") diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake index 3440f01c1..4c9673a9d 100644 --- a/cmake/modules/FindSSE.cmake +++ b/cmake/modules/FindSSE.cmake @@ -4,10 +4,11 @@ include(CheckCSourceRuns) -option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) -option(ENABLE_AVX "Enable compile-time AVX support." ON) -option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) -option(ENABLE_FMA "Enable compile-time FMA support." ON) +option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) +option(ENABLE_AVX "Enable compile-time AVX support." ON) +option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) +option(ENABLE_FMA "Enable compile-time FMA support." ON) +option(ENABLE_AVX512 "Enable compile-time AVX512 support." ON) if (ENABLE_SSE) # @@ -135,6 +136,41 @@ if (ENABLE_SSE) endif() endif() + if (ENABLE_AVX512) + + # + # Check compiler for AVX intrinsics + # + if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + set(CMAKE_REQUIRED_FLAGS "-mavx512f") + check_c_source_runs(" + #include + int main() + { + __m512i a, b, c; + const int src[16] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 , 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF}; + int dst[16]; + a = _mm512_loadu_si512( (__m512i*)src ); + b = _mm512_loadu_si512( (__m512i*)src ); + c = _mm512_add_epi32( a, b ); + _mm512_storeu_si512( (__m512i*)dst, c ); + int i = 0; + for( i = 0; i < 16; i++ ){ + if( ( src[i] + src[i] ) != dst[i] ){ + return -1; + } + } + return 0; + }" + HAVE_AVX512) + endif() + + if (HAVE_AVX512) + message(STATUS "AVX512 is enabled - target CPU must support it") + endif() + endif() + + endif() -mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2, HAVE_FMA) +mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2, HAVE_FMA, HAVE_AVX512) diff --git a/cmake/modules/FindVolk.cmake b/cmake/modules/FindVolk.cmake deleted file mode 100644 index 96262a3b8..000000000 --- a/cmake/modules/FindVolk.cmake +++ /dev/null @@ -1,161 +0,0 @@ -INCLUDE(FindPkgConfig) -PKG_CHECK_MODULES(PC_VOLK volk QUIET) - -FIND_PATH( - VOLK_INCLUDE_DIRS - NAMES volk/volk.h - HINTS $ENV{VOLK_DIR}/include - ${CMAKE_INSTALL_PREFIX}/include - ${PC_VOLK_INCLUDE_DIR} - PATHS /usr/local/include - /usr/include -) - -FIND_LIBRARY( - VOLK_LIBRARIES - NAMES volk - HINTS $ENV{VOLK_DIR}/lib - ${CMAKE_INSTALL_PREFIX}/lib - ${CMAKE_INSTALL_PREFIX}/lib64 - ${PC_VOLK_LIBDIR} - PATHS /usr/local/lib - /usr/local/lib64 - /usr/lib - /usr/lib64 -) - -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS) -MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS) - -IF(VOLK_FOUND) - SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION_16) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_32u HAVE_VOLK_MAX_FUNCTION_32) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION_16) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_32u HAVE_VOLK_MAX_ABS_FUNCTION_32) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION) - - - - SET(VOLK_DEFINITIONS "HAVE_VOLK") - IF(${HAVE_VOLK_CONVERT_IF_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_IF_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_REAL2_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL2_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONVERT_CI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_ABS_FUNCTION_16}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION_16") - ENDIF() - IF(${HAVE_VOLK_MAX_ABS_FUNCTION_32}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION_32") - ENDIF() - IF(${HAVE_VOLK_MAX_VEC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_INTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONVERT_FI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_FUNCTION_16}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION_16") - ENDIF() - IF(${HAVE_VOLK_MAX_FUNCTION_32}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION_32") - ENDIF() - IF(${HAVE_VOLK_ACC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_REAL_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DIVIDE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_CFC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CFC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_F_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ATAN_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION") - ENDIF() -ENDIF(VOLK_FOUND) diff --git a/lib/examples/cell_measurement.c b/lib/examples/cell_measurement.c index 37796682f..bfb8194df 100644 --- a/lib/examples/cell_measurement.c +++ b/lib/examples/cell_measurement.c @@ -249,7 +249,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating ue_sync\n"); return -1; } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, sf_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initiating UE downlink processing module\n"); return -1; } @@ -257,7 +257,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating UE downlink processing module\n"); return -1; } - if (srslte_ue_mib_init(&ue_mib, cell.nof_prb)) { + if (srslte_ue_mib_init(&ue_mib, sf_buffer, cell.nof_prb)) { fprintf(stderr, "Error initaiting UE MIB decoder\n"); return -1; } @@ -271,8 +271,16 @@ int main(int argc, char **argv) { /* Initialize subframe counter */ sf_cnt = 0; - - if (srslte_ofdm_rx_init(&fft, cell.cp, cell.nof_prb)) { + + int sf_re = SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp); + + cf_t *sf_symbols = srslte_vec_malloc(sf_re * sizeof(cf_t)); + + for (int i=0;i class metrics_hub : public periodic_thread { public: - bool init(metrics_interface *m_, float report_period_secs=1.0) { - m = m_; + metrics_hub() + :m(NULL) + ,report_period_secs(1) + {} + bool init(metrics_interface *m_, float report_period_secs_=1.0) { + m = m_; + report_period_secs = report_period_secs_; start_periodic(report_period_secs*1e6); return true; } @@ -47,16 +52,18 @@ public: private: void run_period() { - metrics_t metric; - bzero(&metric, sizeof(metrics_t)); - m->get_metrics(metric); - for (uint32_t i=0;iset_metrics(metric); + if (m) { + metrics_t metric; + bzero(&metric, sizeof(metrics_t)); + m->get_metrics(metric); + for (uint32_t i=0;iset_metrics(metric, report_period_secs); + } } } metrics_interface *m; - std::vector*> listeners; - + std::vector*> listeners; + float report_period_secs; }; } // namespace srslte diff --git a/lib/include/srslte/config.h b/lib/include/srslte/config.h index 68076c0c8..8a988a971 100644 --- a/lib/include/srslte/config.h +++ b/lib/include/srslte/config.h @@ -59,5 +59,6 @@ // cf_t definition typedef _Complex float cf_t; +typedef _Complex short int c16_t; #endif // CONFIG_H diff --git a/lib/include/srslte/interfaces/ue_interfaces.h b/lib/include/srslte/interfaces/ue_interfaces.h index 8561ba55c..88dfd285c 100644 --- a/lib/include/srslte/interfaces/ue_interfaces.h +++ b/lib/include/srslte/interfaces/ue_interfaces.h @@ -445,7 +445,8 @@ typedef struct { float snr_ema_coeff; std::string snr_estim_alg; bool cfo_integer_enabled; - float cfo_correct_tol_hz; + float cfo_correct_tol_hz; + float cfo_ema; int time_correct_period; bool sfo_correct_disable; std::string sss_algorithm; diff --git a/lib/include/srslte/phy/common/phy_common.h b/lib/include/srslte/phy/common/phy_common.h index 148a12974..4a1eeb27d 100644 --- a/lib/include/srslte/phy/common/phy_common.h +++ b/lib/include/srslte/phy/common/phy_common.h @@ -101,8 +101,8 @@ typedef enum {SRSLTE_SF_NORM, SRSLTE_SF_MBSFN} srslte_sf_t; #define SRSLTE_CP_ISEXT(cp) (cp==SRSLTE_CP_EXT) #define SRSLTE_CP_NSYMB(cp) (SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_NORM_NSYMB:SRSLTE_CP_EXT_NSYMB) -#define SRSLTE_CP_LEN(symbol_sz, c) ((int) ceil((((float) (c)*(symbol_sz))/2048))) -#define SRSLTE_CP_LEN_NORM(symbol, symbol_sz) ((symbol==0)?SRSLTE_CP_LEN((symbol_sz),SRSLTE_CP_NORM_0_LEN):SRSLTE_CP_LEN((symbol_sz),SRSLTE_CP_NORM_LEN)) +#define SRSLTE_CP_LEN(symbol_sz, c) ((int) ceilf((((float) (c)*(symbol_sz))/2048.0f))) +#define SRSLTE_CP_LEN_NORM(symbol, symbol_sz) (((symbol)==0)?SRSLTE_CP_LEN((symbol_sz),SRSLTE_CP_NORM_0_LEN):SRSLTE_CP_LEN((symbol_sz),SRSLTE_CP_NORM_LEN)) #define SRSLTE_CP_LEN_EXT(symbol_sz) (SRSLTE_CP_LEN((symbol_sz),SRSLTE_CP_EXT_LEN)) #define SRSLTE_SLOT_LEN(symbol_sz) (symbol_sz*15/2) @@ -197,7 +197,8 @@ typedef enum SRSLTE_API { SRSLTE_MOD_BPSK = 0, SRSLTE_MOD_QPSK, SRSLTE_MOD_16QAM, - SRSLTE_MOD_64QAM + SRSLTE_MOD_64QAM, + SRSLTE_MOD_LAST } srslte_mod_t; typedef struct SRSLTE_API { diff --git a/lib/include/srslte/phy/dft/dft.h b/lib/include/srslte/phy/dft/dft.h index b7fc663d8..b3dd4378b 100644 --- a/lib/include/srslte/phy/dft/dft.h +++ b/lib/include/srslte/phy/dft/dft.h @@ -63,6 +63,7 @@ typedef struct SRSLTE_API { void *in; // Input buffer void *out; // Output buffer void *p; // DFT plan + bool is_guru; bool forward; // Forward transform? bool mirror; // Shift negative and positive frequencies? bool db; // Provide output in dB? @@ -85,6 +86,17 @@ SRSLTE_API int srslte_dft_plan_c(srslte_dft_plan_t *plan, int dft_points, srslte_dft_dir_t dir); +SRSLTE_API int srslte_dft_plan_guru_c(srslte_dft_plan_t *plan, + int dft_points, + srslte_dft_dir_t dir, + cf_t *in_buffer, + cf_t *out_buffer, + int istride, + int ostride, + int how_many, + int idist, + int odist); + SRSLTE_API int srslte_dft_plan_r(srslte_dft_plan_t *plan, int dft_points, srslte_dft_dir_t dir); @@ -92,6 +104,16 @@ SRSLTE_API int srslte_dft_plan_r(srslte_dft_plan_t *plan, SRSLTE_API int srslte_dft_replan(srslte_dft_plan_t *plan, const int new_dft_points); +SRSLTE_API int srslte_dft_replan_guru_c(srslte_dft_plan_t *plan, + const int new_dft_points, + cf_t *in_buffer, + cf_t *out_buffer, + int istride, + int ostride, + int how_many, + int idist, + int odist); + SRSLTE_API int srslte_dft_replan_c(srslte_dft_plan_t *plan, int new_dft_points); @@ -129,6 +151,8 @@ SRSLTE_API void srslte_dft_run_c(srslte_dft_plan_t *plan, cf_t *in, cf_t *out); +SRSLTE_API void srslte_dft_run_guru_c(srslte_dft_plan_t *plan); + SRSLTE_API void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out); diff --git a/lib/include/srslte/phy/dft/ofdm.h b/lib/include/srslte/phy/dft/ofdm.h index 1363f5638..cba963ca4 100644 --- a/lib/include/srslte/phy/dft/ofdm.h +++ b/lib/include/srslte/phy/dft/ofdm.h @@ -47,14 +47,18 @@ /* This is common for both directions */ typedef struct SRSLTE_API{ srslte_dft_plan_t fft_plan; + srslte_dft_plan_t fft_plan_sf[2]; uint32_t max_prb; uint32_t nof_symbols; uint32_t symbol_sz; uint32_t nof_guards; uint32_t nof_re; uint32_t slot_sz; + uint32_t sf_sz; srslte_cp_t cp; cf_t *tmp; // for removing zero padding + cf_t *in_buffer; + cf_t *out_buffer; bool mbsfn_subframe; uint32_t mbsfn_guard_len; @@ -69,12 +73,16 @@ typedef struct SRSLTE_API{ SRSLTE_API int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, + cf_t *in_buffer, + cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir); SRSLTE_API int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, + cf_t *in_buffer, + cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, @@ -82,12 +90,14 @@ SRSLTE_API int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, SRSLTE_API int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp_type, + cf_t *in_buffer, + cf_t *out_buffer, uint32_t nof_prb); - - SRSLTE_API int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp_type, + cf_t *in_buffer, + cf_t *out_buffer, uint32_t max_prb); SRSLTE_API int srslte_ofdm_tx_set_prb(srslte_ofdm_t *q, @@ -100,39 +110,35 @@ SRSLTE_API int srslte_ofdm_rx_set_prb(srslte_ofdm_t *q, SRSLTE_API void srslte_ofdm_rx_free(srslte_ofdm_t *q); -SRSLTE_API void srslte_ofdm_rx_slot(srslte_ofdm_t *q, - cf_t *input, - cf_t *output); - -SRSLTE_API void srslte_ofdm_rx_sf(srslte_ofdm_t *q, - cf_t *input, - cf_t *output); - +SRSLTE_API void srslte_ofdm_rx_slot(srslte_ofdm_t *q, + int slot_in_sf); +SRSLTE_API void srslte_ofdm_rx_sf(srslte_ofdm_t *q); SRSLTE_API int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp_type, + cf_t *in_buffer, + cf_t *out_buffer, uint32_t nof_prb); SRSLTE_API int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, + cf_t *in_buffer, + cf_t *out_buffer, uint32_t nof_prb); SRSLTE_API void srslte_ofdm_tx_free(srslte_ofdm_t *q); -SRSLTE_API void srslte_ofdm_tx_slot(srslte_ofdm_t *q, - cf_t *input, - cf_t *output); +SRSLTE_API void srslte_ofdm_tx_slot(srslte_ofdm_t *q, + int slot_in_sf); SRSLTE_API void srslte_ofdm_tx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output); -SRSLTE_API void srslte_ofdm_tx_sf(srslte_ofdm_t *q, - cf_t *input, - cf_t *output); +SRSLTE_API void srslte_ofdm_tx_sf(srslte_ofdm_t *q); SRSLTE_API int srslte_ofdm_set_freq_shift(srslte_ofdm_t *q, float freq_shift); @@ -144,4 +150,4 @@ SRSLTE_API void srslte_ofdm_set_non_mbsfn_region(srslte_ofdm_t *q, uint8_t non_mbsfn_region); -#endif \ No newline at end of file +#endif diff --git a/lib/include/srslte/phy/enb/enb_dl.h b/lib/include/srslte/phy/enb/enb_dl.h index 581b98e23..f326d4b09 100644 --- a/lib/include/srslte/phy/enb/enb_dl.h +++ b/lib/include/srslte/phy/enb/enb_dl.h @@ -68,7 +68,7 @@ typedef struct SRSLTE_API { cf_t *sf_symbols[SRSLTE_MAX_PORTS]; cf_t *slot1_symbols[SRSLTE_MAX_PORTS]; - srslte_ofdm_t ifft; + srslte_ofdm_t ifft[SRSLTE_MAX_PORTS]; srslte_pbch_t pbch; srslte_pcfich_t pcfich; srslte_regs_t regs; @@ -110,7 +110,8 @@ typedef struct { } srslte_enb_dl_phich_t; /* This function shall be called just after the initial synchronization */ -SRSLTE_API int srslte_enb_dl_init(srslte_enb_dl_t *q, +SRSLTE_API int srslte_enb_dl_init(srslte_enb_dl_t *q, + cf_t *out_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb); SRSLTE_API void srslte_enb_dl_free(srslte_enb_dl_t *q); @@ -147,8 +148,7 @@ SRSLTE_API void srslte_enb_dl_put_phich(srslte_enb_dl_t *q, SRSLTE_API void srslte_enb_dl_put_base(srslte_enb_dl_t *q, uint32_t tti); -SRSLTE_API void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q, - cf_t *signal_buffer[SRSLTE_MAX_PORTS]); +SRSLTE_API void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q); SRSLTE_API int srslte_enb_dl_add_rnti(srslte_enb_dl_t *q, uint16_t rnti); diff --git a/lib/include/srslte/phy/enb/enb_ul.h b/lib/include/srslte/phy/enb/enb_ul.h index 855b645ca..6839cfc9e 100644 --- a/lib/include/srslte/phy/enb/enb_ul.h +++ b/lib/include/srslte/phy/enb/enb_ul.h @@ -101,6 +101,7 @@ typedef struct { /* This function shall be called just after the initial synchronization */ SRSLTE_API int srslte_enb_ul_init(srslte_enb_ul_t *q, + cf_t *in_buffer, uint32_t max_prb); SRSLTE_API void srslte_enb_ul_free(srslte_enb_ul_t *q); @@ -124,8 +125,7 @@ SRSLTE_API int srslte_enb_ul_cfg_ue(srslte_enb_ul_t *q, uint16_t rnti, srslte_refsignal_srs_cfg_t *srs_cfg); -SRSLTE_API void srslte_enb_ul_fft(srslte_enb_ul_t *q, - cf_t *signal_buffer); +SRSLTE_API void srslte_enb_ul_fft(srslte_enb_ul_t *q); SRSLTE_API int srslte_enb_ul_get_pucch(srslte_enb_ul_t *q, uint16_t rnti, diff --git a/lib/include/srslte/phy/phch/ra.h b/lib/include/srslte/phy/phch/ra.h index 3039455ed..aef8f4751 100644 --- a/lib/include/srslte/phy/phch/ra.h +++ b/lib/include/srslte/phy/phch/ra.h @@ -103,9 +103,7 @@ typedef struct SRSLTE_API { bool prb_idx[2][SRSLTE_MAX_PRB]; uint32_t nof_prb; uint32_t Qm[SRSLTE_MAX_CODEWORDS]; - uint32_t Qm2[SRSLTE_MAX_CODEWORDS]; srslte_ra_mcs_t mcs[SRSLTE_MAX_CODEWORDS]; - srslte_ra_mcs_t mcs2[SRSLTE_MAX_CODEWORDS]; uint32_t nof_tb; srslte_sf_t sf_type; bool tb_en[SRSLTE_MAX_CODEWORDS]; @@ -226,8 +224,7 @@ SRSLTE_API uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, SRSLTE_API int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, - srslte_ra_ul_grant_t *grant, - uint32_t harq_pid); + srslte_ra_ul_grant_t *grant); SRSLTE_API void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, diff --git a/lib/include/srslte/phy/ue/ue_dl.h b/lib/include/srslte/phy/ue/ue_dl.h index 0b0fc44d2..0486ad7d4 100644 --- a/lib/include/srslte/phy/ue/ue_dl.h +++ b/lib/include/srslte/phy/ue/ue_dl.h @@ -80,7 +80,7 @@ typedef struct SRSLTE_API { srslte_pmch_t pmch; srslte_phich_t phich; srslte_regs_t regs; - srslte_ofdm_t fft; + srslte_ofdm_t fft[SRSLTE_MAX_PORTS]; srslte_ofdm_t fft_mbsfn; srslte_chest_dl_t chest; @@ -128,6 +128,7 @@ typedef struct SRSLTE_API { /* This function shall be called just after the initial synchronization */ SRSLTE_API int srslte_ue_dl_init(srslte_ue_dl_t *q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb, uint32_t nof_rx_antennas); diff --git a/lib/include/srslte/phy/ue/ue_mib.h b/lib/include/srslte/phy/ue/ue_mib.h index 5d2ef14fa..ee52b31d4 100644 --- a/lib/include/srslte/phy/ue/ue_mib.h +++ b/lib/include/srslte/phy/ue/ue_mib.h @@ -79,6 +79,7 @@ typedef struct SRSLTE_API { } srslte_ue_mib_t; SRSLTE_API int srslte_ue_mib_init(srslte_ue_mib_t *q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb); SRSLTE_API void srslte_ue_mib_free(srslte_ue_mib_t *q); @@ -89,7 +90,6 @@ SRSLTE_API int srslte_ue_mib_set_cell(srslte_ue_mib_t * q, SRSLTE_API void srslte_ue_mib_reset(srslte_ue_mib_t * q); SRSLTE_API int srslte_ue_mib_decode(srslte_ue_mib_t * q, - cf_t *input, uint8_t bch_payload[SRSLTE_BCH_PAYLOAD_LEN], uint32_t *nof_tx_ports, int *sfn_offset); diff --git a/lib/include/srslte/phy/ue/ue_sync.h b/lib/include/srslte/phy/ue/ue_sync.h index bd280acd0..e5877dd23 100644 --- a/lib/include/srslte/phy/ue/ue_sync.h +++ b/lib/include/srslte/phy/ue/ue_sync.h @@ -185,7 +185,10 @@ SRSLTE_API void srslte_ue_sync_set_cfo_tol(srslte_ue_sync_t *q, float tol); SRSLTE_API void srslte_ue_sync_set_cfo(srslte_ue_sync_t *q, - float cfo); + float cfo); + +SRSLTE_API void srslte_ue_sync_set_cfo_ema(srslte_ue_sync_t *q, + float ema); SRSLTE_API void srslte_ue_sync_cfo_i_detec_en(srslte_ue_sync_t *q, bool enable); diff --git a/lib/include/srslte/phy/ue/ue_ul.h b/lib/include/srslte/phy/ue/ue_ul.h index 3492180e3..617833a61 100644 --- a/lib/include/srslte/phy/ue/ue_ul.h +++ b/lib/include/srslte/phy/ue/ue_ul.h @@ -108,6 +108,7 @@ typedef struct SRSLTE_API { /* This function shall be called just after the initial synchronization */ SRSLTE_API int srslte_ue_ul_init(srslte_ue_ul_t *q, + cf_t *out_buffer, uint32_t max_prb); SRSLTE_API void srslte_ue_ul_free(srslte_ue_ul_t *q); diff --git a/lib/include/srslte/phy/utils/mat.h b/lib/include/srslte/phy/utils/mat.h index d960590c4..339cfea23 100644 --- a/lib/include/srslte/phy/utils/mat.h +++ b/lib/include/srslte/phy/utils/mat.h @@ -27,14 +27,8 @@ #ifndef SRSLTE_MAT_H #define SRSLTE_MAT_H -#include "srslte/phy/utils/simd.h" #include "srslte/config.h" - - -/* - * Generic Macros - */ -#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX)) +#include "srslte/phy/utils/simd.h" /* Generic implementation for complex reciprocal */ SRSLTE_API cf_t srslte_mat_cf_recip_gen(cf_t a); @@ -66,7 +60,6 @@ SRSLTE_API float srslte_mat_2x2_cn(cf_t h00, #ifdef LV_HAVE_SSE -#include /* SSE implementation for complex reciprocal */ SRSLTE_API __m128 srslte_mat_cf_recip_sse(__m128 a); @@ -90,8 +83,6 @@ SRSLTE_API void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1, #ifdef LV_HAVE_AVX -#include - /* AVX implementation for complex reciprocal */ SRSLTE_API __m256 srslte_mat_cf_recip_avx(__m256 a); diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 420d07213..09e9cff8e 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -27,6 +27,18 @@ #ifndef SRSLTE_SIMD_H_H #define SRSLTE_SIMD_H_H +#ifdef LV_HAVE_SSE /* AVX, AVX2, FMA, AVX512 are in this group */ +#ifndef __OPTIMIZE__ +#define __OPTIMIZE__ +#endif +#include +#endif /* LV_HAVE_SSE */ +#include + +#ifdef HAVE_NEON +#include +#endif + /* * SSE Macros */ @@ -44,7 +56,7 @@ /* * AVX Macros */ -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 #define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001) #define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f))) @@ -60,7 +72,7 @@ #define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\ _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) #endif /* LV_HAVE_FMA */ -#endif /* LV_HAVE_AVX */ +#endif /* LV_HAVE_AVX2 */ /* @@ -78,4 +90,1415 @@ _mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) #endif /* LV_HAVE_FMA */ + + +/* Memory Sizes for Single Floating Point and fixed point */ +#ifdef LV_HAVE_AVX512 + +#define SRSLTE_SIMD_F_SIZE 16 +#define SRSLTE_SIMD_CF_SIZE 16 + +#define SRSLTE_SIMD_I_SIZE 16 + +#define SRSLTE_SIMD_S_SIZE 32 +#define SRSLTE_SIMD_C16_SIZE 0 + +#else +#ifdef LV_HAVE_AVX2 + +#define SRSLTE_SIMD_F_SIZE 8 +#define SRSLTE_SIMD_CF_SIZE 8 + +#define SRSLTE_SIMD_I_SIZE 8 + +#define SRSLTE_SIMD_S_SIZE 16 +#define SRSLTE_SIMD_C16_SIZE 16 + +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + +#define SRSLTE_SIMD_F_SIZE 4 +#define SRSLTE_SIMD_CF_SIZE 4 + +#define SRSLTE_SIMD_I_SIZE 4 + +#define SRSLTE_SIMD_S_SIZE 8 +#define SRSLTE_SIMD_C16_SIZE 8 + +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + +#define SRSLTE_SIMD_F_SIZE 4 +#define SRSLTE_SIMD_CF_SIZE 4 + +#define SRSLTE_SIMD_I_SIZE 4 + +#define SRSLTE_SIMD_S_SIZE 8 +#define SRSLTE_SIMD_C16_SIZE 8 + +#else /* LV_HAVE_NEON */ +#define SRSLTE_SIMD_F_SIZE 0 +#define SRSLTE_SIMD_CF_SIZE 0 + +#define SRSLTE_SIMD_I_SIZE 0 + +#define SRSLTE_SIMD_S_SIZE 0 +#define SRSLTE_SIMD_C16_SIZE 0 + +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + + + +#if SRSLTE_SIMD_F_SIZE + +/* Data types */ +#ifdef LV_HAVE_AVX512 +typedef __m512 simd_f_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256 simd_f_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128 simd_f_t; +#else /* HAVE_NEON */ +#ifdef HAVE_NEON +typedef float32x4_t simd_f_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +/* Single precision Floating point functions */ +static inline simd_f_t srslte_simd_f_load(float *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_ps(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_ps(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_ps(ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_f32(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_loadu(float *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_loadu_ps(ptr); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_loadu_ps(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_loadu_ps(ptr); +#else /* LV_HAVE_SSE */ + #ifdef HAVE_NEON + return vld1q_f32(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_ps(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_ps(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps(ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_f_storeu(float *ptr, simd_f_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + _mm256_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_set1(float x) { +#ifdef LV_HAVE_AVX512 + return _mm512_set1_ps(x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_set1_ps(x); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_set1_ps(x); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_f32(x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_mul_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_mul_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_mul_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_f32(a,b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_rcp(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_rcp14_ps(a); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_rcp_ps(a); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_rcp_ps(a); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_f32(vrecpeq_f32(a), vrecpsq_f32(vrecpeq_f32(a), a)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + __m512 r = _mm512_add_ps(a, b); + return _mm512_mask_sub_ps(r, 0b0101010101010101, a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_addsub_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_addsub_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON + float* a_ptr = (float*) &a; + float* b_ptr = (float*) &b; + simd_f_t ret; + float* c_ptr = (float*) &ret; + for(int i = 0; i<4;i++){ + if(i%2==0){ + c_ptr[i] = a_ptr[i] - b_ptr[i]; + }else{ + c_ptr[i] = a_ptr[i] + b_ptr[i]; + } + } + + return ret; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + + + + +static inline simd_f_t srslte_simd_f_sub(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_sub_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sub_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_sub_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubq_f32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_add(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_add_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vaddq_f32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_zero (void) { +#ifdef LV_HAVE_AVX512 + return _mm512_setzero_ps(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_setzero_ps(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_setzero_ps(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_f32(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_swap(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_permute_ps(a, 0b10110001); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_permute_ps(a, 0b10110001); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_shuffle_ps(a, a, 0b10110001); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(a))); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_hadd(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + const __m512i idx1 = _mm512_setr_epi32((0b00000), (0b00010), + (0b00100), (0b00110), + (0b01000), (0b01010), + (0b01100), (0b01110), + (0b10000), (0b10010), + (0b10100), (0b10110), + (0b11000), (0b11010), + (0b11100), (0b11110)); + const __m512i idx2 = _mm512_or_epi32(idx1, _mm512_set1_epi32(1)); + + simd_f_t a1 = _mm512_permutex2var_ps(a, idx1, b); + simd_f_t b1 = _mm512_permutex2var_ps(a, idx2, b); + return _mm512_add_ps(a1, b1); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + simd_f_t a1 = _mm256_permute2f128_ps(a, b, 0b00100000); + simd_f_t b1 = _mm256_permute2f128_ps(a, b, 0b00110001); + return _mm256_hadd_ps(a1, b1); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_hadd_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vcombine_f32( vpadd_f32( vget_low_f32(a), vget_high_f32(a) ), vpadd_f32( vget_low_f32(b), vget_high_f32(b) ) ); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_sqrt_ps(a); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_sqrt_ps(a); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_sqrt_ps(a); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + float32x4_t sqrt_reciprocal = vrsqrteq_f32(a); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a,sqrt_reciprocal), sqrt_reciprocal),sqrt_reciprocal); + return vmulq_f32(a,sqrt_reciprocal); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_F_SIZE */ + + +#if SRSLTE_SIMD_CF_SIZE + +#ifdef HAVE_NEON + typedef float32x4x2_t simd_cf_t; +#else +typedef struct { + simd_f_t re; + simd_f_t im; + +} simd_cf_t; +#endif + +/* Complex Single precission Floating point functions */ +static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + __m512 in1 = _mm512_load_ps((float*)(ptr)); + __m512 in2 = _mm512_load_ps((float*)(ptr + SRSLTE_SIMD_CF_SIZE/2)); + ret.re = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E), in2); + ret.im = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F), in2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 in1 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr)), 0b11011000); + __m256 in2 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr + 4)), 0b11011000); + ret.re = _mm256_unpacklo_ps(in1, in2); + ret.im = _mm256_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128 i1 = _mm_load_ps((float*)(ptr)); + __m128 i2 = _mm_load_ps((float*)(ptr + 2)); + ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); + ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#else +#ifdef HAVE_NEON + ret = vld2q_f32((float*)(ptr)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +/* Complex Single precission Floating point functions */ +static inline simd_cf_t srslte_simd_cfi_loadu(cf_t *ptr) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + __m512 in1 = _mm512_loadu_ps((float*)(ptr)); + __m512 in2 = _mm512_loadu_ps((float*)(ptr + SRSLTE_SIMD_CF_SIZE/2)); + ret.re = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E), in2); + ret.im = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F), in2); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + __m256 in1 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr)), 0b11011000); + __m256 in2 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr + 4)), 0b11011000); + ret.re = _mm256_unpacklo_ps(in1, in2); + ret.im = _mm256_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128 i1 = _mm_loadu_ps((float*)(ptr)); + __m128 i2 = _mm_loadu_ps((float*)(ptr + 2)); + ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); + ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#else +#ifdef HAVE_NEON + ret = vld2q_f32((float*)(ptr)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_load(float *re, float *im) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_load_ps(re); + ret.im = _mm512_load_ps(im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_load_ps(re); + ret.im = _mm256_load_ps(im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_load_ps(re); + ret.im = _mm_load_ps(im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vld1q_f32(re); + ret.val[1] = vld1q_f32(im); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_loadu(float *re, float *im) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_loadu_ps(re); + ret.im = _mm512_loadu_ps(im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_loadu_ps(re); + ret.im = _mm256_loadu_ps(im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_loadu_ps(re); + ret.im = _mm_loadu_ps(im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vld1q_f32(re); + ret.val[1] = vld1q_f32(im); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline void srslte_simd_cfi_store(cf_t *ptr, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + __m512 s1 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17), simdreg.im); + __m512 s2 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x08, 0x18, 0x09, 0x19, + 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F), simdreg.im); + _mm512_store_ps((float*)(ptr), s1); + _mm512_store_ps((float*)(ptr + 8), s2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); + __m256 out2 = _mm256_permute_ps(simdreg.im, 0b11011000); + _mm256_store_ps((float*)(ptr), _mm256_unpacklo_ps(out1, out2)); + _mm256_store_ps((float*)(ptr + 4), _mm256_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); + _mm_store_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr), simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cfi_storeu(cf_t *ptr, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + __m512 s1 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17), simdreg.im); + __m512 s2 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x08, 0x18, 0x09, 0x19, + 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F), simdreg.im); + _mm512_storeu_ps((float*)(ptr), s1); + _mm512_storeu_ps((float*)(ptr + 8), s2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); + __m256 out2 = _mm256_permute_ps(simdreg.im, 0b11011000); + _mm256_storeu_ps((float*)(ptr), _mm256_unpacklo_ps(out1, out2)); + _mm256_storeu_ps((float*)(ptr + 4), _mm256_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); + _mm_storeu_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr), simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cf_store(float *re, float *im, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_ps(re, simdreg.re); + _mm512_store_ps(im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_ps((float *) re, simdreg.re); + _mm256_store_ps((float *) im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_SSE + _mm_store_ps((float *) re, simdreg.re); + _mm_store_ps((float *) im, simdreg.im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst1q_f32((float *) re, simdreg.val[0]); + vst1q_f32((float *) im, simdreg.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cf_storeu(float *re, float *im, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_ps(re, simdreg.re); + _mm512_storeu_ps(im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_ps((float *) re, simdreg.re); + _mm256_storeu_ps((float *) im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps((float *) re, simdreg.re); + _mm_storeu_ps((float *) im, simdreg.im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst1q_f32((float *) re, simdreg.val[0]); + vst1q_f32((float *) im, simdreg.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_cf_t srslte_simd_cf_set1 (cf_t x) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_set1_ps(__real__ x); + ret.im = _mm512_set1_ps(__imag__ x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_set1_ps(__real__ x); + ret.im = _mm256_set1_ps(__imag__ x); +#else +#ifdef LV_HAVE_SSE + ret.re = _mm_set1_ps(__real__ x); + ret.im = _mm_set1_ps(__imag__ x); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vdupq_n_f32(__real__ x); + ret.val[1] = vdupq_n_f32(__imag__ x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_prod (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_sub_ps(_mm512_mul_ps(a.re, b.re), + _mm512_mul_ps(a.im, b.im)); + ret.im = _mm512_add_ps(_mm512_mul_ps(a.re, b.im), + _mm512_mul_ps(a.im, b.re)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_sub_ps(_mm256_mul_ps(a.re, b.re), + _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_add_ps(_mm256_mul_ps(a.re, b.im), + _mm256_mul_ps(a.im, b.re)); +#else +#ifdef LV_HAVE_SSE + ret.re = _mm_sub_ps(_mm_mul_ps(a.re, b.re), + _mm_mul_ps(a.im, b.im)); + ret.im = _mm_add_ps(_mm_mul_ps(a.re, b.im), + _mm_mul_ps(a.im, b.re)); +#else +#ifdef HAVE_NEON + ret.val[0] = vsubq_f32(vmulq_f32(a.val[0],b.val[0]), + vmulq_f32(a.val[1],b.val[1])); + ret.val[1] = vaddq_f32(vmulq_f32(a.val[0],b.val[1]), + vmulq_f32(a.val[1],b.val[0])); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_conjprod (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_add_ps(_mm512_mul_ps(a.re, b.re), + _mm512_mul_ps(a.im, b.im)); + ret.im = _mm512_sub_ps(_mm512_mul_ps(a.im, b.re), + _mm512_mul_ps(a.re, b.im)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_add_ps(_mm256_mul_ps(a.re, b.re), + _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_sub_ps(_mm256_mul_ps(a.im, b.re), + _mm256_mul_ps(a.re, b.im)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_add_ps(_mm_mul_ps(a.re, b.re), + _mm_mul_ps(a.im, b.im)); + ret.im = _mm_sub_ps(_mm_mul_ps(a.im, b.re), + _mm_mul_ps(a.re, b.im)); + #else +#ifdef HAVE_NEON + ret.val[0] = vaddq_f32(vmulq_f32(a.val[0],b.val[0]), + vmulq_f32(a.val[1],b.val[1])); + ret.val[1] = vsubq_f32(vmulq_f32(a.val[1],b.val[0]), + vmulq_f32(a.val[0],b.val[1])); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_add_ps(a.re, b.re); + ret.im = _mm512_add_ps(a.im, b.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_add_ps(a.re, b.re); + ret.im = _mm256_add_ps(a.im, b.im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_add_ps(a.re, b.re); + ret.im = _mm_add_ps(a.im, b.im); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vaddq_f32(a.val[0],b.val[0]); + ret.val[1] = vaddq_f32(a.val[1],b.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_mul_ps(a.re, b); + ret.im = _mm512_mul_ps(a.im, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + b = _mm256_permutevar8x32_ps(b, _mm256_setr_epi32(0,4,1,5,2,6,3,7)); + ret.re = _mm256_mul_ps(a.re, b); + ret.im = _mm256_mul_ps(a.im, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_mul_ps(a.re, b); + ret.im = _mm_mul_ps(a.im, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vmulq_f32(a.val[0],b); + ret.val[1] = vmulq_f32(a.val[1],b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + simd_f_t a2re = _mm512_mul_ps(a.re, a.re); + simd_f_t a2im = _mm512_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm512_add_ps(a2re, a2im); + simd_f_t rcp = _mm512_rcp14_ps(mod2); + simd_f_t neg_a_im = _mm512_xor_ps(_mm512_set1_ps(-0.0f), a.im); + ret.re = _mm512_mul_ps(a.re, rcp); + ret.im = _mm512_mul_ps(neg_a_im, rcp); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + simd_f_t a2re = _mm256_mul_ps(a.re, a.re); + simd_f_t a2im = _mm256_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm256_add_ps(a2re, a2im); + simd_f_t rcp = _mm256_rcp_ps(mod2); + simd_f_t neg_a_im = _mm256_xor_ps(_mm256_set1_ps(-0.0f), a.im); + ret.re = _mm256_mul_ps(a.re, rcp); + ret.im = _mm256_mul_ps(neg_a_im, rcp); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + simd_f_t a2re = _mm_mul_ps(a.re, a.re); + simd_f_t a2im = _mm_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm_add_ps(a2re, a2im); + simd_f_t rcp = _mm_rcp_ps(mod2); + simd_f_t neg_a_im = _mm_xor_ps(_mm_set1_ps(-0.0f), a.im); + ret.re = _mm_mul_ps(a.re, rcp); + ret.im = _mm_mul_ps(neg_a_im, rcp); + #else /* LV_HAVE_SSE */ + #ifdef HAVE_NEON + simd_f_t a2re = vmulq_f32(a.val[0], a.val[0]); + simd_f_t a2im = vmulq_f32(a.val[1], a.val[1]); + simd_f_t mod2 = vaddq_f32(a2re, a2im); + simd_f_t rcp = vmulq_f32(vrecpeq_f32(mod2), vrecpsq_f32(vrecpeq_f32(mod2), mod2)); + simd_f_t neg_a_im = vnegq_f32(a.val[1]); + ret.val[0] = vmulq_f32(a.val[0], rcp); + ret.val[1] = vmulq_f32(neg_a_im, rcp); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_zero (void) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_setzero_ps(); + ret.im = _mm512_setzero_ps(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_setzero_ps(); + ret.im = _mm256_setzero_ps(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_setzero_ps(); + ret.im = _mm_setzero_ps(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vdupq_n_f32(0); + ret.val[1] = vdupq_n_f32(0); +#endif /* HAVE_NEON */ +#endif /* HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +#endif /* SRSLTE_SIMD_CF_SIZE */ + +#if SRSLTE_SIMD_I_SIZE + +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_i_t; +typedef __mmask16 simd_sel_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_i_t; +typedef __m256 simd_sel_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_i_t; +typedef __m128 simd_sel_t; +#else /* LV_HAVE_AVX2 */ +#ifdef HAVE_NEON +typedef int32x4_t simd_i_t; +typedef int32x4_t simd_sel_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +static inline simd_i_t srslte_simd_i_load(int *x) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_epi32((__m512i*)x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256((__m256i*)x); +#else + #ifdef LV_HAVE_SSE + return _mm_load_si128((__m128i*)x); +#else + #ifdef HAVE_NEON + return vld1q_s32((int*)x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_i_store(int *x, simd_i_t reg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_epi32((__m512i*)x, reg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i*)x, reg); +#else +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i*)x, reg); +#else +#ifdef HAVE_NEON + vst1q_s32((int*)x, reg); +#endif /*HAVE_NEON*/ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_set1(int x) { +#ifdef LV_HAVE_AVX512 + return _mm512_set1_epi32(x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_set1_epi32(x); +#else + #ifdef LV_HAVE_SSE + return _mm_set1_epi32(x); +#else + #ifdef HAVE_NEON + return vdupq_n_s32(x); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_add(simd_i_t a, simd_i_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_epi32(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_epi32(a, b); +#else +#ifdef LV_HAVE_SSE + return _mm_add_epi32(a, b); +#else +#ifdef HAVE_NEON + return vaddq_s32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_cmp_ps(a, b, _CMP_GT_OS); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return (simd_sel_t) _mm_cmpgt_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return (simd_sel_t) vcgtq_f32(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t selector) { +#ifdef LV_HAVE_AVX512 + return (__m512i) _mm512_mask_blend_ps( selector, (__m512)a, (__m512) b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return (__m256i) _mm256_blendv_ps((__m256) a,(__m256) b, selector); +#else + #ifdef LV_HAVE_SSE + return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, selector); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON + int* a_ptr = (int*) &a; + int* b_ptr = (int*) &b; + simd_i_t ret; + int* sel = (int*) &selector; + int* c_ptr = (int*) &ret; + for(int i = 0;i<4;i++) + { + if(sel[i] == -1){ + c_ptr[i] = b_ptr[i]; + }else{ + c_ptr[i] = a_ptr[i]; + } + } + return ret; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_I_SIZE*/ + + +#if SRSLTE_SIMD_S_SIZE + + +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_s_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_s_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_s_t; +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON +typedef int16x8_t simd_s_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_si512(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256((__m256i*) ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s16(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_loadu_si512(ptr); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_loadu_si256((__m256i*) ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_loadu_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s16(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i*) ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s16( ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256((__m256i*) ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s16(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} +static inline simd_s_t srslte_simd_s_zero(void) { +#ifdef LV_HAVE_AVX512 + return _mm512_setzero_si512(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_setzero_si256(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_setzero_si128(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_s16(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_mullo_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_mullo_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_mullo_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_s16(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_add_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vaddq_s16(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_sub(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_sub_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sub_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_sub_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubq_s16(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_S_SIZE */ + + +#if SRSLTE_SIMD_C16_SIZE + +typedef +#ifdef LV_HAVE_AVX512 + struct { + union { + __m512i m512; + int16_t i16[32]; + } re; + union { + __m512i m512; + int16_t i16[32]; + } im; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + struct { + union { + __m256i m256; + int16_t i16[16]; + } re; + union { + __m256i m256; + int16_t i16[16]; + } im; +#else +#ifdef LV_HAVE_SSE + struct { + union { + __m128i m128; + int16_t i16[8]; + } re; + union { + __m128i m128; + int16_t i16[8]; + } im; +#else +#ifdef HAVE_NEON + union { + int16x8x2_t m128; + int16_t i16[16]; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} simd_c16_t; + +/* Fixed point precision (16-bit) functions */ +static inline simd_c16_t srslte_simd_c16i_load(c16_t *ptr) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX512 + __m512i in1 = _mm512_load_si512((__m512i*)(ptr)); + __m512i in2 = _mm512_load_si512((__m512i*)(ptr + 8)); + ret.re.m512 = _mm512_mask_blend_epi16(0xAAAAAAAA, in1,_mm512_shufflelo_epi16(_mm512_shufflehi_epi16(in2, 0b10100000), 0b10100000)); + ret.im.m512 = _mm512_mask_blend_epi16(0xAAAAAAAA, _mm512_shufflelo_epi16(_mm512_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_AVX2 + __m256i in1 = _mm256_load_si256((__m256i*)(ptr)); + __m256i in2 = _mm256_load_si256((__m256i*)(ptr + 8)); + ret.re.m256 = _mm256_blend_epi16(in1,_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); + ret.im.m256 = _mm256_blend_epi16(_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128i in1 = _mm_load_si128((__m128i*)(ptr)); + __m128i in2 = _mm_load_si128((__m128i*)(ptr + 8)); + ret.re.m128 = _mm_blend_epi16(in1,_mm_shufflelo_epi16(_mm_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); + ret.im.m128 = _mm_blend_epi16(_mm_shufflelo_epi16(_mm_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128 = vld2q_s16((int16_t*)(ptr)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_load(int16_t *re, int16_t *im) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_load_si256((__m256i*)(re)); + ret.im.m256 = _mm256_load_si256((__m256i*)(im)); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_load_si128((__m128i*)(re)); + ret.im.m128 = _mm_load_si128((__m128i*)(im)); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128.val[0] = vld1q_s16((int16_t*)(re)); + ret.m128.val[1] = vld1q_s16((int16_t*)(im)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_loadu(int16_t *re, int16_t *im) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_loadu_si256((__m256i*)(re)); + ret.im.m256 = _mm256_loadu_si256((__m256i*)(im)); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_loadu_si128((__m128i*)(re)); + ret.im.m128 = _mm_loadu_si128((__m128i*)(im)); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128.val[0] = vld1q_s16((int16_t*)(re)); + ret.m128.val[1] = vld1q_s16((int16_t*)(im)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + __m256i re_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); + __m256i im_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.im.m256, 0b10110001), 0b10110001); + _mm256_store_si256((__m256i *) (ptr), _mm256_blend_epi16(simdreg.re.m256, im_sw, 0b10101010)); + _mm256_store_si256((__m256i *) (ptr + 8), _mm256_blend_epi16(re_sw, simdreg.im.m256, 0b10101010)); +#else +#ifdef LV_HAVE_SSE + __m128i re_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.re.m128, 0b10110001), 0b10110001); + __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); + _mm_store_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); + _mm_store_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_s16((int16_t*)(ptr) ,simdreg.m128); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16i_storeu(c16_t *ptr, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + __m256i re_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); + __m256i im_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.im.m256, 0b10110001), 0b10110001); + _mm256_storeu_si256((__m256i *) (ptr), _mm256_blend_epi16(simdreg.re.m256, im_sw, 0b10101010)); + _mm256_storeu_si256((__m256i *) (ptr + 8), _mm256_blend_epi16(re_sw, simdreg.im.m256, 0b10101010)); +#else +#ifdef LV_HAVE_SSE + __m128i re_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.re.m128, 0b10110001), 0b10110001); + __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); + _mm_storeu_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); + _mm_storeu_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_s16((int16_t*)(ptr) ,simdreg.m128); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i *) re, simdreg.re.m256); + _mm256_store_si256((__m256i *) im, simdreg.im.m256); +#else +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i *) re, simdreg.re.m128); + _mm_store_si128((__m128i *) im, simdreg.im.m128); +#else +#ifdef HAVE_NEON + vst1q_s16((int16_t *) re, simdreg.m128.val[0]); + vst1q_s16((int16_t *) im, simdreg.m128.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16_storeu(int16_t *re, int16_t *im, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256((__m256i *) re, simdreg.re.m256); + _mm256_storeu_si256((__m256i *) im, simdreg.im.m256); +#else +#ifdef LV_HAVE_SSE + _mm_storeu_si128((__m128i *) re, simdreg.re.m128); + _mm_storeu_si128((__m128i *) im, simdreg.im.m128); +#else +#ifdef HAVE_NEON + vst1q_s16((int16_t *) re, simdreg.m128.val[0]); + vst1q_s16((int16_t *) im, simdreg.m128.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline simd_c16_t srslte_simd_c16_prod (simd_c16_t a, simd_c16_t b) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_sub_epi16(_mm256_mulhrs_epi16(a.re.m256, _mm256_slli_epi16(b.re.m256, 1)), + _mm256_mulhrs_epi16(a.im.m256, _mm256_slli_epi16(b.im.m256, 1))); + ret.im.m256 = _mm256_add_epi16(_mm256_mulhrs_epi16(a.re.m256, _mm256_slli_epi16(b.im.m256, 1)), + _mm256_mulhrs_epi16(a.im.m256, _mm256_slli_epi16(b.re.m256, 1))); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_sub_epi16(_mm_mulhrs_epi16(a.re.m128, _mm_slli_epi16(b.re.m128, 1)), + _mm_mulhrs_epi16(a.im.m128, _mm_slli_epi16(b.im.m128, 1))); + ret.im.m128 = _mm_add_epi16(_mm_mulhrs_epi16(a.re.m128, _mm_slli_epi16(b.im.m128, 1)), + _mm_mulhrs_epi16(a.im.m128, _mm_slli_epi16(b.re.m128, 1))); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_add (simd_c16_t a, simd_c16_t b) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_add_epi16(a.re.m256, b.re.m256); + ret.im.m256 = _mm256_add_epi16(a.im.m256, b.im.m256); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_add_epi16(a.re.m128, b.re.m128); + ret.im.m128 = _mm_add_epi16(a.im.m128, b.im.m128); +#else +#ifdef HAVE_NEON + ret.m128.val[0] = vaddq_s16(a.m128.val[0],a.m128.val[0]); + ret.m128.val[1] = vaddq_s16(a.m128.val[1],a.m128.val[1]); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_zero (void) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_setzero_si256(); + ret.im.m256 = _mm256_setzero_si256(); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_setzero_si128(); + ret.im.m128 = _mm_setzero_si128(); +#else +#ifdef HAVE_NEON + ret.m128.val[0] = vdupq_n_s16(0); + ret.m128.val[1] = vdupq_n_s16(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +#endif /* SRSLTE_SIMD_C16_SIZE */ + +#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE + +static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + __m512 aa = _mm512_permutex2var_ps(a, _mm512_setr_epi32(0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B), b); + __m512 bb = _mm512_permutex2var_ps(a, _mm512_setr_epi32(0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F), b); + __m512i ai = _mm512_cvttps_epi32(aa); + __m512i bi = _mm512_cvttps_epi32(bb); + return _mm512_packs_epi32(ai, bi); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 aa = _mm256_permute2f128_ps(a, b, 0x20); + __m256 bb = _mm256_permute2f128_ps(a, b, 0x31); + __m256i ai = _mm256_cvttps_epi32(aa); + __m256i bi = _mm256_cvttps_epi32(bb); + return _mm256_packs_epi32(ai, bi); +#else +#ifdef LV_HAVE_SSE + __m128i ai = _mm_cvttps_epi32(a); + __m128i bi = _mm_cvttps_epi32(b); + return _mm_packs_epi32(ai, bi); + #else +#ifdef HAVE_NEON + int32x4_t ai = vcvtq_s32_f32(a); + int32x4_t bi = vcvtq_s32_f32(b); + return (simd_s_t)vcombine_s16(vqmovn_s32(ai), vqmovn_s32(bi)); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */ + #endif //SRSLTE_SIMD_H_H diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 4a55d18b6..4a5daefb3 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -54,7 +54,6 @@ extern "C" { #define SRSLTE_VEC_EMA(data, average, alpha) ((alpha)*(data)+(1-alpha)*(average)) /** Return the sum of all the elements */ -SRSLTE_API int srslte_vec_acc_ii(int *x, uint32_t len); SRSLTE_API float srslte_vec_acc_ff(float *x, uint32_t len); SRSLTE_API cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len); @@ -77,52 +76,29 @@ SRSLTE_API void srslte_vec_save_file(char *filename, void *buffer, uint32_t len) SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, uint32_t len); /* sum two vectors */ -SRSLTE_API void srslte_vec_sum_ch(uint8_t *x, uint8_t *y, char *z, uint32_t len); SRSLTE_API void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); /* substract two vectors z=x-y */ SRSLTE_API void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); -/* EMA filter: output=coeff*new_data + (1-coeff)*average */ -SRSLTE_API void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len); - -/* Square distance */ -SRSLTE_API void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints); - -/* scalar addition */ -SRSLTE_API void srslte_vec_sc_add_fff(float *x, float h, float *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_cfc(cf_t *x, float h, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len); - /* scalar product */ SRSLTE_API void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_div2_sss(short *x, int pow2_div, short *z, uint32_t len); -/* Normalization */ -SRSLTE_API void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len); SRSLTE_API void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len); SRSLTE_API void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len); -SRSLTE_API void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len); -SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len); -SRSLTE_API void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len); -SRSLTE_API void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len); - -SRSLTE_API void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len); +SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len); /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_split(float *x_re, float *x_im, float *y_re, float *y_im, float *z_re, float *z_im, uint32_t len); /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len); @@ -132,7 +108,7 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len /* real vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len); -SRSLTE_API void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); /* Dot-product */ SRSLTE_API cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len); @@ -142,8 +118,8 @@ SRSLTE_API float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len); SRSLTE_API int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len); /* z=x/y vector division (element-wise) */ -SRSLTE_API void srslte_vec_div_ccc(cf_t *x, cf_t *y, float *y_mod, cf_t *z, float *z_real, float *z_imag, uint32_t len); -void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag, uint32_t len); +SRSLTE_API void srslte_vec_div_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len); /* conjugate */ @@ -158,11 +134,6 @@ SRSLTE_API float srslte_vec_corr_ccc(cf_t *x, cf_t *y, uint32_t len); /* return the index of the maximum value in the vector */ SRSLTE_API uint32_t srslte_vec_max_fi(float *x, uint32_t len); SRSLTE_API uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len); -SRSLTE_API int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len); -SRSLTE_API int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len); - -/* maximum between two vectors */ -SRSLTE_API void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len); /* quantify vector of floats or int16 and convert to uint8_t */ SRSLTE_API void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len); @@ -172,9 +143,6 @@ SRSLTE_API void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int1 SRSLTE_API void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len); SRSLTE_API void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len); -/* argument of each vector element */ -SRSLTE_API void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len); - /* Copy 256 bit aligned vector */ SRSLTE_API void srs_vec_cf_cpy(cf_t *src, cf_t *dst, int len); diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 1894a0803..294cff50f 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -35,32 +35,94 @@ extern "C" { #include #include "srslte/config.h" -SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len); +#ifdef LV_HAVE_AVX512 +#define SRSLTE_SIMD_BIT_ALIGN 512 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x3F) == 0) +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX +#define SRSLTE_SIMD_BIT_ALIGN 256 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x1F) == 0) +#else /* LV_HAVE_AVX */ +#ifdef LV_HAVE_SSE +#define SRSLTE_SIMD_BIT_ALIGN 128 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x0F) == 0) +#else /* LV_HAVE_SSE */ +#define SRSLTE_SIMD_BIT_ALIGN 64 +#define SRSLTE_IS_ALIGNED(PTR) (1) +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX */ +#endif /* LV_HAVE_AVX512 */ -SRSLTE_API int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len); +/* SIMD Basic vector math */ +SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); -SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); -SRSLTE_API void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len); +SRSLTE_API float srslte_vec_acc_ff_simd(float *x, int len); -SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API cf_t srslte_vec_acc_cc_simd(cf_t *x, int len); -SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_add_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len); +/* SIMD Vector Scalar Product */ +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(const cf_t *x,const float h,cf_t *y,const int len); +SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, int len); -SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len); -SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len); +/* SIMD Vector Product */ +SRSLTE_API void srslte_vec_prod_ccc_split_simd(float *a_re, float *a_im, float *b_re, float *b_im, float *r_re, float *r_im, int len); -SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_c16_simd(int16_t *a_re, int16_t *a_im, int16_t *b_re, int16_t *b_im, int16_t *r_re, + int16_t *r_im, int len); -SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); + +SRSLTE_API void srslte_vec_prod_cfc_simd(cf_t *x, float *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_prod_fff_simd(float *x, float *y, float *z, int len); + +SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); + +/* SIMD Division */ +SRSLTE_API void srslte_vec_div_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_div_cfc_simd(cf_t *x, float *y, cf_t *z, int len); + +SRSLTE_API void srslte_vec_div_fff_simd(float *x, float *y, float *z, int len); + +/* SIMD Dot product */ +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, int len); + +SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, int len); + +SRSLTE_API c16_t srslte_vec_dot_prod_ccc_c16i_simd(c16_t *x, c16_t *y, int len); + +SRSLTE_API int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len); + +/* SIMD Modulus functions */ +SRSLTE_API void srslte_vec_abs_cf_simd(cf_t *x, float *z, int len); + +SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, int len); + +/* Other Functions */ +SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, int len); + +SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, int len); + +SRSLTE_API void srslte_vec_cp_simd(cf_t *src, cf_t *dst, int len); + + +/* SIMD Find Max functions */ +SRSLTE_API uint32_t srslte_vec_max_fi_simd(float *x, int len); + +SRSLTE_API uint32_t srslte_vec_max_ci_simd(cf_t *x, int len); -SRSLTE_API void srslte_vec_sc_prod_cfc_avx(const cf_t *x,const float h,cf_t *y,const uint32_t len); #ifdef __cplusplus } #endif diff --git a/lib/src/asn1/liblte_rrc.cc b/lib/src/asn1/liblte_rrc.cc index e4621994e..c53399384 100644 --- a/lib/src/asn1/liblte_rrc.cc +++ b/lib/src/asn1/liblte_rrc.cc @@ -4764,6 +4764,9 @@ LIBLTE_ERROR_ENUM liblte_rrc_pack_plmn_identity_ie(LIBLTE_RRC_PLMN_IDENTITY_STRU if(plmn_id != NULL && ie_ptr != NULL) { + if(0xFFFF == plmn_id->mcc) { + mcc_opt = false; + } liblte_value_2_bits(mcc_opt, ie_ptr, 1); if(true == mcc_opt) @@ -12754,7 +12757,241 @@ LIBLTE_ERROR_ENUM liblte_rrc_unpack_paging_msg(LIBLTE_BIT_MSG_STRUCT *msg, Document Reference: 36.331 v10.0.0 Section 6.2.2 *********************************************************************/ -// FIXME +LIBLTE_ERROR_ENUM liblte_rrc_pack_cgi_info_ie(LIBLTE_RRC_CGI_INFO_STRUCT *cgi_info, + uint8 **ie_ptr) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + + if(cgi_info != NULL && + ie_ptr != NULL) + { + liblte_value_2_bits(cgi_info->have_plmn_identity_list, ie_ptr, 1); + liblte_rrc_pack_cell_global_id_eutra_ie(&cgi_info->cell_global_id, ie_ptr); + liblte_rrc_pack_tracking_area_code_ie(cgi_info->tracking_area_code, ie_ptr); + if(cgi_info->have_plmn_identity_list) { + liblte_value_2_bits(cgi_info->n_plmn_identity_list-1, ie_ptr, 3); + for(uint32 i=0; in_plmn_identity_list; i++) { + liblte_rrc_pack_plmn_identity_ie(&cgi_info->plmn_identity_list[i], ie_ptr); + } + } + + err = LIBLTE_SUCCESS; + } + + return(err); +} +LIBLTE_ERROR_ENUM liblte_rrc_unpack_cgi_info_ie(uint8 **ie_ptr, + LIBLTE_RRC_CGI_INFO_STRUCT *cgi_info) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + + if(ie_ptr != NULL && + cgi_info != NULL) + { + cgi_info->have_plmn_identity_list = (bool)liblte_bits_2_value(ie_ptr, 1); + liblte_rrc_unpack_cell_global_id_eutra_ie(ie_ptr, &cgi_info->cell_global_id); + liblte_rrc_unpack_tracking_area_code_ie(ie_ptr, &cgi_info->tracking_area_code); + if(cgi_info->have_plmn_identity_list) { + cgi_info->n_plmn_identity_list = liblte_bits_2_value(ie_ptr, 3) + 1; + for(uint32 i=0; in_plmn_identity_list; i++) { + liblte_rrc_unpack_plmn_identity_ie(ie_ptr, &cgi_info->plmn_identity_list[i]); + } + } + err = LIBLTE_SUCCESS; + } + + return(err); +} + +LIBLTE_ERROR_ENUM liblte_rrc_pack_meas_result_ie(LIBLTE_RRC_MEAS_RESULT_STRUCT *meas_result, + uint8 **ie_ptr) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + + if(meas_result != NULL && + ie_ptr != NULL) + { + //ext + liblte_value_2_bits(0, ie_ptr, 1); + + //options + liblte_value_2_bits(meas_result->have_rsrp, ie_ptr, 1); + liblte_value_2_bits(meas_result->have_rsrq, ie_ptr, 1); + + if(meas_result->have_rsrp) { + liblte_rrc_pack_rsrp_range_ie(meas_result->rsrp_result, ie_ptr); + } + if(meas_result->have_rsrq) { + liblte_rrc_pack_rsrq_range_ie(meas_result->rsrq_result, ie_ptr); + } + + err = LIBLTE_SUCCESS; + } + + return(err); +} +LIBLTE_ERROR_ENUM liblte_rrc_unpack_meas_result_ie(uint8 **ie_ptr, + LIBLTE_RRC_MEAS_RESULT_STRUCT *meas_result) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + + if(ie_ptr != NULL && + meas_result != NULL) + { + //ext + bool ext = liblte_bits_2_value(ie_ptr, 1); + + //options + meas_result->have_rsrp = liblte_bits_2_value(ie_ptr, 1); + meas_result->have_rsrq = liblte_bits_2_value(ie_ptr, 1); + + if(meas_result->have_rsrp) { + liblte_rrc_unpack_rsrp_range_ie(ie_ptr, &meas_result->rsrp_result); + } + if(meas_result->have_rsrq) { + liblte_rrc_unpack_rsrq_range_ie(ie_ptr, &meas_result->rsrq_result); + } + + //skip extensions + liblte_rrc_consume_noncrit_extension(ext, __func__, ie_ptr); + + err = LIBLTE_SUCCESS; + } + + return(err); +} + +LIBLTE_ERROR_ENUM liblte_rrc_pack_meas_result_eutra_ie(LIBLTE_RRC_MEAS_RESULT_EUTRA_STRUCT *meas_result_eutra, + uint8 **ie_ptr) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + + if(meas_result_eutra != NULL && + ie_ptr != NULL) + { + liblte_value_2_bits(meas_result_eutra->have_cgi_info, ie_ptr, 1); + liblte_rrc_pack_phys_cell_id_ie(meas_result_eutra->phys_cell_id, ie_ptr); + if(meas_result_eutra->have_cgi_info) { + liblte_rrc_pack_cgi_info_ie(&meas_result_eutra->cgi_info, ie_ptr); + } + liblte_rrc_pack_meas_result_ie(&meas_result_eutra->meas_result, ie_ptr); + + err = LIBLTE_SUCCESS; + } + + return(err); +} +LIBLTE_ERROR_ENUM liblte_rrc_unpack_meas_result_eutra_ie(uint8 **ie_ptr, + LIBLTE_RRC_MEAS_RESULT_EUTRA_STRUCT *meas_result_eutra) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + + if(ie_ptr != NULL && + meas_result_eutra != NULL) + { + meas_result_eutra->have_cgi_info = liblte_bits_2_value(ie_ptr, 1); + liblte_rrc_unpack_phys_cell_id_ie(ie_ptr, &meas_result_eutra->phys_cell_id); + if(meas_result_eutra->have_cgi_info) { + liblte_rrc_unpack_cgi_info_ie(ie_ptr, &meas_result_eutra->cgi_info); + } + liblte_rrc_unpack_meas_result_ie(ie_ptr, &meas_result_eutra->meas_result); + + err = LIBLTE_SUCCESS; + } + + return(err); +} + +LIBLTE_ERROR_ENUM liblte_rrc_pack_measurement_report_msg(LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *meas_report, + LIBLTE_BIT_MSG_STRUCT *msg) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + uint8 *msg_ptr = msg->msg; + + if(meas_report != NULL && + msg != NULL) + { + //MeasurementReport + liblte_value_2_bits(0, &msg_ptr, 1); //critical extensions + liblte_value_2_bits(0, &msg_ptr, 3); //c1 + + //MeasurementReport-r8-IEs + liblte_value_2_bits(0, &msg_ptr, 1); //non-critical extensions + + //MeasResults + liblte_value_2_bits(0, &msg_ptr, 1); //ext + liblte_value_2_bits(meas_report->have_meas_result_neigh_cells, &msg_ptr, 1); + liblte_rrc_pack_meas_id_ie(meas_report->meas_id, &msg_ptr); + liblte_rrc_pack_rsrp_range_ie(meas_report->pcell_rsrp_result, &msg_ptr); + liblte_rrc_pack_rsrq_range_ie(meas_report->pcell_rsrq_result, &msg_ptr); + if(meas_report->have_meas_result_neigh_cells) { + liblte_value_2_bits(0, &msg_ptr, 1); //choice from before extension marker + liblte_value_2_bits(meas_report->meas_result_neigh_cells_choice, &msg_ptr, 2); + if(meas_report->meas_result_neigh_cells_choice != LIBLTE_RRC_MEAS_RESULT_LIST_EUTRA) { + printf("NOT HANDLING %s\n", liblte_rrc_meas_reult_neigh_cells_text[meas_report->meas_result_neigh_cells_choice]); + } else { + //MeasResultListEUTRA + liblte_value_2_bits(meas_report->meas_result_neigh_cells.eutra.n_result-1, &msg_ptr, 3); + for(uint32 i=0; imeas_result_neigh_cells.eutra.n_result; i++) { + liblte_rrc_pack_meas_result_eutra_ie(&meas_report->meas_result_neigh_cells.eutra.result_eutra_list[i], &msg_ptr); + } + } + } + + // Fill in the number of bits used + msg->N_bits = msg_ptr - msg->msg; + + err = LIBLTE_SUCCESS; + } + + return(err); +} +LIBLTE_ERROR_ENUM liblte_rrc_unpack_measurement_report_msg(LIBLTE_BIT_MSG_STRUCT *msg, + LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *meas_report) +{ + LIBLTE_ERROR_ENUM err = LIBLTE_ERROR_INVALID_INPUTS; + uint8 *msg_ptr = msg->msg; + + if(msg != NULL && + meas_report != NULL) + { + //MeasurementReport + bool crit_ext = liblte_bits_2_value(&msg_ptr, 1); //critical extensions + liblte_bits_2_value(&msg_ptr, 3); //c1 + + //MeasurementReport-r8-IEs + bool non_crit_ext = liblte_bits_2_value(&msg_ptr, 1); //non-critical extensions + + //MeasResults + bool ext = liblte_bits_2_value(&msg_ptr, 1); + meas_report->have_meas_result_neigh_cells = liblte_bits_2_value(&msg_ptr, 1); + liblte_rrc_unpack_meas_id_ie(&msg_ptr, &meas_report->meas_id); + liblte_rrc_unpack_rsrp_range_ie(&msg_ptr, &meas_report->pcell_rsrp_result); + liblte_rrc_unpack_rsrq_range_ie(&msg_ptr, &meas_report->pcell_rsrq_result); + if(meas_report->have_meas_result_neigh_cells) { + liblte_bits_2_value(&msg_ptr, 1); //choice from before extension marker + meas_report->meas_result_neigh_cells_choice = (LIBLTE_RRC_MEAS_RESULT_NEIGH_CELLS_ENUM) liblte_bits_2_value(&msg_ptr, 2); + if(meas_report->meas_result_neigh_cells_choice != LIBLTE_RRC_MEAS_RESULT_LIST_EUTRA) { + printf("NOT HANDLING %s\n", liblte_rrc_meas_reult_neigh_cells_text[meas_report->meas_result_neigh_cells_choice]); + } else { + //MeasResultListEUTRA + meas_report->meas_result_neigh_cells.eutra.n_result = liblte_bits_2_value(&msg_ptr, 3) + 1; + for(uint32 i=0; imeas_result_neigh_cells.eutra.n_result; i++) { + liblte_rrc_unpack_meas_result_eutra_ie(&msg_ptr, &meas_report->meas_result_neigh_cells.eutra.result_eutra_list[i]); + } + } + } + + //skip extensions + liblte_rrc_consume_noncrit_extension(crit_ext, __func__, &msg_ptr); + liblte_rrc_consume_noncrit_extension(non_crit_ext, __func__, &msg_ptr); + liblte_rrc_consume_noncrit_extension(ext, __func__, &msg_ptr); + + err = LIBLTE_SUCCESS; + } + + return(err); +} /********************************************************************* Message Name: MBSFN Area Configuration @@ -13550,9 +13787,8 @@ LIBLTE_ERROR_ENUM liblte_rrc_pack_ul_dcch_msg(LIBLTE_RRC_UL_DCCH_MSG_STRUCT *ul_ err = liblte_rrc_pack_csfb_parameters_request_cdma2000_msg((LIBLTE_RRC_CSFB_PARAMETERS_REQUEST_CDMA2000_STRUCT *)&ul_dcch_msg->msg, &global_msg); }else if(LIBLTE_RRC_UL_DCCH_MSG_TYPE_MEASUREMENT_REPORT == ul_dcch_msg->msg_type){ - printf("NOT HANDLING MEASUREMENT REPORT\n"); -// err = liblte_rrc_pack_measurement_report_msg((LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *)&ul_dcch_msg->msg, -// &global_msg); + err = liblte_rrc_pack_measurement_report_msg((LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *)&ul_dcch_msg->msg, + &global_msg); }else if(LIBLTE_RRC_UL_DCCH_MSG_TYPE_RRC_CON_RECONFIG_COMPLETE == ul_dcch_msg->msg_type){ err = liblte_rrc_pack_rrc_connection_reconfiguration_complete_msg((LIBLTE_RRC_CONNECTION_RECONFIGURATION_COMPLETE_STRUCT *)&ul_dcch_msg->msg, &global_msg); @@ -13630,9 +13866,8 @@ LIBLTE_ERROR_ENUM liblte_rrc_unpack_ul_dcch_msg(LIBLTE_BIT_MSG_STRUCT *m err = liblte_rrc_unpack_csfb_parameters_request_cdma2000_msg(&global_msg, (LIBLTE_RRC_CSFB_PARAMETERS_REQUEST_CDMA2000_STRUCT *)&ul_dcch_msg->msg); }else if(LIBLTE_RRC_UL_DCCH_MSG_TYPE_MEASUREMENT_REPORT == ul_dcch_msg->msg_type){ - printf("NOT HANDLING MEASUREMENT REPORT\n"); -// err = liblte_rrc_unpack_measurement_report_msg(&global_msg, -// (LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *)&ul_dcch_msg->msg); + err = liblte_rrc_unpack_measurement_report_msg(&global_msg, + (LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *)&ul_dcch_msg->msg); }else if(LIBLTE_RRC_UL_DCCH_MSG_TYPE_RRC_CON_RECONFIG_COMPLETE == ul_dcch_msg->msg_type){ err = liblte_rrc_unpack_rrc_connection_reconfiguration_complete_msg(&global_msg, (LIBLTE_RRC_CONNECTION_RECONFIGURATION_COMPLETE_STRUCT *)&ul_dcch_msg->msg); diff --git a/lib/src/common/logger_file.cc b/lib/src/common/logger_file.cc index 94e5e8405..739eded8b 100644 --- a/lib/src/common/logger_file.cc +++ b/lib/src/common/logger_file.cc @@ -55,7 +55,7 @@ void logger_file::init(std::string file) { filename = file; logfile = fopen(filename.c_str(), "w"); if(logfile==NULL) { - printf("Error: could not create log file, no messages will be logged"); + printf("Error: could not create log file, no messages will be logged!\n"); } start(); inited = true; diff --git a/lib/src/phy/ch_estimation/refsignal_dl.c b/lib/src/phy/ch_estimation/refsignal_dl.c index 9adbc6c18..308be6877 100644 --- a/lib/src/phy/ch_estimation/refsignal_dl.c +++ b/lib/src/phy/ch_estimation/refsignal_dl.c @@ -86,11 +86,7 @@ uint32_t srslte_refsignal_cs_nof_symbols(uint32_t port_id) uint32_t srslte_refsignal_mbsfn_nof_symbols() { - if(false){ - return 3; - }else{ - return 3; - } + return 3; } diff --git a/lib/src/phy/common/phy_common.c b/lib/src/phy/common/phy_common.c index b502fbd75..b4f5e122b 100644 --- a/lib/src/phy/common/phy_common.c +++ b/lib/src/phy/common/phy_common.c @@ -605,7 +605,7 @@ int srslte_band_get_fd_region(enum band_geographical_area region, srslte_earfcn_ /* Returns the interval tti1-tti2 mod 10240 */ uint32_t srslte_tti_interval(uint32_t tti1, uint32_t tti2) { - if (tti1 > tti2) { + if (tti1 >= tti2) { return tti1-tti2; } else { return 10240-tti2+tti1; diff --git a/lib/src/phy/dft/dft_fftw.c b/lib/src/phy/dft/dft_fftw.c index b4a627742..9d6898117 100644 --- a/lib/src/phy/dft/dft_fftw.c +++ b/lib/src/phy/dft/dft_fftw.c @@ -56,9 +56,7 @@ void srslte_dft_load() { void srslte_dft_exit() { #ifdef FFTW_WISDOM_FILE - if (!fftwf_export_wisdom_to_filename(FFTW_WISDOM_FILE)) { - fprintf(stderr, "Error saving FFTW wisdom to file %s\n", FFTW_WISDOM_FILE); - } + fftwf_export_wisdom_to_filename(FFTW_WISDOM_FILE); #endif } @@ -93,6 +91,27 @@ static void allocate(srslte_dft_plan_t *plan, int size_in, int size_out, int len plan->out = fftwf_malloc(size_out*len); } +int srslte_dft_replan_guru_c(srslte_dft_plan_t *plan, const int new_dft_points, cf_t *in_buffer, + cf_t *out_buffer, int istride, int ostride, int how_many, + int idist, int odist) { + int sign = (plan->forward) ? FFTW_FORWARD : FFTW_BACKWARD; + + const fftwf_iodim iodim = {new_dft_points, istride, ostride}; + const fftwf_iodim howmany_dims = {how_many, idist, odist}; + + /* Destroy current plan */ + fftwf_destroy_plan(plan->p); + + plan->p = fftwf_plan_guru_dft(1, &iodim, 1, &howmany_dims, in_buffer, out_buffer, sign, FFTW_TYPE); + if (!plan->p) { + return -1; + } + plan->size = new_dft_points; + plan->init_size = plan->size; + + return 0; +} + int srslte_dft_replan_c(srslte_dft_plan_t *plan, const int new_dft_points) { int sign = (plan->dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; if (plan->p) { @@ -107,6 +126,32 @@ int srslte_dft_replan_c(srslte_dft_plan_t *plan, const int new_dft_points) { return 0; } +int srslte_dft_plan_guru_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_dir_t dir, cf_t *in_buffer, + cf_t *out_buffer, int istride, int ostride, int how_many, + int idist, int odist) { + int sign = (dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; + + const fftwf_iodim iodim = {dft_points, istride, ostride}; + const fftwf_iodim howmany_dims = {how_many, idist, odist}; + + plan->p = fftwf_plan_guru_dft(1, &iodim, 1, &howmany_dims, in_buffer, out_buffer, sign, FFTW_TYPE); + if (!plan->p) { + return -1; + } + plan->size = dft_points; + plan->init_size = plan->size; + plan->mode = SRSLTE_DFT_COMPLEX; + plan->dir = dir; + plan->forward = (dir==SRSLTE_DFT_FORWARD)?true:false; + plan->mirror = false; + plan->db = false; + plan->norm = false; + plan->dc = false; + plan->is_guru = true; + + return 0; +} + int srslte_dft_plan_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_dir_t dir) { allocate(plan,sizeof(fftwf_complex),sizeof(fftwf_complex), dft_points); int sign = (dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; @@ -123,6 +168,7 @@ int srslte_dft_plan_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_ plan->db = false; plan->norm = false; plan->dc = false; + plan->is_guru = false; return 0; } @@ -232,6 +278,14 @@ void srslte_dft_run_c(srslte_dft_plan_t *plan, cf_t *in, cf_t *out) { plan->forward, plan->mirror, plan->dc); } +void srslte_dft_run_guru_c(srslte_dft_plan_t *plan) { + if (plan->is_guru == true) { + fftwf_execute(plan->p); + } else { + fprintf(stderr, "srslte_dft_run_guru_c: the selected plan is not guru!\n"); + } +} + void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out) { float norm; int i; @@ -255,8 +309,10 @@ void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out) { void srslte_dft_plan_free(srslte_dft_plan_t *plan) { if (!plan) return; if (!plan->size) return; - if (plan->in) fftwf_free(plan->in); - if (plan->out) fftwf_free(plan->out); + if (!plan->is_guru) { + if (plan->in) fftwf_free(plan->in); + if (plan->out) fftwf_free(plan->out); + } if (plan->p) fftwf_destroy_plan(plan->p); bzero(plan, sizeof(srslte_dft_plan_t)); } diff --git a/lib/src/phy/dft/ofdm.c b/lib/src/phy/dft/ofdm.c index db5939274..8ea690bb5 100644 --- a/lib/src/phy/dft/ofdm.c +++ b/lib/src/phy/dft/ofdm.c @@ -37,23 +37,79 @@ #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" +/* Uncomment next line for avoiding Guru DFT call */ +//#define AVOID_GURU -int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof_prb, srslte_dft_dir_t dir) { - return srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, dir, SRSLTE_SF_NORM); +int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir) { + return srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, dir, SRSLTE_SF_NORM); } +int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, srslte_sf_t sf_type) { -int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, srslte_sf_t sf_type) { + /* Set OFDM object attributes */ + q->symbol_sz = (uint32_t) symbol_sz; + q->nof_symbols = SRSLTE_CP_NSYMB(cp); + q->nof_symbols_mbsfn = SRSLTE_CP_NSYMB(SRSLTE_CP_EXT); + q->cp = cp; + q->freq_shift = false; + q->nof_re = (uint32_t) nof_prb * SRSLTE_NRE; + q->nof_guards = ((symbol_sz - q->nof_re) / 2); + q->slot_sz = (uint32_t) SRSLTE_SLOT_LEN(symbol_sz); + q->sf_sz = (uint32_t) SRSLTE_SF_LEN(symbol_sz); + q->in_buffer = in_buffer; + q->out_buffer= out_buffer; if (srslte_dft_plan_c(&q->fft_plan, symbol_sz, dir)) { fprintf(stderr, "Error: Creating DFT plan\n"); return -1; } + +#ifdef AVOID_GURU q->tmp = srslte_vec_malloc((uint32_t) symbol_sz * sizeof(cf_t)); if (!q->tmp) { perror("malloc"); return -1; } + bzero(q->tmp, sizeof(cf_t) * symbol_sz); +#else + int cp1 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(0, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + int cp2 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(1, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + + q->tmp = srslte_vec_malloc(sizeof(cf_t) * q->sf_sz); + if (!q->tmp) { + perror("malloc"); + return -1; + } + bzero(q->tmp, sizeof(cf_t) * q->sf_sz); + + if (dir == SRSLTE_DFT_BACKWARD) { + bzero(in_buffer, sizeof(cf_t) * SRSLTE_SF_LEN_RE(nof_prb, cp)); + }else { + bzero(in_buffer, sizeof(cf_t) * q->sf_sz); + } + + for (int slot = 0; slot < 2; slot++) { + //bzero(&q->fft_plan_sf[slot], sizeof(srslte_dft_plan_t)); + //bzero(q->tmp + SRSLTE_CP_NSYMB(cp)*symbol_sz*slot, sizeof(cf_t) * (cp1 + (SRSLTE_CP_NSYMB(cp) - 1)*cp2 + SRSLTE_CP_NSYMB(cp)*symbol_sz)); + if (dir == SRSLTE_DFT_FORWARD) { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + in_buffer + cp1 + q->slot_sz * slot, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz + cp2, symbol_sz)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } else { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + out_buffer + cp1 + q->slot_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz, symbol_sz + cp2)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } + } +#endif q->shift_buffer = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN(symbol_sz)); if (!q->shift_buffer) { @@ -64,15 +120,6 @@ int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int srslte_dft_plan_set_mirror(&q->fft_plan, true); srslte_dft_plan_set_dc(&q->fft_plan, true); - q->symbol_sz = (uint32_t) symbol_sz; - q->nof_symbols = SRSLTE_CP_NSYMB(cp); - q->nof_symbols_mbsfn = SRSLTE_CP_NSYMB(SRSLTE_CP_EXT); - q->cp = cp; - q->freq_shift = false; - q->nof_re = nof_prb * SRSLTE_NRE; - q->nof_guards = ((symbol_sz - q->nof_re) / 2); - q->slot_sz = SRSLTE_SLOT_LEN(symbol_sz); - DEBUG("Init %s symbol_sz=%d, nof_symbols=%d, cp=%s, nof_re=%d, nof_guards=%d\n", dir==SRSLTE_DFT_FORWARD?"FFT":"iFFT", q->symbol_sz, q->nof_symbols, q->cp==SRSLTE_CP_NORM?"Normal":"Extended", q->nof_re, q->nof_guards); @@ -101,9 +148,60 @@ int srslte_ofdm_replan_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof q->symbol_sz = (uint32_t) symbol_sz; q->nof_symbols = SRSLTE_CP_NSYMB(cp); q->cp = cp; - q->nof_re = nof_prb * SRSLTE_NRE; + q->nof_re = (uint32_t) nof_prb * SRSLTE_NRE; q->nof_guards = ((symbol_sz - q->nof_re) / 2); - q->slot_sz = SRSLTE_SLOT_LEN(symbol_sz); + q->slot_sz = (uint32_t) SRSLTE_SLOT_LEN(symbol_sz); + q->sf_sz = (uint32_t) SRSLTE_SF_LEN(symbol_sz); + +#ifndef AVOID_GURU + cf_t *in_buffer = q->in_buffer; + cf_t *out_buffer = q->out_buffer; + + int cp1 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(0, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + int cp2 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(1, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + + srslte_dft_dir_t dir = q->fft_plan_sf[0].dir; + + if (q->tmp) { + free(q->tmp); + } + + q->tmp = srslte_vec_malloc(sizeof(cf_t) * q->sf_sz); + if (!q->tmp) { + perror("malloc"); + return -1; + } + bzero(q->tmp, sizeof(cf_t) * q->sf_sz); + + if (dir == SRSLTE_DFT_BACKWARD) { + bzero(in_buffer, sizeof(cf_t) * SRSLTE_SF_LEN_RE(nof_prb, cp)); + }else { + bzero(in_buffer, sizeof(cf_t) * q->sf_sz); + } + + for (int slot = 0; slot < 2; slot++) { + srslte_dft_plan_free(&q->fft_plan_sf[slot]); + + if (dir == SRSLTE_DFT_FORWARD) { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + in_buffer + cp1 + q->slot_sz * slot, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz + cp2, symbol_sz)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } else { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + out_buffer + cp1 + q->slot_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz, symbol_sz + cp2)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } + } +#endif /* AVOID_GURU */ + if (q->freq_shift) { srslte_ofdm_set_freq_shift(q, q->freq_shift_f); @@ -118,6 +216,15 @@ int srslte_ofdm_replan_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof void srslte_ofdm_free_(srslte_ofdm_t *q) { srslte_dft_plan_free(&q->fft_plan); + +#ifndef AVOID_GURU + for (int slot = 0; slot < 2; slot++) { + if (q->fft_plan_sf[slot].init_size) { + srslte_dft_plan_free(&q->fft_plan_sf[slot]); + } + } +#endif + if (q->tmp) { free(q->tmp); } @@ -127,28 +234,28 @@ void srslte_ofdm_free_(srslte_ofdm_t *q) { bzero(q, sizeof(srslte_ofdm_t)); } -int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { +int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t max_prb) { int symbol_sz = srslte_symbol_sz(max_prb); if (symbol_sz < 0) { fprintf(stderr, "Error: Invalid nof_prb=%d\n", max_prb); return -1; } q->max_prb = max_prb; - return srslte_ofdm_init_(q, cp, symbol_sz, max_prb, SRSLTE_DFT_FORWARD); + return srslte_ofdm_init_(q, cp, in_buffer, out_buffer, symbol_sz, max_prb, SRSLTE_DFT_FORWARD); } -int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) +int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t nof_prb) { int symbol_sz = srslte_symbol_sz(nof_prb); if (symbol_sz < 0) { fprintf(stderr, "Error: Invalid nof_prb=%d\n", nof_prb); return -1; } - return srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, SRSLTE_DFT_FORWARD, SRSLTE_SF_MBSFN); + return srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, SRSLTE_DFT_FORWARD, SRSLTE_SF_MBSFN); } -int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { +int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t max_prb) { uint32_t i; int ret; @@ -158,7 +265,7 @@ int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { return -1; } q->max_prb = max_prb; - ret = srslte_ofdm_init_(q, cp, symbol_sz, max_prb, SRSLTE_DFT_BACKWARD); + ret = srslte_ofdm_init_(q, cp, in_buffer, out_buffer, symbol_sz, max_prb, SRSLTE_DFT_BACKWARD); if (ret == SRSLTE_SUCCESS) { @@ -173,7 +280,7 @@ int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { return ret; } -int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) +int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t nof_prb) { uint32_t i; int ret; @@ -184,7 +291,7 @@ int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb return -1; } - ret = srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, SRSLTE_DFT_BACKWARD, SRSLTE_SF_MBSFN); + ret = srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, SRSLTE_DFT_BACKWARD, SRSLTE_SF_MBSFN); if (ret == SRSLTE_SUCCESS) { srslte_dft_plan_set_norm(&q->fft_plan, false); @@ -207,7 +314,8 @@ int srslte_ofdm_rx_set_prb(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) { } return srslte_ofdm_replan_(q, cp, symbol_sz, nof_prb); } else { - fprintf(stderr, "OFDM: Error calling set_prb: nof_prb must be equal or lower initialized max_prb\n"); + fprintf(stderr, "OFDM (Rx): Error calling set_prb: nof_prb (%d) must be equal or lower initialized max_prb (%d)\n", + nof_prb, q->max_prb); return -1; } } @@ -234,7 +342,8 @@ int srslte_ofdm_tx_set_prb(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) { } return ret; } else { - fprintf(stderr, "OFDM: Error calling set_prb: nof_prb must be equal or lower initialized max_prb\n"); + fprintf(stderr, "OFDM (Tx): Error calling set_prb: nof_prb (%d) must be equal or lower initialized max_prb (%d)\n", + nof_prb, q->max_prb); return -1; } } @@ -274,8 +383,12 @@ void srslte_ofdm_tx_free(srslte_ofdm_t *q) { /* Transforms input samples into output OFDM symbols. * Performs FFT on a each symbol and removes CP. */ -void srslte_ofdm_rx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { +void srslte_ofdm_rx_slot(srslte_ofdm_t *q, int slot_in_sf) { + cf_t *output = q->out_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + +#ifdef AVOID_GURU uint32_t i; + cf_t *input = q->in_buffer + slot_in_sf * q->slot_sz; for (i=0;inof_symbols;i++) { input += SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); srslte_dft_run_c(&q->fft_plan, input, q->tmp); @@ -283,6 +396,25 @@ void srslte_ofdm_rx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { input += q->symbol_sz; output += q->nof_re; } +#else + float norm = 1.0f/sqrtf(q->fft_plan.size); + cf_t *tmp = q->tmp + slot_in_sf * q->symbol_sz * q->nof_symbols; + uint32_t dc = (q->fft_plan.dc) ? 1:0; + + srslte_dft_run_guru_c(&q->fft_plan_sf[slot_in_sf]); + + for (int i = 0; i < q->nof_symbols; i++) { + memcpy(output, tmp + q->symbol_sz - q->nof_re / 2, sizeof(cf_t) * q->nof_re / 2); + memcpy(output + q->nof_re / 2, &tmp[dc], sizeof(cf_t) * q->nof_re / 2); + + if (q->fft_plan.norm) { + srslte_vec_sc_prod_cfc(output, norm, output, q->nof_re); + } + + tmp += q->symbol_sz; + output += q->nof_re; + } +#endif } void srslte_ofdm_rx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output) @@ -314,29 +446,32 @@ void srslte_ofdm_rx_slot_zerocopy(srslte_ofdm_t *q, cf_t *input, cf_t *output) { } } -void srslte_ofdm_rx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) { - uint32_t n; +void srslte_ofdm_rx_sf(srslte_ofdm_t *q) { + uint32_t n; if (q->freq_shift) { - srslte_vec_prod_ccc(input, q->shift_buffer, input, 2*q->slot_sz); + srslte_vec_prod_ccc(q->in_buffer, q->shift_buffer, q->in_buffer, 2*q->slot_sz); } if(!q->mbsfn_subframe){ for (n=0;n<2;n++) { - srslte_ofdm_rx_slot(q, &input[n*q->slot_sz], &output[n*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot(q, n); } } else{ - srslte_ofdm_rx_slot_mbsfn(q, &input[0*q->slot_sz], &output[0*q->nof_re*q->nof_symbols]); - srslte_ofdm_rx_slot(q, &input[1*q->slot_sz], &output[1*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot_mbsfn(q, &q->in_buffer[0*q->slot_sz], &q->out_buffer[0*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot(q, 1); } } /* Transforms input OFDM symbols into output samples. * Performs FFT on a each symbol and adds CP. */ -void srslte_ofdm_tx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { - uint32_t i, cp_len; - for (i=0;inof_symbols;i++) { - cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); +void srslte_ofdm_tx_slot(srslte_ofdm_t *q, int slot_in_sf) { + cf_t *input = q->in_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + cf_t *output = q->out_buffer + slot_in_sf * q->slot_sz; + +#ifdef AVOID_GURU + for (int i=0;inof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); memcpy(&q->tmp[q->nof_guards], input, q->nof_re * sizeof(cf_t)); srslte_dft_run_c(&q->fft_plan, q->tmp, &output[cp_len]); input += q->nof_re; @@ -344,6 +479,60 @@ void srslte_ofdm_tx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { memcpy(output, &output[q->symbol_sz], cp_len * sizeof(cf_t)); output += q->symbol_sz + cp_len; } +#else + float norm = 1.0f/sqrtf(q->symbol_sz); + cf_t *tmp = q->tmp + slot_in_sf * q->symbol_sz * q->nof_symbols; + + bzero(tmp, q->slot_sz); + uint32_t dc = (q->fft_plan.dc) ? 1:0; + + for (int i = 0; i < q->nof_symbols; i++) { + memcpy(&tmp[dc], &input[q->nof_re / 2], q->nof_re / 2 * sizeof(cf_t)); + memcpy(&tmp[q->symbol_sz - q->nof_re / 2], &input[0], q->nof_re / 2 * sizeof(cf_t)); + + input += q->nof_re; + tmp += q->symbol_sz; + } + + srslte_dft_run_guru_c(&q->fft_plan_sf[slot_in_sf]); + + for (int i=0;inof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp) ? SRSLTE_CP_LEN_NORM(i, q->symbol_sz) : SRSLTE_CP_LEN_EXT(q->symbol_sz); + + if (q->fft_plan.norm) { + srslte_vec_sc_prod_cfc(&output[cp_len], norm, &output[cp_len], q->symbol_sz); + } + + /* add CP */ + memcpy(output, &output[q->symbol_sz], cp_len * sizeof(cf_t)); + output += q->symbol_sz + cp_len; + } +#endif + + /*input = q->in_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + cf_t *output2 = srslte_vec_malloc(sizeof(cf_t) * q->slot_sz); + cf_t *o2 = output2; + bzero(q->tmp, sizeof(cf_t)*q->symbol_sz); + //bzero(output2, sizeof(cf_t)*q->slot_sz); + for (int i=0;inof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); + memcpy(&q->tmp[q->nof_guards], input, q->nof_re * sizeof(cf_t)); + srslte_dft_run_c(&q->fft_plan, q->tmp, &o2[cp_len]); + input += q->nof_re; + memcpy(o2, &o2[q->symbol_sz], cp_len * sizeof(cf_t)); + o2 += q->symbol_sz + cp_len; + } + cf_t *output1 = q->out_buffer + slot_in_sf * q->slot_sz;//srslte_vec_malloc(sizeof(cf_t) * q->slot_sz); + + for (int i = 0; i < q->slot_sz; i++) { + float error = cabsf(output1[i] - output2[i])/cabsf(output2[i]); + cf_t k = output1[i]/output2[i]; + if (error > 0.1) printf("%d/%05d error=%f output=%+f%+fi gold=%+f%+fi k=%+f%+fi\n", slot_in_sf, i, error, + __real__ output1[i], __imag__ output1[i], + __real__ output2[i], __imag__ output2[i], + __real__ k, __imag__ k); + } + free(output2);/**/ } void srslte_ofdm_tx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output) @@ -369,20 +558,20 @@ void srslte_ofdm_set_normalize(srslte_ofdm_t *q, bool normalize_enable) { srslte_dft_plan_set_norm(&q->fft_plan, normalize_enable); } -void srslte_ofdm_tx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) +void srslte_ofdm_tx_sf(srslte_ofdm_t *q) { - uint32_t n; + uint32_t n; if(!q->mbsfn_subframe){ for (n=0;n<2;n++) { - srslte_ofdm_tx_slot(q, &input[n*q->nof_re*q->nof_symbols], &output[n*q->slot_sz]); + srslte_ofdm_tx_slot(q, n); } } else{ - srslte_ofdm_tx_slot_mbsfn(q, &input[0*q->nof_re*q->nof_symbols], &output[0*q->slot_sz]); - srslte_ofdm_tx_slot(q, &input[1*q->nof_re*q->nof_symbols], &output[1*q->slot_sz]); + srslte_ofdm_tx_slot_mbsfn(q, &q->in_buffer[0*q->nof_re*q->nof_symbols], &q->out_buffer[0*q->slot_sz]); + srslte_ofdm_tx_slot(q, 1); } if (q->freq_shift) { - srslte_vec_prod_ccc(output, q->shift_buffer, output, 2*q->slot_sz); + srslte_vec_prod_ccc(q->out_buffer, q->shift_buffer, q->out_buffer, 2*q->slot_sz); } } diff --git a/lib/src/phy/dft/test/ofdm_test.c b/lib/src/phy/dft/test/ofdm_test.c index 11aac7f4e..e77fcd39e 100644 --- a/lib/src/phy/dft/test/ofdm_test.c +++ b/lib/src/phy/dft/test/ofdm_test.c @@ -35,16 +35,28 @@ int nof_prb = -1; srslte_cp_t cp = SRSLTE_CP_NORM; +int nof_repetitions = 128; + +static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { + if (ts_end->tv_usec > ts_start->tv_usec) { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + + (double) ts_end->tv_usec - (double) ts_start->tv_usec; + } else { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec - 1) * 1000000 + + ((double) ts_end->tv_usec + 1000000) - (double) ts_start->tv_usec; + } +} void usage(char *prog) { printf("Usage: %s\n", prog); printf("\t-n nof_prb [Default All]\n"); printf("\t-e extended cyclic prefix [Default Normal]\n"); + printf("\t-r nof_repetitions [Default %d]\n", nof_repetitions); } void parse_args(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "ne")) != -1) { + while ((opt = getopt(argc, argv, "ner")) != -1) { switch (opt) { case 'n': nof_prb = atoi(argv[optind]); @@ -52,6 +64,9 @@ void parse_args(int argc, char **argv) { case 'e': cp = SRSLTE_CP_EXT; break; + case 'r': + nof_repetitions = atoi(argv[optind]); + break; default: usage(argv[0]); exit(-1); @@ -61,6 +76,7 @@ void parse_args(int argc, char **argv) { int main(int argc, char **argv) { + struct timeval start, end; srslte_ofdm_t fft, ifft; cf_t *input, *outfft, *outifft; float mse; @@ -81,48 +97,65 @@ int main(int argc, char **argv) { printf("Running test for %d PRB, %d RE... ", n_prb, n_re);fflush(stdout); - input = malloc(sizeof(cf_t) * n_re); + input = srslte_vec_malloc(sizeof(cf_t) * n_re * 2); if (!input) { perror("malloc"); exit(-1); } - outfft = malloc(sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb))); + outfft = srslte_vec_malloc(sizeof(cf_t) * n_re * 2); if (!outfft) { perror("malloc"); exit(-1); } - outifft = malloc(sizeof(cf_t) * n_re); + outifft = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb)) * 2); if (!outifft) { perror("malloc"); exit(-1); } + bzero(outifft, sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb)) * 2); - if (srslte_ofdm_rx_init(&fft, cp, n_prb)) { + if (srslte_ofdm_rx_init(&fft, cp, outifft, outfft, n_prb)) { fprintf(stderr, "Error initializing FFT\n"); exit(-1); } - srslte_dft_plan_set_norm(&fft.fft_plan, true); + srslte_ofdm_set_normalize(&fft, true); - if (srslte_ofdm_tx_init(&ifft, cp, n_prb)) { + if (srslte_ofdm_tx_init(&ifft, cp, input, outifft, n_prb)) { fprintf(stderr, "Error initializing iFFT\n"); exit(-1); } - srslte_dft_plan_set_norm(&ifft.fft_plan, true); + srslte_ofdm_set_normalize(&ifft, true); for (i=0;i 1.0f) printf("%04d. %+.1f%+.1fi Vs. %+.1f%+.1f %+.1f%+.1f (mse=%f)\n", i, __real__ input[i], __imag__ input[i], __real__ outifft[i], __imag__ outifft[i], __real__ outfft[i], __imag__ outfft[i], mse); } - printf("MSE=%f\n", mse); + /*for (i=0;i= 0.07) { printf("MSE too large\n"); diff --git a/lib/src/phy/enb/enb_dl.c b/lib/src/phy/enb/enb_dl.c index b845b4c8a..fbc22212d 100644 --- a/lib/src/phy/enb/enb_dl.c +++ b/lib/src/phy/enb/enb_dl.c @@ -41,7 +41,7 @@ #define SRSLTE_ENB_RF_AMP 0.1 -int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) +int srslte_enb_dl_init(srslte_enb_dl_t *q, cf_t *out_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -53,13 +53,26 @@ int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) q->cfi = 3; q->tx_amp = SRSLTE_ENB_RF_AMP; - - if (srslte_ofdm_tx_init(&q->ifft, SRSLTE_CP_NORM, max_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - goto clean_exit; + + for (int i=0;isf_symbols[i] = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->sf_symbols[i]) { + perror("malloc"); + goto clean_exit; + } + q->slot1_symbols[i] = &q->sf_symbols[i][SRSLTE_SLOT_LEN_RE(max_prb, SRSLTE_CP_NORM)]; + } + + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + if (srslte_ofdm_tx_init(&q->ifft[i], SRSLTE_CP_NORM, q->sf_symbols[i], out_buffer[i], max_prb)) { + fprintf(stderr, "Error initiating FFT (%d)\n", i); + goto clean_exit; + } } - srslte_ofdm_set_normalize(&q->ifft, true); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + srslte_ofdm_set_normalize(&q->ifft[i], true); + } if (srslte_pbch_init(&q->pbch)) { fprintf(stderr, "Error creating PBCH object\n"); @@ -89,15 +102,6 @@ int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) goto clean_exit; } - for (int i=0;isf_symbols[i] = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->sf_symbols[i]) { - perror("malloc"); - goto clean_exit; - } - q->slot1_symbols[i] = &q->sf_symbols[i][SRSLTE_SLOT_LEN_RE(max_prb, SRSLTE_CP_NORM)]; - } - ret = SRSLTE_SUCCESS; } else { @@ -114,7 +118,9 @@ clean_exit: void srslte_enb_dl_free(srslte_enb_dl_t *q) { if (q) { - srslte_ofdm_tx_free(&q->ifft); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + srslte_ofdm_tx_free(&q->ifft[i]); + } srslte_regs_free(&q->regs); srslte_pbch_free(&q->pbch); srslte_pcfich_free(&q->pcfich); @@ -152,9 +158,11 @@ int srslte_enb_dl_set_cell(srslte_enb_dl_t *q, srslte_cell_t cell) fprintf(stderr, "Error resizing REGs\n"); return SRSLTE_ERROR; } - if (srslte_ofdm_rx_set_prb(&q->ifft, q->cell.cp, q->cell.nof_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - return SRSLTE_ERROR; + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + if (srslte_ofdm_tx_set_prb(&q->ifft[i], q->cell.cp, q->cell.nof_prb)) { + fprintf(stderr, "Error re-planning iFFT (%d)\n", i); + return SRSLTE_ERROR; + } } if (srslte_pbch_set_cell(&q->pbch, q->cell)) { fprintf(stderr, "Error creating PBCH object\n"); @@ -260,7 +268,7 @@ void srslte_enb_dl_put_phich(srslte_enb_dl_t *q, uint8_t ack, uint32_t n_prb_low void srslte_enb_dl_put_base(srslte_enb_dl_t *q, uint32_t tti) { uint32_t sf_idx = tti%10; - + srslte_enb_dl_put_sync(q, sf_idx); srslte_enb_dl_put_refs(q, sf_idx); srslte_enb_dl_put_mib(q, tti); @@ -268,14 +276,14 @@ void srslte_enb_dl_put_base(srslte_enb_dl_t *q, uint32_t tti) } -void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q, cf_t *signal_buffer[SRSLTE_MAX_PORTS]) +void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q) { // TODO: PAPR control float norm_factor = (float) sqrt(q->cell.nof_prb)/15; - for (int p = 0; p < SRSLTE_MAX_PORTS; p++) { - srslte_ofdm_tx_sf(&q->ifft, q->sf_symbols[p], signal_buffer[p]); - srslte_vec_sc_prod_cfc(signal_buffer[p], q->tx_amp*norm_factor, signal_buffer[p], (uint32_t) SRSLTE_SF_LEN_PRB(q->cell.nof_prb)); + for (int i = 0; i < q->cell.nof_ports; i++) { + srslte_ofdm_tx_sf(&q->ifft[i]); + srslte_vec_sc_prod_cfc(q->ifft[i].out_buffer, q->tx_amp*norm_factor, q->ifft[i].out_buffer, (uint32_t) SRSLTE_SF_LEN_PRB(q->cell.nof_prb)); } } diff --git a/lib/src/phy/enb/enb_ul.c b/lib/src/phy/enb/enb_ul.c index c9c557a02..c492369dc 100644 --- a/lib/src/phy/enb/enb_ul.c +++ b/lib/src/phy/enb/enb_ul.c @@ -40,6 +40,7 @@ #define MAX_CANDIDATES 16 int srslte_enb_ul_init(srslte_enb_ul_t *q, + cf_t *in_buffer, uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -55,8 +56,20 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, perror("malloc"); goto clean_exit; } - - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + + q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->sf_symbols) { + perror("malloc"); + goto clean_exit; + } + + q->ce = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->ce) { + perror("malloc"); + goto clean_exit; + } + + if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, in_buffer, q->sf_symbols, max_prb)) { fprintf(stderr, "Error initiating FFT\n"); goto clean_exit; } @@ -80,18 +93,6 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, goto clean_exit; } - q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->sf_symbols) { - perror("malloc"); - goto clean_exit; - } - - q->ce = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->ce) { - perror("malloc"); - goto clean_exit; - } - ret = SRSLTE_SUCCESS; } else { @@ -252,9 +253,9 @@ int srslte_enb_ul_cfg_ue(srslte_enb_ul_t *q, uint16_t rnti, } } -void srslte_enb_ul_fft(srslte_enb_ul_t *q, cf_t *signal_buffer) +void srslte_enb_ul_fft(srslte_enb_ul_t *q) { - srslte_ofdm_rx_sf(&q->fft, signal_buffer, q->sf_symbols); + srslte_ofdm_rx_sf(&q->fft); } int get_pucch(srslte_enb_ul_t *q, uint16_t rnti, diff --git a/lib/src/phy/fec/rm_turbo.c b/lib/src/phy/fec/rm_turbo.c index 23929fff3..cdc8ac88d 100644 --- a/lib/src/phy/fec/rm_turbo.c +++ b/lib/src/phy/fec/rm_turbo.c @@ -741,4 +741,3 @@ int srslte_rm_turbo_rx(float *w_buff, uint32_t w_buff_len, float *input, uint32_ return 0; } - diff --git a/lib/src/phy/mimo/precoding.c b/lib/src/phy/mimo/precoding.c index 6f3e43889..a21219a91 100644 --- a/lib/src/phy/mimo/precoding.c +++ b/lib/src/phy/mimo/precoding.c @@ -33,6 +33,7 @@ #include "srslte/phy/mimo/precoding.h" #include "srslte/phy/utils/vector.h" #include "srslte/phy/utils/debug.h" +#include "srslte/phy/utils/mat.h" #ifdef LV_HAVE_SSE #include @@ -46,7 +47,6 @@ int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_ #endif #include "srslte/phy/utils/mat.h" - static srslte_mimo_decoder_t mimo_decoder = SRSLTE_MIMO_DECODER_MMSE; /************************************************ diff --git a/lib/src/phy/modem/demod_hard.c b/lib/src/phy/modem/demod_hard.c index 76f54236d..899559ecc 100644 --- a/lib/src/phy/modem/demod_hard.c +++ b/lib/src/phy/modem/demod_hard.c @@ -44,6 +44,7 @@ int srslte_demod_hard_demodulate(srslte_demod_hard_t* q, cf_t* symbols, uint8_t int nbits=-1; switch(q->mod) { + case SRSLTE_MOD_LAST: case SRSLTE_MOD_BPSK: hard_bpsk_demod(symbols,bits,nsymbols); nbits=nsymbols; diff --git a/lib/src/phy/modem/modem_table.c b/lib/src/phy/modem/modem_table.c index c19e52e77..3c4ad2417 100644 --- a/lib/src/phy/modem/modem_table.c +++ b/lib/src/phy/modem/modem_table.c @@ -82,6 +82,7 @@ int srslte_modem_table_set(srslte_modem_table_t* q, cf_t* table, uint32_t nsymbo int srslte_modem_table_lte(srslte_modem_table_t* q, srslte_mod_t modulation) { srslte_modem_table_init(q); switch(modulation) { + case SRSLTE_MOD_LAST: case SRSLTE_MOD_BPSK: q->nbits_x_symbol = 1; q->nsymbols = 2; diff --git a/lib/src/phy/phch/dci.c b/lib/src/phy/phch/dci.c index 2daa7e10d..471429145 100644 --- a/lib/src/phy/phch/dci.c +++ b/lib/src/phy/phch/dci.c @@ -111,7 +111,7 @@ int srslte_dci_rar_to_ul_grant(srslte_dci_rar_grant_t *rar, uint32_t nof_prb, srslte_ra_type2_from_riv(riv, &ul_dci->type2_alloc.L_crb, &ul_dci->type2_alloc.RB_start, nof_prb, nof_prb); - if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant, 0)) { + if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant)) { return SRSLTE_ERROR; } @@ -177,7 +177,7 @@ int srslte_dci_msg_to_ul_grant(srslte_dci_msg_t *msg, uint32_t nof_prb, return ret; } - if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant, harq_pid)) { + if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant)) { return ret; } diff --git a/lib/src/phy/phch/pmch.c b/lib/src/phy/phch/pmch.c index 99e40d50f..aa7fd77b1 100644 --- a/lib/src/phy/phch/pmch.c +++ b/lib/src/phy/phch/pmch.c @@ -381,11 +381,11 @@ int srslte_pmch_decode_multi(srslte_pmch_t *q, srslte_predecoding_single_multi(q->symbols, q->ce[0], q->d, q->nof_rx_antennas, cfg->nbits[0].nof_re, noise_estimate); if (SRSLTE_VERBOSE_ISDEBUG()) { - DEBUG("SAVED FILE subframe.dat: received subframe symbols\n",0); + DEBUG("SAVED FILE subframe.dat: received subframe symbols\n"); srslte_vec_save_file("subframe.dat", sf_symbols, SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); - DEBUG("SAVED FILE hest0.dat: channel estimates for port 4\n",0); + DEBUG("SAVED FILE hest0.dat: channel estimates for port 4\n"); srslte_vec_save_file("hest0.dat", ce[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); - DEBUG("SAVED FILE pmch_symbols.dat: symbols after equalization\n",0); + DEBUG("SAVED FILE pmch_symbols.dat: symbols after equalization\n"); srslte_vec_save_file("pmch_symbols.bin", q->d, cfg->nbits[0].nof_re*sizeof(cf_t)); } @@ -393,16 +393,13 @@ int srslte_pmch_decode_multi(srslte_pmch_t *q, * The MAX-log-MAP algorithm used in turbo decoding is unsensitive to SNR estimation, * thus we don't need tot set it in thde LLRs normalization */ - - - srslte_demod_soft_demodulate_s(cfg->grant.mcs[0].mod, q->d, q->e, cfg->nbits[0].nof_re); /* descramble */ srslte_scrambling_s_offset(&q->seqs[area_id]->seq[cfg->sf_idx], q->e, 0, cfg->nbits[0].nof_bits); if (SRSLTE_VERBOSE_ISDEBUG()) { - DEBUG("SAVED FILE llr.dat: LLR estimates after demodulation and descrambling\n",0); + DEBUG("SAVED FILE llr.dat: LLR estimates after demodulation and descrambling\n"); srslte_vec_save_file("llr.dat", q->e, cfg->nbits[0].nof_bits*sizeof(int16_t)); } return srslte_dlsch_decode(&q->dl_sch, cfg, softbuffer, q->e, data); diff --git a/lib/src/phy/phch/pusch.c b/lib/src/phy/phch/pusch.c index f52bbbb42..ba1195507 100644 --- a/lib/src/phy/phch/pusch.c +++ b/lib/src/phy/phch/pusch.c @@ -323,7 +323,7 @@ int srslte_pusch_set_cell(srslte_pusch_t *q, srslte_cell_t cell) { q->max_re = cell.nof_prb * MAX_PUSCH_RE(q->cell.cp); - INFO("PUSCH: Cell config PCI=5d, %d ports %d PRBs, max_symbols: %d\n", + INFO("PUSCH: Cell config PCI=%d, %d ports %d PRBs, max_symbols: %d\n", q->cell.id, q->cell.nof_ports, q->cell.nof_prb, q->max_re); if (q->cell.id != cell.id || q->cell.nof_prb == 0) { diff --git a/lib/src/phy/phch/ra.c b/lib/src/phy/phch/ra.c index be10c304c..913bd9548 100644 --- a/lib/src/phy/phch/ra.c +++ b/lib/src/phy/phch/ra.c @@ -185,108 +185,87 @@ int srslte_ra_ul_dci_to_grant_prb_allocation(srslte_ra_ul_dci_t *dci, srslte_ra_ } } -srslte_mod_t last_mod[8]; -uint32_t last_ul_tbs_idx[8]; -uint32_t last_dl_tbs[8]; -uint32_t last_dl_tbs2[8]; - -static int ul_dci_to_grant_mcs(srslte_ra_ul_dci_t *dci, srslte_ra_ul_grant_t *grant, uint32_t harq_pid) { - int tbs = -1; +static void ul_dci_to_grant_mcs(srslte_ra_ul_dci_t *dci, srslte_ra_ul_grant_t *grant) { // 8.6.2 First paragraph if (dci->mcs_idx <= 28) { /* Table 8.6.1-1 on 36.213 */ if (dci->mcs_idx < 11) { grant->mcs.mod = SRSLTE_MOD_QPSK; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx, grant->L_prb); } else if (dci->mcs_idx < 21) { grant->mcs.mod = SRSLTE_MOD_16QAM; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-1, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx-1; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-1, grant->L_prb); } else if (dci->mcs_idx < 29) { grant->mcs.mod = SRSLTE_MOD_64QAM; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-2, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx-2; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-2, grant->L_prb); } else { fprintf(stderr, "Invalid MCS index %d\n", dci->mcs_idx); } - last_mod[harq_pid%8] = grant->mcs.mod; } else if (dci->mcs_idx == 29 && dci->cqi_request && grant->L_prb <= 4) { // 8.6.1 and 8.6.2 36.213 second paragraph grant->mcs.mod = SRSLTE_MOD_QPSK; - tbs = srslte_ra_tbs_from_idx(last_ul_tbs_idx[harq_pid%8], grant->L_prb); - dci->rv_idx = 1; + grant->mcs.tbs = 0; + dci->rv_idx = 1; } else if (dci->mcs_idx >= 29) { - // Else use last TBS/Modulation and use mcs to obtain rv_idx - tbs = srslte_ra_tbs_from_idx(last_ul_tbs_idx[harq_pid%8], grant->L_prb); - grant->mcs.mod = last_mod[harq_pid%8]; + // Else use last TBS/Modulation and use mcs to obtain rv_idx + grant->mcs.tbs = -1; + grant->mcs.mod = SRSLTE_MOD_LAST; dci->rv_idx = dci->mcs_idx - 28; - DEBUG("TTI=%d, harq_pid=%d, mcs_idx=%d, tbs=%d, mod=%d, rv=%d\n", - harq_pid, harq_pid%8, dci->mcs_idx, tbs/8, grant->mcs.mod, dci->rv_idx); - } - if (tbs < 0) { - fprintf(stderr, "Error computing TBS\n"); - return SRSLTE_ERROR; - } else { - grant->mcs.tbs = (uint32_t) tbs; - return SRSLTE_SUCCESS; + DEBUG("mcs_idx=%d, tbs=%d, mod=%d, rv=%d\n", + dci->mcs_idx, grant->mcs.tbs/8, grant->mcs.mod, dci->rv_idx); } } -void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, uint32_t N_srs, srslte_ra_nbits_t *nbits) +void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, uint32_t N_srs, srslte_ra_nbits_t *nbits) { - nbits->nof_symb = 2*(SRSLTE_CP_NSYMB(cp)-1) - N_srs; + nbits->nof_symb = 2*(SRSLTE_CP_NSYMB(cp)-1) - N_srs; nbits->nof_re = nbits->nof_symb*grant->M_sc; nbits->nof_bits = nbits->nof_re * grant->Qm; } /** Compute PRB allocation for Uplink as defined in 8.1 and 8.4 of 36.213 */ -int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, srslte_ra_ul_grant_t *grant, - uint32_t harq_pid) +int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, srslte_ra_ul_grant_t *grant) { - - // Compute PRB allocation + + // Compute PRB allocation if (!srslte_ra_ul_dci_to_grant_prb_allocation(dci, grant, n_rb_ho, nof_prb)) { - - // Compute MCS - if (!ul_dci_to_grant_mcs(dci, grant, harq_pid)) { - - // Fill rest of grant structure - grant->mcs.idx = dci->mcs_idx; - grant->M_sc = grant->L_prb*SRSLTE_NRE; - grant->M_sc_init = grant->M_sc; // FIXME: What should M_sc_init be? - grant->Qm = srslte_mod_bits_x_symbol(grant->mcs.mod); - } else { - fprintf(stderr, "Error computing MCS\n"); - return SRSLTE_ERROR; - } + + // Compute MCS + ul_dci_to_grant_mcs(dci, grant); + + // Fill rest of grant structure + grant->mcs.idx = dci->mcs_idx; + grant->M_sc = grant->L_prb*SRSLTE_NRE; + grant->M_sc_init = grant->M_sc; // FIXME: What should M_sc_init be? + grant->Qm = srslte_mod_bits_x_symbol(grant->mcs.mod); + } else { printf("Error computing UL PRB allocation\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } return SRSLTE_SUCCESS; } -uint32_t srslte_ra_dl_approx_nof_re(srslte_cell_t cell, uint32_t nof_prb, uint32_t nof_ctrl_symbols) +uint32_t srslte_ra_dl_approx_nof_re(srslte_cell_t cell, uint32_t nof_prb, uint32_t nof_ctrl_symbols) { - uint32_t nof_refs = 0; + uint32_t nof_refs = 0; uint32_t nof_symb = 2*SRSLTE_CP_NSYMB(cell.cp)-nof_ctrl_symbols; switch(cell.nof_ports) { - case 1: - nof_refs = 2*3; - break; - case 2: - nof_refs = 4*3; - break; - case 4: - nof_refs = 4*4; - break; + case 1: + nof_refs = 2*3; + break; + case 2: + nof_refs = 4*3; + break; + case 4: + nof_refs = 4*4; + break; } return nof_prb * (nof_symb*SRSLTE_NRE-nof_refs); } /* Computes the number of RE for each PRB in the prb_dist structure */ -uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t cell, +uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t cell, uint32_t sf_idx, uint32_t nof_ctrl_symbols) { uint32_t j, s; @@ -300,7 +279,7 @@ uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t ce } } } - return nof_re; + return nof_re; } @@ -315,7 +294,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ uint32_t bitmask; uint32_t P = srslte_ra_type0_P(nof_prb); uint32_t n_rb_rbg_subset, n_rb_type1; - + bzero(grant, sizeof(srslte_ra_dl_grant_t)); switch (dci->alloc_type) { case SRSLTE_RA_ALLOC_TYPE0: @@ -352,14 +331,14 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ * P * P + dci->type1_alloc.rbg_subset * P + (i + shift) % P] = true; grant->nof_prb++; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } } memcpy(&grant->prb_idx[1], &grant->prb_idx[0], SRSLTE_MAX_PRB*sizeof(bool)); break; case SRSLTE_RA_ALLOC_TYPE2: - if (dci->type2_alloc.mode == SRSLTE_RA_TYPE2_LOC) { + if (dci->type2_alloc.mode == SRSLTE_RA_TYPE2_LOC) { for (i = 0; i < dci->type2_alloc.L_crb; i++) { grant->prb_idx[0][i + dci->type2_alloc.RB_start] = true; grant->nof_prb++; @@ -408,13 +387,13 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ if (n_tilde_prb_odd < nof_prb) { grant->prb_idx[0][n_tilde_prb_odd] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } else { if (n_tilde_prb_odd + N_gap - N_tilde_vrb / 2 < nof_prb) { grant->prb_idx[0][n_tilde_prb_odd + N_gap - N_tilde_vrb / 2] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } grant->nof_prb++; @@ -422,13 +401,13 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ if(n_tilde_prb_even < nof_prb) { grant->prb_idx[1][n_tilde_prb_even] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } else { if (n_tilde_prb_even + N_gap - N_tilde_vrb / 2 < nof_prb) { grant->prb_idx[1][n_tilde_prb_even + N_gap - N_tilde_vrb / 2] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } } @@ -442,8 +421,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ } int srslte_dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) { - uint32_t i_tbs = 0; - int tbs = -1; + int i_tbs = 0; if (mcs->idx < 10) { mcs->mod = SRSLTE_MOD_QPSK; i_tbs = mcs->idx; @@ -455,30 +433,26 @@ int srslte_dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) { i_tbs = mcs->idx-2; } else if (mcs->idx == 29) { mcs->mod = SRSLTE_MOD_QPSK; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } else if (mcs->idx == 30) { mcs->mod = SRSLTE_MOD_16QAM; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } else if (mcs->idx == 31) { mcs->mod = SRSLTE_MOD_64QAM; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } - - if (tbs == -1) { + + int tbs = -1; + if (i_tbs >= 0) { tbs = srslte_ra_tbs_from_idx(i_tbs, nprb); - if (tbs >= 0) { - mcs->tbs = tbs; - } - } - return tbs; + mcs->tbs = tbs; + } + return tbs; } int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { - uint32_t i_tbs = 0; - int tbs = -1; + uint32_t i_tbs = 0; + int tbs = -1; if (mcs->idx < 5) { mcs->mod = SRSLTE_MOD_QPSK; i_tbs = mcs->idx*2; @@ -492,7 +466,7 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { mcs->mod = SRSLTE_MOD_64QAM; i_tbs = mcs->idx + 5; }else if (mcs->idx < 28) { - //mcs->mod = SRSLTE_MOD_256QAM; + //mcs->mod = SRSLTE_MOD_256QAM; i_tbs = mcs->idx + 5; }else if (mcs->idx == 28) { mcs->mod = SRSLTE_MOD_QPSK; @@ -511,15 +485,15 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { tbs = 0; i_tbs = 0; } - - + + if (tbs == -1) { tbs = srslte_ra_tbs_from_idx(i_tbs, nprb); if (tbs >= 0) { - mcs->tbs = tbs; + mcs->tbs = tbs; } - } - return tbs; + } + return tbs; } /* Modulation order and transport block size determination 7.1.7 in 36.213 @@ -530,9 +504,9 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { * */ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *grant, bool crc_is_crnti) { uint32_t n_prb=0; - int tbs = -1; - uint32_t i_tbs = 0; - + int tbs = -1; + uint32_t i_tbs = 0; + if (!crc_is_crnti) { if (dci->dci_is_1a) { n_prb = dci->type2_alloc.n_prb1a == SRSLTE_RA_TYPE2_NPRB1A_2 ? 2 : 3; @@ -546,35 +520,23 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } } else { fprintf(stderr, "Error decoding DCI: P/SI/RA-RNTI supports Format1A/1C only\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } grant->mcs[0].mod = SRSLTE_MOD_QPSK; grant->mcs[0].tbs = (uint32_t) tbs; } else { n_prb = grant->nof_prb; - grant->nof_tb = 0; + grant->nof_tb = 0; if (dci->tb_en[0]) { grant->mcs[0].idx = dci->mcs_idx; - tbs = srslte_dl_fill_ra_mcs(&grant->mcs[0], n_prb); - if (tbs) { - last_dl_tbs[dci->harq_process%8] = tbs; - } else { - // For mcs>=29, set last TBS received for this PID - grant->mcs[0].tbs = last_dl_tbs[dci->harq_process%8]; - } + grant->mcs[0].tbs = srslte_dl_fill_ra_mcs(&grant->mcs[0], n_prb); grant->nof_tb++; } else { grant->mcs[0].tbs = 0; } if (dci->tb_en[1]) { grant->mcs[1].idx = dci->mcs_idx_1; - tbs = srslte_dl_fill_ra_mcs(&grant->mcs[1], n_prb); - if (tbs) { - last_dl_tbs2[dci->harq_process%8] = tbs; - } else { - // For mcs>=29, set last TBS received for this PID - grant->mcs[1].tbs = last_dl_tbs2[dci->harq_process%8]; - } + grant->mcs[1].tbs = srslte_dl_fill_ra_mcs(&grant->mcs[1], n_prb); } else { grant->mcs[1].tbs = 0; } @@ -587,7 +549,7 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } grant->pinfo = dci->pinfo; - if (tbs < 0) { + if (grant->mcs[0].tbs < 0 || grant->mcs[1].tbs < 0) { return SRSLTE_ERROR; } else { return SRSLTE_SUCCESS; @@ -622,10 +584,12 @@ int srslte_ra_dl_dci_to_grant(srslte_ra_dl_dci_t *dci, if (msg_rnti >= SRSLTE_CRNTI_START && msg_rnti <= SRSLTE_CRNTI_END) { crc_is_crnti = true; } - // Compute PRB allocation - if (!srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb)) { - // Compute MCS - if (!dl_dci_to_grant_mcs(dci, grant, crc_is_crnti)) { + // Compute PRB allocation + int ret =srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb); + if (!ret) { + // Compute MCS + ret = dl_dci_to_grant_mcs(dci, grant, crc_is_crnti); + if (ret == SRSLTE_SUCCESS) { // Apply Section 7.1.7.3. If RA-RNTI and Format1C rv_idx=0 if (msg_rnti >= SRSLTE_RARNTI_START && msg_rnti <= SRSLTE_RARNTI_END && dci->dci_is_1c) @@ -907,4 +871,4 @@ void srslte_ra_prb_fprint(FILE *f, srslte_ra_dl_grant_t *grant) { } } -} \ No newline at end of file +} diff --git a/lib/src/phy/phch/test/pbch_file_test.c b/lib/src/phy/phch/test/pbch_file_test.c index 734640d55..2ca12e4c9 100644 --- a/lib/src/phy/phch/test/pbch_file_test.c +++ b/lib/src/phy/phch/test/pbch_file_test.c @@ -140,7 +140,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -203,7 +203,7 @@ int main(int argc, char **argv) { if (nread > 0) { // process 1st subframe only - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); /* Get channel estimates for each port */ srslte_chest_dl_estimate(&chest, fft_buffer, ce, 0); diff --git a/lib/src/phy/phch/test/pcfich_file_test.c b/lib/src/phy/phch/test/pcfich_file_test.c index dfb8d72e3..e92d6c7ba 100644 --- a/lib/src/phy/phch/test/pcfich_file_test.c +++ b/lib/src/phy/phch/test/pcfich_file_test.c @@ -120,15 +120,15 @@ int base_init() { fmatlab = NULL; } - flen = SRSLTE_SF_LEN(srslte_symbol_sz(cell.nof_prb)); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); - input_buffer = malloc(flen * sizeof(cf_t)); + input_buffer = srslte_vec_malloc(flen * sizeof(cf_t)); if (!input_buffer) { perror("malloc"); exit(-1); } - fft_buffer = malloc(SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp) * sizeof(cf_t)); + fft_buffer = srslte_vec_malloc(SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp) * sizeof(cf_t)); if (!fft_buffer) { perror("malloc"); return -1; @@ -151,7 +151,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -215,7 +215,7 @@ int main(int argc, char **argv) { n = srslte_filesource_read(&fsrc, input_buffer, flen); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); if (fmatlab) { fprintf(fmatlab, "infft="); diff --git a/lib/src/phy/phch/test/pdcch_file_test.c b/lib/src/phy/phch/test/pdcch_file_test.c index d4ceed4b6..5482d9f98 100644 --- a/lib/src/phy/phch/test/pdcch_file_test.c +++ b/lib/src/phy/phch/test/pdcch_file_test.c @@ -126,7 +126,7 @@ int base_init() { exit(-1); } - flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz(cell.nof_prb))); + flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz_power2(cell.nof_prb))); input_buffer = malloc(flen * sizeof(cf_t)); if (!input_buffer) { @@ -157,7 +157,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -231,7 +231,7 @@ int main(int argc, char **argv) { INFO("Reading %d samples sub-frame %d\n", flen, frame_cnt); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); /* Get channel estimates for each port */ srslte_chest_dl_estimate(&chest, fft_buffer, ce, frame_cnt %10); diff --git a/lib/src/phy/phch/test/pdsch_pdcch_file_test.c b/lib/src/phy/phch/test/pdsch_pdcch_file_test.c index 90c0e1c17..0faf7eca1 100644 --- a/lib/src/phy/phch/test/pdsch_pdcch_file_test.c +++ b/lib/src/phy/phch/test/pdsch_pdcch_file_test.c @@ -129,7 +129,7 @@ int base_init() { exit(-1); } - flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz(cell.nof_prb))); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); input_buffer[0] = malloc(flen * sizeof(cf_t)); if (!input_buffer[0]) { @@ -137,7 +137,7 @@ int base_init() { exit(-1); } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, input_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initializing UE DL\n"); return -1; } diff --git a/lib/src/phy/phch/test/phich_file_test.c b/lib/src/phy/phch/test/phich_file_test.c index 968aa5cbc..65f7ce9c0 100644 --- a/lib/src/phy/phch/test/phich_file_test.c +++ b/lib/src/phy/phch/test/phich_file_test.c @@ -144,7 +144,7 @@ int base_init() { fmatlab = NULL; } - flen = SRSLTE_SF_LEN(srslte_symbol_sz(cell.nof_prb)); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); input_buffer = malloc(flen * sizeof(cf_t)); if (!input_buffer) { @@ -175,7 +175,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -242,7 +242,7 @@ int main(int argc, char **argv) { n = srslte_filesource_read(&fsrc, input_buffer, flen); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); if (fmatlab) { fprintf(fmatlab, "infft="); @@ -263,7 +263,11 @@ int main(int argc, char **argv) { for (ngroup=0;ngrouprx_md; + + uhd_rx_metadata_error_code_t error_code = 0; + uhd_rx_metadata_error_code(*md, &error_code); + + md = &handler->rx_md; n += rxd_samples; trials++; - - uhd_rx_metadata_error_code_t error_code; - uhd_rx_metadata_error_code(*md, &error_code); + if (error_code == UHD_RX_METADATA_ERROR_CODE_OVERFLOW) { log_overflow(handler); } else if (error_code == UHD_RX_METADATA_ERROR_CODE_LATE_COMMAND) { diff --git a/lib/src/phy/sync/find_sss.c b/lib/src/phy/sync/find_sss.c index 2afeced42..082aee52a 100644 --- a/lib/src/phy/sync/find_sss.c +++ b/lib/src/phy/sync/find_sss.c @@ -70,14 +70,12 @@ static void corr_all_sz_partial(cf_t z[SRSLTE_SSS_N], float s[SRSLTE_SSS_N][SRSL static void extract_pair_sss(srslte_sss_synch_t *q, cf_t *input, cf_t *ce, cf_t y[2][SRSLTE_SSS_N]) { cf_t input_fft[SRSLTE_SYMBOL_SZ_MAX]; - float ce_mod[2*SRSLTE_SSS_N], z_real[2*SRSLTE_SSS_N], z_imag[2*SRSLTE_SSS_N]; - + srslte_dft_run_c(&q->dftp_input, input, input_fft); if (ce) { - srslte_vec_div_ccc(&input_fft[q->fft_size/2-SRSLTE_SSS_N], ce, ce_mod, - &input_fft[q->fft_size/2-SRSLTE_SSS_N], z_real, z_imag, - 2*SRSLTE_SSS_N); + srslte_vec_div_ccc(&input_fft[q->fft_size/2-SRSLTE_SSS_N], ce, + &input_fft[q->fft_size/2-SRSLTE_SSS_N], 2*SRSLTE_SSS_N); } for (int i = 0; i < SRSLTE_SSS_N; i++) { diff --git a/lib/src/phy/sync/test/sync_test.c b/lib/src/phy/sync/test/sync_test.c index 8715b316e..1e3951884 100644 --- a/lib/src/phy/sync/test/sync_test.c +++ b/lib/src/phy/sync/test/sync_test.c @@ -108,8 +108,8 @@ int main(int argc, char **argv) { perror("malloc"); exit(-1); } - - if (srslte_ofdm_tx_init(&ifft, cp, nof_prb)) { + + if (srslte_ofdm_tx_init(&ifft, cp, buffer, fft_buffer, nof_prb)) { fprintf(stderr, "Error creating iFFT object\n"); exit(-1); } @@ -150,8 +150,14 @@ int main(int argc, char **argv) { /* Transform to OFDM symbols */ memset(fft_buffer, 0, sizeof(cf_t) * FLEN); - srslte_ofdm_tx_sf(&ifft, buffer, &fft_buffer[offset]); + srslte_ofdm_tx_sf(&ifft); + /* Apply sample offset */ + for (int i = 0; i < FLEN; i++) { + fft_buffer[FLEN - i - 1 + offset] = fft_buffer[FLEN - i - 1]; + } + bzero(fft_buffer, sizeof(cf_t) * offset); + if (srslte_sync_find(&syncobj, fft_buffer, 0, &find_idx) < 0) { fprintf(stderr, "Error running srslte_sync_find\n"); exit(-1); diff --git a/lib/src/phy/ue/ue_dl.c b/lib/src/phy/ue/ue_dl.c index 1bef04981..0f0307b0d 100644 --- a/lib/src/phy/ue/ue_dl.c +++ b/lib/src/phy/ue/ue_dl.c @@ -53,6 +53,7 @@ static srslte_dci_format_t common_formats[] = {SRSLTE_DCI_FORMAT1A,SRSLTE_DCI_FO const uint32_t nof_common_formats = 2; int srslte_ue_dl_init(srslte_ue_dl_t *q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb, uint32_t nof_rx_antennas) { @@ -73,12 +74,35 @@ int srslte_ue_dl_init(srslte_ue_dl_t *q, q->sample_offset = 0; q->nof_rx_antennas = nof_rx_antennas; - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - goto clean_exit; + for (int j = 0; j < SRSLTE_MAX_PORTS; j++) { + q->sf_symbols_m[j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); + if (!q->sf_symbols_m[j]) { + perror("malloc"); + goto clean_exit; + } + for (uint32_t i=0;ice_m[i][j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); + if (!q->ce_m[i][j]) { + perror("malloc"); + goto clean_exit; + } + bzero(q->ce_m[i][j], MAX_SFLEN_RE * sizeof(cf_t)); + } + } + + q->sf_symbols = q->sf_symbols_m[0]; + for (int i=0;ice[i] = q->ce_m[i][0]; + } + + for (int i = 0; i < nof_rx_antennas; i++) { + if (srslte_ofdm_rx_init(&q->fft[i], SRSLTE_CP_NORM, in_buffer[i], q->sf_symbols_m[i], max_prb)) { + fprintf(stderr, "Error initiating FFT\n"); + goto clean_exit; + } } - - if (srslte_ofdm_rx_init_mbsfn(&q->fft_mbsfn, SRSLTE_CP_EXT, max_prb)) { + + if (srslte_ofdm_rx_init_mbsfn(&q->fft_mbsfn, SRSLTE_CP_EXT, in_buffer[0], q->sf_symbols_m[0], max_prb)) { fprintf(stderr, "Error initiating FFT for MBSFN subframes \n"); goto clean_exit; } @@ -127,28 +151,7 @@ int srslte_ue_dl_init(srslte_ue_dl_t *q, fprintf(stderr, "Error initiating SFO correct\n"); goto clean_exit; } - srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft.symbol_sz); - - for (int j = 0; j < SRSLTE_MAX_PORTS; j++) { - q->sf_symbols_m[j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); - if (!q->sf_symbols_m[j]) { - perror("malloc"); - goto clean_exit; - } - for (uint32_t i=0;ice_m[i][j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); - if (!q->ce_m[i][j]) { - perror("malloc"); - goto clean_exit; - } - bzero(q->ce_m[i][j], MAX_SFLEN_RE * sizeof(cf_t)); - } - } - - q->sf_symbols = q->sf_symbols_m[0]; - for (int i=0;ice[i] = q->ce_m[i][0]; - } + srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft[0].symbol_sz); ret = SRSLTE_SUCCESS; } else { @@ -164,7 +167,9 @@ clean_exit: void srslte_ue_dl_free(srslte_ue_dl_t *q) { if (q) { - srslte_ofdm_rx_free(&q->fft); + for (int port = 0; port < SRSLTE_MAX_PORTS; port++) { + srslte_ofdm_rx_free(&q->fft[port]); + } srslte_ofdm_rx_free(&q->fft_mbsfn); srslte_chest_dl_free(&q->chest); srslte_regs_free(&q->regs); @@ -219,10 +224,12 @@ int srslte_ue_dl_set_cell(srslte_ue_dl_t *q, srslte_cell_t cell) fprintf(stderr, "Error resizing SFO correct\n"); return SRSLTE_ERROR; } - srslte_cfo_set_tol(&q->sfo_correct, 1e-5/q->fft.symbol_sz); - if (srslte_ofdm_rx_set_prb(&q->fft, q->cell.cp, q->cell.nof_prb)) { - fprintf(stderr, "Error resizing FFT\n"); - return SRSLTE_ERROR; + srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft[0].symbol_sz); + for (int port = 0; port < q->nof_rx_antennas; port++) { + if (srslte_ofdm_rx_set_prb(&q->fft[port], q->cell.cp, q->cell.nof_prb)) { + fprintf(stderr, "Error resizing FFT\n"); + return SRSLTE_ERROR; + } } if (srslte_chest_dl_set_cell(&q->chest, q->cell)) { fprintf(stderr, "Error resizing channel estimator\n"); @@ -339,9 +346,9 @@ int srslte_ue_dl_decode_fft_estimate_mbsfn(srslte_ue_dl_t *q, cf_t *input[SRSLTE /* Run FFT for all subframe data */ for (int j=0;jnof_rx_antennas;j++) { if(sf_type == SRSLTE_SF_MBSFN ) { - srslte_ofdm_rx_sf(&q->fft_mbsfn, input[j], q->sf_symbols_m[j]); + srslte_ofdm_rx_sf(&q->fft_mbsfn); }else{ - srslte_ofdm_rx_sf(&q->fft, input[j], q->sf_symbols_m[j]); + srslte_ofdm_rx_sf(&q->fft[j]); } /* Correct SFO multiplying by complex exponential in the time domain */ @@ -351,7 +358,7 @@ int srslte_ue_dl_decode_fft_estimate_mbsfn(srslte_ue_dl_t *q, cf_t *input[SRSLTE srslte_cfo_correct(&q->sfo_correct, &q->sf_symbols_m[j][i*q->cell.nof_prb*SRSLTE_NRE], &q->sf_symbols_m[j][i*q->cell.nof_prb*SRSLTE_NRE], - q->sample_offset / q->fft.symbol_sz); + q->sample_offset / q->fft[j].symbol_sz); } } } @@ -603,7 +610,8 @@ int srslte_ue_dl_decode_mbsfn(srslte_ue_dl_t * q, grant.sf_type = SRSLTE_SF_MBSFN; grant.nof_tb = 1; grant.mcs[0].idx = 2; - + grant.tb_en[0] = true; + grant.tb_en[1] = false; grant.nof_prb = q->pmch.cell.nof_prb; srslte_dl_fill_ra_mcs(&grant.mcs[0], grant.nof_prb); srslte_softbuffer_rx_reset_tbs(q->softbuffers[0], (uint32_t) grant.mcs[0].tbs); diff --git a/lib/src/phy/ue/ue_mib.c b/lib/src/phy/ue/ue_mib.c index 23fec7f6e..c003a2ba3 100644 --- a/lib/src/phy/ue/ue_mib.c +++ b/lib/src/phy/ue/ue_mib.c @@ -35,7 +35,8 @@ #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" -int srslte_ue_mib_init(srslte_ue_mib_t * q, +int srslte_ue_mib_init(srslte_ue_mib_t * q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -65,7 +66,7 @@ int srslte_ue_mib_init(srslte_ue_mib_t * q, } } - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, in_buffer[0], q->sf_symbols, max_prb)) { fprintf(stderr, "Error initializing FFT\n"); goto clean_exit; } @@ -143,14 +144,14 @@ void srslte_ue_mib_reset(srslte_ue_mib_t * q) srslte_pbch_decode_reset(&q->pbch); } -int srslte_ue_mib_decode(srslte_ue_mib_t * q, cf_t *input, +int srslte_ue_mib_decode(srslte_ue_mib_t * q, uint8_t bch_payload[SRSLTE_BCH_PAYLOAD_LEN], uint32_t *nof_tx_ports, int *sfn_offset) { int ret = SRSLTE_SUCCESS; cf_t *ce_slot1[SRSLTE_MAX_PORTS]; /* Run FFT for the slot symbols */ - srslte_ofdm_rx_sf(&q->fft, input, q->sf_symbols); + srslte_ofdm_rx_sf(&q->fft); /* Get channel estimates of sf idx #0 for each port */ ret = srslte_chest_dl_estimate(&q->chest, q->sf_symbols, q->ce, 0); @@ -198,7 +199,7 @@ int srslte_ue_mib_sync_init_multi(srslte_ue_mib_sync_t *q, } q->nof_rx_antennas = nof_rx_antennas; - if (srslte_ue_mib_init(&q->ue_mib, SRSLTE_UE_MIB_NOF_PRB)) { + if (srslte_ue_mib_init(&q->ue_mib, q->sf_buffer, SRSLTE_UE_MIB_NOF_PRB)) { fprintf(stderr, "Error initiating ue_mib\n"); return SRSLTE_ERROR; } @@ -261,11 +262,11 @@ int srslte_ue_mib_sync_decode(srslte_ue_mib_sync_t * q, uint32_t nof_frames = 0; int mib_ret = SRSLTE_UE_MIB_NOTFOUND; - srslte_ue_mib_sync_reset(q); - - if (q != NULL) + if (q != NULL) { - ret = SRSLTE_SUCCESS; + srslte_ue_mib_sync_reset(q); + + ret = SRSLTE_SUCCESS; do { mib_ret = SRSLTE_UE_MIB_NOTFOUND; ret = srslte_ue_sync_zerocopy_multi(&q->ue_sync, q->sf_buffer); @@ -274,7 +275,7 @@ int srslte_ue_mib_sync_decode(srslte_ue_mib_sync_t * q, return -1; } else if (srslte_ue_sync_get_sfidx(&q->ue_sync) == 0) { if (ret == 1) { - mib_ret = srslte_ue_mib_decode(&q->ue_mib, q->sf_buffer[0], bch_payload, nof_tx_ports, sfn_offset); + mib_ret = srslte_ue_mib_decode(&q->ue_mib, bch_payload, nof_tx_ports, sfn_offset); } else { DEBUG("Resetting PBCH decoder after %d frames\n", q->ue_mib.frame_cnt); srslte_ue_mib_reset(&q->ue_mib); diff --git a/lib/src/phy/ue/ue_sync.c b/lib/src/phy/ue/ue_sync.c index f121c5bac..e9c02ca8b 100644 --- a/lib/src/phy/ue/ue_sync.c +++ b/lib/src/phy/ue/ue_sync.c @@ -359,6 +359,11 @@ uint32_t srslte_ue_sync_peak_idx(srslte_ue_sync_t *q) { return q->peak_idx; } +void srslte_ue_sync_set_cfo_ema(srslte_ue_sync_t *q, float ema) { + srslte_sync_set_cfo_ema_alpha(&q->sfind, ema); + srslte_sync_set_cfo_ema_alpha(&q->strack, ema); +} + srslte_ue_sync_state_t srslte_ue_sync_get_state(srslte_ue_sync_t *q) { return q->state; } diff --git a/lib/src/phy/ue/ue_ul.c b/lib/src/phy/ue/ue_ul.c index 03b53c40a..cb2a6277a 100644 --- a/lib/src/phy/ue/ue_ul.c +++ b/lib/src/phy/ue/ue_ul.c @@ -41,6 +41,7 @@ #define DEFAULT_CFO_TOL 50.0 // Hz int srslte_ue_ul_init(srslte_ue_ul_t *q, + cf_t *out_buffer, uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -50,8 +51,14 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q, ret = SRSLTE_ERROR; bzero(q, sizeof(srslte_ue_ul_t)); - - if (srslte_ofdm_tx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + + q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_PRB(max_prb) * sizeof(cf_t)); + if (!q->sf_symbols) { + perror("malloc"); + goto clean_exit; + } + + if (srslte_ofdm_tx_init(&q->fft, SRSLTE_CP_NORM, q->sf_symbols, out_buffer, max_prb)) { fprintf(stderr, "Error initiating FFT\n"); goto clean_exit; } @@ -83,11 +90,6 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q, fprintf(stderr, "Error initiating srslte_refsignal_ul\n"); goto clean_exit; } - q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_PRB(max_prb) * sizeof(cf_t)); - if (!q->sf_symbols) { - perror("malloc"); - goto clean_exit; - } q->refsignal = srslte_vec_malloc(2 * SRSLTE_NRE * max_prb * sizeof(cf_t)); if (!q->refsignal) { perror("malloc"); @@ -360,7 +362,7 @@ int srslte_ue_ul_pucch_encode(srslte_ue_ul_t *q, srslte_uci_data_t uci_data, q->last_pucch_format = format; - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); @@ -430,7 +432,7 @@ int srslte_ue_ul_srs_encode(srslte_ue_ul_t *q, uint32_t tti, cf_t *output_signal } } - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); @@ -499,7 +501,7 @@ int srslte_ue_ul_pusch_encode_rnti_softbuffer(srslte_ue_ul_t *q, } } - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); diff --git a/lib/src/phy/utils/bit.c b/lib/src/phy/utils/bit.c index 9ef53c35a..b1ae383a6 100644 --- a/lib/src/phy/utils/bit.c +++ b/lib/src/phy/utils/bit.c @@ -31,6 +31,12 @@ #include #include +#ifdef LV_HAVE_SSE + +#include + +#endif /* LV_HAVE_SSE */ + #include "srslte/phy/utils/bit.h" void srslte_bit_interleave(uint8_t *input, uint8_t *output, uint16_t *interleaver, uint32_t nof_bits) { @@ -53,6 +59,125 @@ void srslte_bit_interleave_w_offset(uint8_t *input, uint8_t *output, uint16_t *i } w_offset_p=8-w_offset; } +#ifdef LV_HAVE_SSE + __m64 m64mask = _mm_setr_pi8((uint8_t) 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1); + __m128i m128mask = _mm_set1_epi64(m64mask); + + union { + uint8_t v[8]; + __m64 m64; + } a, b, c; + + union { + __m128i m128; + uint16_t u16[8]; + uint8_t u8[16]; + struct { + __m64 reg_a; + __m64 reg_b; + } m64; + struct { + uint16_t i0, i1, i2, i3, i4, i5, i6, i7; + } v; + } ipx, epx, ipx2, epx2, b128, a128, c128; + + uint32_t i = st; + for (; i < (nof_bits / 8 - 1); i += 2) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + (i * 8) - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + ipx2.m128 = _mm_loadu_si128((__m128i *) (interleaver + ((i + 1) * 8) - w_offset_p)); + epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + + epx.m64.reg_b = epx2.m64.reg_a; + + b128.m128 = _mm_and_si128(epx.m128, _mm_set1_epi8(0x7)); + b128.m128 = _mm_shuffle_epi8(m128mask, b128.m128); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + ipx2.m128 = _mm_srli_epi16(ipx2.m128, 3); + + a128.m128 = _mm_set_epi8(input[ipx2.v.i0], + input[ipx2.v.i1], + input[ipx2.v.i2], + input[ipx2.v.i3], + input[ipx2.v.i4], + input[ipx2.v.i5], + input[ipx2.v.i6], + input[ipx2.v.i7], + input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c128.m128 = _mm_cmpeq_epi8(_mm_and_si128(a128.m128, b128.m128), b128.m128); + uint16_t o = (uint16_t) _mm_movemask_epi8(c128.m128); + *((uint16_t *) (output + i)) = o; + } + + for (; i < nof_bits / 8; i++) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + i * 8 - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + b.m64 = _mm_and_si64(epx.m64.reg_a, _mm_set1_pi8(0x7)); + b.m64 = _mm_shuffle_pi8(m64mask, b.m64); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + + a.m64 = _mm_set_pi8(input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c.m64 = _mm_cmpeq_pi8(_mm_and_si64(a.m64, b.m64), b.m64); + output[i] = (uint8_t) _mm_movemask_pi8(c.m64); + } + +#if 0 + /* THIS PIECE OF CODE IS FOR CHECKING SIMD BEHAVIOUR. DO NOT ENABLE. */ + uint8_t *output2 = malloc(nof_bits/8); + for (i=st;i #include +#include #include "srslte/phy/utils/mat.h" diff --git a/lib/src/phy/utils/test/CMakeLists.txt b/lib/src/phy/utils/test/CMakeLists.txt index 4dccbf2a0..1f5c66827 100644 --- a/lib/src/phy/utils/test/CMakeLists.txt +++ b/lib/src/phy/utils/test/CMakeLists.txt @@ -42,3 +42,7 @@ target_link_libraries(algebra_test srslte_phy) add_test(algebra_2x2_zf_solver_test algebra_test -z) add_test(algebra_2x2_mmse_solver_test algebra_test -m) + +add_executable(vector_test vector_test.c) +target_link_libraries(vector_test srslte_phy) +add_test(vector_test vector_test) diff --git a/lib/src/phy/utils/test/mat_test.c b/lib/src/phy/utils/test/mat_test.c index 49be5c9ae..0bfb482a9 100644 --- a/lib/src/phy/utils/test/mat_test.c +++ b/lib/src/phy/utils/test/mat_test.c @@ -29,16 +29,21 @@ #include #include #include -#include #include #include "srslte/phy/utils/mat.h" +#include "srslte/phy/utils/simd.h" +#include "srslte/phy/utils/vector.h" bool zf_solver = false; bool mmse_solver = false; bool verbose = false; +#define RANDOM_F() ((float)rand())/((float)RAND_MAX) +#define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) + double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { if (ts_end->tv_usec > ts_start->tv_usec) { return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + @@ -49,16 +54,16 @@ double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { } } -#define NOF_REPETITIONS 1000 +#define BLOCK_SIZE 1000 #define RUN_TEST(FUNCTION) /*TYPE NAME (void)*/ { \ int i;\ struct timeval start, end;\ gettimeofday(&start, NULL); \ bool ret = true; \ - for (i = 0; i < NOF_REPETITIONS; i++) {ret &= FUNCTION ();}\ + for (i = 0; i < BLOCK_SIZE; i++) {ret &= FUNCTION ();}\ gettimeofday(&end, NULL);\ if (verbose) printf("%32s: %s ... %6.2f us/call\n", #FUNCTION, (ret)?"Pass":"Fail", \ - elapsed_us(&start, &end)/NOF_REPETITIONS);\ + elapsed_us(&start, &end)/BLOCK_SIZE);\ passed &= ret;\ } @@ -373,6 +378,24 @@ bool test_mmse_solver_avx(void) { #endif /* LV_HAVE_AVX */ +bool test_vec_dot_prod_ccc(void) { + __attribute__((aligned(256))) cf_t a[14]; + __attribute__((aligned(256))) cf_t b[14]; + cf_t res = 0, gold = 0; + + for (int i = 0; i < 14; i++) { + a[i] = RANDOM_CF(); + b[i] = RANDOM_CF(); + } + + res = srslte_vec_dot_prod_ccc(a, b, 14); + + for (int i=0;i<14;i++) { + gold += a[i]*b[i]; + } + + return (cabsf(res - gold) < 1e-3); +} int main(int argc, char **argv) { bool passed = true; @@ -405,6 +428,8 @@ int main(int argc, char **argv) { #endif /* LV_HAVE_AVX */ } + RUN_TEST(test_vec_dot_prod_ccc); + printf("%s!\n", (passed) ? "Ok" : "Failed"); if (!passed) { diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c new file mode 100644 index 000000000..4ebed9862 --- /dev/null +++ b/lib/src/phy/utils/test/vector_test.c @@ -0,0 +1,822 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "srslte/phy/utils/mat.h" +#include "srslte/phy/utils/simd.h" +#include "srslte/phy/utils/vector.h" + + +bool zf_solver = false; +bool mmse_solver = false; +bool verbose = false; + +#define MAX_MSE (1e-3) +#define NOF_REPETITIONS (1024) +#define MAX_FUNCTIONS (64) +#define MAX_BLOCKS (16) + +#define RANDOM_F() ((float)rand())/((float)RAND_MAX) +#define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) + +#define TEST_CALL(TEST_CODE) gettimeofday(&start, NULL);\ + for (int i = 0; i < NOF_REPETITIONS; i++){TEST_CODE;}\ + gettimeofday(&end, NULL); \ + *timing = elapsed_us(&start, &end); + +#define TEST(X, CODE) static bool test_##X (char *func_name, double *timing, uint32_t block_size) {\ + struct timeval start, end;\ + float mse = 0.0f;\ + bool passed;\ + strncpy(func_name, #X, 32);\ + CODE;\ + passed = (mse < MAX_MSE);\ + printf("%32s (%5d) ... %7.1f MSamp/s ... %3s Passed (%.6f)\n", func_name, block_size, \ + (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not", mse);\ + return passed;\ +} + +#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) + + +static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { + if (ts_end->tv_usec > ts_start->tv_usec) { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + + (double) ts_end->tv_usec - (double) ts_start->tv_usec; + } else { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec - 1) * 1000000 + + ((double) ts_end->tv_usec + 1000000) - (double) ts_start->tv_usec; + } +} + +float squared_error (cf_t a, cf_t b) { + float diff_re = __real__ a - __real__ b; + float diff_im = __imag__ a - __imag__ b; + return diff_re*diff_re + diff_im*diff_im; +} + +TEST(srslte_vec_acc_ff, + MALLOC(float, x); + float z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + TEST_CALL(z = srslte_vec_acc_ff(x, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i]; + } + + mse += fabs(gold - z) / gold; + + free(x); +) + +TEST(srslte_vec_dot_prod_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + int16_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(z = srslte_vec_dot_prod_sss(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * y[i]; + } + + mse += cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_sum_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_sum_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] + y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_sub_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_sub_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] - y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_prod_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_prod_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_acc_cc, + MALLOC(cf_t, x); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + TEST_CALL(z = srslte_vec_acc_cc(x, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i]; + } + + mse += cabsf(gold - z)/cabsf(gold); + + free(x); +) + + +TEST(srslte_vec_sum_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_sum_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] + y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); +) + +TEST(srslte_vec_sub_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_sub_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] - y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); +) + +TEST(srslte_vec_dot_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(z = srslte_vec_dot_prod_ccc(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * y[i]; + } + + mse = cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_dot_prod_conj_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(z = srslte_vec_dot_prod_conj_ccc(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * conjf(y[i]); + } + + mse = cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_prod_ccc_split, + MALLOC(float, x_re); + MALLOC(float, x_im); + MALLOC(float, y_re); + MALLOC(float, y_im); + MALLOC(float, z_re); + MALLOC(float, z_im); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x_re[i] = RANDOM_F(); + x_im[i] = RANDOM_F(); + y_re[i] = RANDOM_F(); + y_im[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_prod_ccc_split(x_re, x_im, y_re, y_im, z_re, z_im, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = (x_re[i] + I * x_im[i]) * (y_re[i] + I * y_im[i]); + mse += cabsf(gold - (z_re[i] + I*z_im[i])); + } + + free(x_re); + free(x_im); + free(y_re); + free(y_im); + free(z_re); + free(z_im); +) + +TEST(srslte_vec_prod_conj_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_conj_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * conjf(y[i]); + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + cf_t y = RANDOM_CF(); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_convert_fi, + MALLOC(float, x); + MALLOC(short, z); + float scale = 1000.0f; + + short gold; + for (int i = 0; i < block_size; i++) { + x[i] = (float) RANDOM_F(); + } + + TEST_CALL(srslte_vec_convert_fi(x, z, scale, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = (short) ((x[i] * scale)); + mse += cabsf((float)gold - (float) z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_prod_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_prod_cfc, + MALLOC(cf_t, x); + MALLOC(float, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_prod_cfc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_sc_prod_fff, + MALLOC(float, x); + MALLOC(float, z); + float y = RANDOM_F(); + + float gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_abs_cf, + MALLOC(cf_t, x); + MALLOC(float, z); + float gold; + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_abs_cf(x, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = sqrtf(crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i])); + mse += cabsf(gold - z[i])/block_size; + } + + free(x); + free(z); +) + +TEST(srslte_vec_abs_square_cf, + MALLOC(cf_t, x); + MALLOC(float, z); + float gold; + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_abs_square_cf(x, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i]); + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_cfc, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + cf_t gold; + float h = RANDOM_F(); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_cfc(x, h, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * h; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_div_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_div_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i]); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + + +TEST(srslte_vec_div_cfc, + MALLOC(cf_t, x); + MALLOC(float, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_div_cfc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i])/cabsf(gold); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + + +TEST(srslte_vec_div_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_div_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i]); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_max_fi, + MALLOC(float, x); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + uint32_t max_index = 0; + TEST_CALL(max_index = srslte_vec_max_fi(x, block_size);) + + float gold_value = -INFINITY; + uint32_t gold_index = 0; + for (int i = 0; i < block_size; i++) { + if (gold_value < x[i]) { + gold_value = x[i]; + gold_index = i; + } + } + mse = (gold_index != max_index) ? 1:0; + + free(x); +) + +TEST(srslte_vec_max_abs_ci, + MALLOC(cf_t, x); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + uint32_t max_index = 0; + TEST_CALL(max_index = srslte_vec_max_abs_ci(x, block_size);) + + float gold_value = -INFINITY; + uint32_t gold_index = 0; + for (int i = 0; i < block_size; i++) { + cf_t a = x[i]; + float abs2 = __real__ a * __real__ a + __imag__ a * __imag__ a; + if (abs2 > gold_value) { + gold_value = abs2; + gold_index = (uint32_t)i; + } + } + mse = (gold_index != max_index) ? 1:0; + + free(x); +) + +int main(int argc, char **argv) { + char func_names[MAX_FUNCTIONS][32]; + double timmings[MAX_FUNCTIONS][MAX_BLOCKS]; + uint32_t sizes[32]; + uint32_t size_count = 0; + uint32_t func_count = 0; + bool passed[MAX_FUNCTIONS][MAX_BLOCKS]; + bool all_passed = true; + + for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { + func_count = 0; + + passed[func_count][size_count] = test_srslte_vec_acc_ff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sum_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sub_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sum_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sub_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_dot_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_dot_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_convert_fi(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_ccc_split(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_abs_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_abs_square_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + sizes[size_count] = block_size; + size_count++; + } + + char fname[68]; + FILE *f = NULL; + void * p = popen("(date +%g%m%d && hostname) | tr '\\r\\n' '__'", "r"); + if (p) { + fgets(fname, 64, p); + strncpy(fname + strnlen(fname, 64) - 1, ".tsv", 4); + f = fopen(fname, "w"); + if (f) printf("Saving benchmark results in '%s'\n", fname); + } + pclose(p); + + + printf("\n"); + printf("%32s |", "Subroutine/MSps"); + if (f) fprintf(f, "Subroutine/MSps Vs Vector size\t"); + for (int i = 0; i < size_count; i++) { + printf(" %7d", sizes[i]); + if (f) fprintf(f, "%d\t", sizes[i]); + } + printf(" |\n"); + if (f) fprintf(f, "\n"); + + for (int j = 0; j < 32; j++) { + printf("-"); + } + printf("-+-"); + for (int j = 0; j < size_count; j++) { + printf("--------"); + } + printf("-|\n"); + + for (int i = 0; i < func_count; i++) { + printf("%32s | ", func_names[i]); + if (f) fprintf(f, "%s\t", func_names[i]); + + for (int j = 0; j < size_count; j++) { + printf(" %s%7.1f\x1b[0m", (passed[i][j])?"":"\x1B[31m", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + if (f) fprintf(f, "%.1f\t", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + + all_passed &= passed[i][j]; + } + printf(" |\n"); + if (f) fprintf(f, "\n"); + } + + if (f) fclose(f); + + return (all_passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; +} diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index d78f5d707..3bb7fb08f 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -36,48 +36,10 @@ #include "srslte/phy/utils/bit.h" -#ifdef LV_HAVE_SSE -#include -#endif - -#ifdef LV_HAVE_AVX -#include -#endif - - -#ifdef HAVE_VOLK -#include "volk/volk.h" -#endif - -#ifdef DEBUG_MODE -#warning FIXME: Disabling SSE/AVX vector code -#undef LV_HAVE_SSE -#undef LV_HAVE_AVX -#endif - - -int srslte_vec_acc_ii(int *x, uint32_t len) { - int i; - int z=0; - for (i=0;im) { - m=x[i]; - p=i; - } - } - return p; -#endif -#endif -} - -int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_STAR_S_FUNCTION - int16_t target=0; - volk_16i_max_star_16i(&target,x,len); - return target; - -#else - uint32_t i; - int16_t m=-INT16_MIN; - for (i=0;im) { - m=x[i]; - } - } - return m; -#endif -} - -int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) { - uint32_t i; - int16_t m=-INT16_MIN; - for (i=0;im) { - m=abs(x[i]); - } - } - return m; + return srslte_vec_max_fi_simd(x, len); } -void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) { -#ifdef HAVE_VOLK_MAX_VEC_FUNCTION - volk_32f_x2_max_32f(z,x,y,len); -#else - uint32_t i; - for (i=0;i y[i]) { - z[i] = x[i]; - } else { - z[i] = y[i]; - } - } -#endif -} - - +// CP autocorr uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_32 - uint32_t target=0; - volk_32fc_index_max_32u(&target,x,len); - return target; -#else -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_16 - uint32_t target=0; - volk_32fc_index_max_16u(&target,x,len); - return target; -#else - uint32_t i; - float m=-FLT_MAX; - uint32_t p=0; - float tmp; - for (i=0;im) { - m=tmp; - p=i; - } - } - return p; -#endif -#endif + return srslte_vec_max_ci_simd(x, len); } void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) { @@ -863,23 +393,5 @@ void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int16_t offset, } void srs_vec_cf_cpy(cf_t *dst, cf_t *src, int len) { - int i = 0; - -#ifdef LV_HAVE_AVX - for (; i < len - 3; i += 4) { - _mm256_store_ps((float *) &dst[i], _mm256_load_ps((float *) &src[i])); - } -#endif /* LV_HAVE_AVX */ -#ifdef LV_HAVE_SSE - for (; i < len - 1; i += 2) { - _mm_store_ps((float *) &dst[i], _mm_load_ps((float *) &src[i])); - } - for (; i < len; i++) { - ((__m64*) dst)[i] = ((__m64*) src)[i]; - } -#else - for (; i < len; i++) { - dst[i] = src[i]; - } -#endif /* LV_HAVE_SSE */ + srslte_vec_cp_simd(dst, src, len); } diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 7be4afc44..ab281a653 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -25,475 +25,1080 @@ */ -#include #include #include #include #include +#include +#include +#include #include "srslte/phy/utils/vector_simd.h" +#include "srslte/phy/utils/simd.h" -#include -#include -#ifdef LV_HAVE_SSE -#include -#endif +int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) { + int i = 0; + int result = 0; +#if SRSLTE_SIMD_S_SIZE + simd_s_t simd_dotProdVal = srslte_simd_s_zero(); + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); -#ifdef LV_HAVE_AVX -#include -#endif + simd_s_t z = srslte_simd_s_mul(a, b); + simd_dotProdVal = srslte_simd_s_add(simd_dotProdVal, z); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); -int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len) -{ - int result = 0; -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; + simd_s_t z = srslte_simd_s_mul(a, b); - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - - __m128i dotProdVal = _mm_setzero_si128(); + simd_dotProdVal = srslte_simd_s_add(simd_dotProdVal, z); + } + } + __attribute__ ((aligned (SRSLTE_SIMD_S_SIZE*2))) short dotProdVector[SRSLTE_SIMD_S_SIZE]; + srslte_simd_s_store(dotProdVector, simd_dotProdVal); + for (int k = 0; k < SRSLTE_SIMD_S_SIZE; k++) { + result += dotProdVector[k]; + } +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + result += (x[i] * y[i]); + } - __m128i xVal, yVal, zVal; - for(;number < points; number++){ + return result; +} - xVal = _mm_load_si128(xPtr); - yVal = _mm_loadu_si128(yPtr); +void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); - zVal = _mm_mullo_epi16(xVal, yVal); + simd_s_t r = srslte_simd_s_add(a, b); + + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - dotProdVal = _mm_add_epi16(dotProdVal, zVal); + simd_s_t r = srslte_simd_s_add(a, b); - xPtr ++; - yPtr ++; + srslte_simd_s_storeu(&z[i], r); + } } - - short dotProdVector[8]; - _mm_store_si128((__m128i*) dotProdVector, dotProdVal); - for (int i=0;i<8;i++) { - result += dotProdVector[i]; +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] + y[i]; } +} + +void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); - number = points * 8; - for(;number < len; number++){ - result += (x[number] * y[number]); + simd_s_t r = srslte_simd_s_sub(a, b); + + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); + + simd_s_t r = srslte_simd_s_sub(a, b); + + srslte_simd_s_storeu(&z[i], r); + } + } +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] - y[i]; } - -#endif - return result; } +void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); -int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len) -{ - int result = 0; -#ifdef LV_HAVE_AVX2 - unsigned int number = 0; - const unsigned int points = len / 16; + simd_s_t r = srslte_simd_s_mul(a, b); - const __m256i* xPtr = (const __m256i*) x; - const __m256i* yPtr = (const __m256i*) y; - - __m256i dotProdVal = _mm256_setzero_si256(); + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - __m256i xVal, yVal, zVal; - for(;number < points; number++){ + simd_s_t r = srslte_simd_s_mul(a, b); - xVal = _mm256_load_si256(xPtr); - yVal = _mm256_loadu_si256(yPtr); - zVal = _mm256_mullo_epi16(xVal, yVal); - dotProdVal = _mm256_add_epi16(dotProdVal, zVal); - xPtr ++; - yPtr ++; + srslte_simd_s_storeu(&z[i], r); + } } - - __attribute__ ((aligned (256))) short dotProdVector[16]; - _mm256_store_si256((__m256i*) dotProdVector, dotProdVal); - for (int i=0;i<16;i++) { - result += dotProdVector[i]; +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] * y[i]; } +} - number = points * 16; - for(;number < len; number++){ - result += (x[number] * y[number]); +/* No improvement with AVX */ +void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, int len) { + int i = 0; +#ifdef LV_HAVE_SSE +#if CMAKE_BUILD_TYPE!=Debug + + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(lut)) { + for (; i < len - 7; i += 8) { + __m128i xVal = _mm_load_si128((__m128i *) &x[i]); + __m128i lutVal = _mm_load_si128((__m128i *) &lut[i]); + + for (int k = 0; k < 8; k++) { + int16_t x = (int16_t) _mm_extract_epi16(xVal, k); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, k); + y[l] = (short) x; + } + } + } else { + for (; i < len - 7; i += 8) { + __m128i xVal = _mm_loadu_si128((__m128i *) &x[i]); + __m128i lutVal = _mm_loadu_si128((__m128i *) &lut[i]); + + for (int k = 0; k < 8; k++) { + int16_t x = (int16_t) _mm_extract_epi16(xVal, k); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, k); + y[l] = (short) x; + } + } } - #endif - return result; +#endif + + for (; i < len; i++) { + y[lut[i]] = x[i]; + } } +/* Modified from volk_32f_s32f_convert_16i_a_simd2. Removed clipping */ +void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, int len) { + int i = 0; +#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE + simd_f_t s = srslte_simd_f_set1(scale); + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_f_t a = srslte_simd_f_load(&x[i]); + simd_f_t b = srslte_simd_f_load(&x[i + SRSLTE_SIMD_F_SIZE]); -void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; + simd_f_t sa = srslte_simd_f_mul(a, s); + simd_f_t sb = srslte_simd_f_mul(b, s); + + simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb); + + srslte_simd_s_store(&z[i], i16); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_f_t a = srslte_simd_f_loadu(&x[i]); + simd_f_t b = srslte_simd_f_loadu(&x[i + SRSLTE_SIMD_F_SIZE]); - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - __m128i* zPtr = (__m128i*) z; + simd_f_t sa = srslte_simd_f_mul(a, s); + simd_f_t sb = srslte_simd_f_mul(b, s); - __m128i xVal, yVal, zVal; - for(;number < points; number++){ + simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb); - xVal = _mm_load_si128(xPtr); - yVal = _mm_load_si128(yPtr); + srslte_simd_s_storeu(&z[i], i16); + } + } +#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + z[i] = (int16_t) (x[i] * scale); + } +} - zVal = _mm_add_epi16(xVal, yVal); +float srslte_vec_acc_ff_simd(float *x, int len) { + int i = 0; + float acc_sum = 0.0f; - _mm_store_si128(zPtr, zVal); +#if SRSLTE_SIMD_F_SIZE + simd_f_t simd_sum = srslte_simd_f_zero(); - xPtr ++; - yPtr ++; - zPtr ++; + if (SRSLTE_IS_ALIGNED(x)) { + for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) { + simd_f_t a = srslte_simd_f_load(&x[i]); + + simd_sum = srslte_simd_f_add(simd_sum, a); + } + } else { + for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) { + simd_f_t a = srslte_simd_f_loadu(&x[i]); + + simd_sum = srslte_simd_f_add(simd_sum, a); + } } - number = points * 8; - for(;number < len; number++){ - z[number] = x[number] + y[number]; + __attribute__((aligned(SRSLTE_SIMD_F_SIZE*4))) float sum[SRSLTE_SIMD_F_SIZE]; + srslte_simd_f_store(sum, simd_sum); + for (int k = 0; k < SRSLTE_SIMD_F_SIZE; k++) { + acc_sum += sum[k]; } #endif + for (; i max_value) { + max_value = values_buffer[k]; + max_index = (uint32_t) indexes_buffer[k]; + } + } +#endif /* SRSLTE_SIMD_I_SIZE */ + + for (; i < len; i++) { + if (x[i] > max_value) { + max_value = x[i]; + max_index = (uint32_t)i; + } + } + + return max_index; +} + +uint32_t srslte_vec_max_ci_simd(cf_t *x, int len) { + int i = 0; + + float max_value = -INFINITY; + uint32_t max_index = 0; + +#if SRSLTE_SIMD_I_SIZE + __attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(int)))) int indexes_buffer[SRSLTE_SIMD_I_SIZE] = {0}; + __attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(float)))) float values_buffer[SRSLTE_SIMD_I_SIZE] = {0}; + + for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) indexes_buffer[k] = k; + simd_i_t simd_inc = srslte_simd_i_set1(SRSLTE_SIMD_I_SIZE); + simd_i_t simd_indexes = srslte_simd_i_load(indexes_buffer); + simd_i_t simd_max_indexes = srslte_simd_i_set1(0); + + simd_f_t simd_max_values = srslte_simd_f_set1(-INFINITY); + + if (SRSLTE_IS_ALIGNED(x)) { + for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { + simd_f_t x1 = srslte_simd_f_load((float *) &x[i]); + simd_f_t x2 = srslte_simd_f_load((float *) &x[i + SRSLTE_SIMD_F_SIZE / 2]); + + simd_f_t mul1 = srslte_simd_f_mul(x1, x1); + simd_f_t mul2 = srslte_simd_f_mul(x2, x2); + + simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); + + simd_sel_t res = srslte_simd_f_max(z1, simd_max_values); + + simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); + simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); + simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); + } + } else { + for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { + simd_f_t x1 = srslte_simd_f_loadu((float *) &x[i]); + simd_f_t x2 = srslte_simd_f_loadu((float *) &x[i + SRSLTE_SIMD_F_SIZE / 2]); + + simd_f_t mul1 = srslte_simd_f_mul(x1, x1); + simd_f_t mul2 = srslte_simd_f_mul(x2, x2); + + simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); + + simd_sel_t res = srslte_simd_f_max(z1, simd_max_values); + + simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); + simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); + simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); + } + } + + srslte_simd_i_store(indexes_buffer, simd_max_indexes); + srslte_simd_f_store(values_buffer, simd_max_values); + + for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) { + if (values_buffer[k] > max_value) { + max_value = values_buffer[k]; + max_index = (uint32_t) indexes_buffer[k]; + } + } +#endif /* SRSLTE_SIMD_I_SIZE */ + + for (; i < len; i++) { + cf_t a = x[i]; + float abs2 = __real__ a * __real__ a + __imag__ a * __imag__ a; + if (abs2 > max_value) { + max_value = abs2; + max_index = (uint32_t)i; + } + } + + return max_index; } diff --git a/lib/src/radio/radio.cc b/lib/src/radio/radio.cc index c6abb8642..cfa83caf3 100644 --- a/lib/src/radio/radio.cc +++ b/lib/src/radio/radio.cc @@ -63,10 +63,10 @@ bool radio::init(char *args, char *devname, uint32_t nof_channels) } if (args) { - strncpy(saved_args, args, 128); + strncpy(saved_args, args, 127); } if (devname) { - strncpy(saved_devname, devname, 128); + strncpy(saved_devname, devname, 127); } saved_nof_channels = nof_channels; diff --git a/lib/src/radio/radio_multi.cc b/lib/src/radio/radio_multi.cc index 2ec7f891c..664458c75 100644 --- a/lib/src/radio/radio_multi.cc +++ b/lib/src/radio/radio_multi.cc @@ -31,10 +31,10 @@ bool radio_multi::init_multi(uint32_t nof_rx_antennas, char* args, char* devname } if (args) { - strncpy(saved_args, args, 128); + strncpy(saved_args, args, 127); } if (devname) { - strncpy(saved_devname, devname, 128); + strncpy(saved_devname, devname, 127); } return true; diff --git a/lib/src/upper/rlc_am.cc b/lib/src/upper/rlc_am.cc index b62a7e88e..96b231a09 100644 --- a/lib/src/upper/rlc_am.cc +++ b/lib/src/upper/rlc_am.cc @@ -613,7 +613,7 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) uint32_t pdu_space = nof_bytes; uint8_t *pdu_ptr = pdu->msg; - if(pdu_space <= head_len) + if(pdu_space <= head_len + 1) { log->warning("%s Cannot build a PDU - %d bytes available, %d bytes required for header\n", rrc->get_rb_name(lcid).c_str(), nof_bytes, head_len); @@ -652,7 +652,7 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) } // Pull SDUs from queue - while(pdu_space > head_len && tx_sdu_queue.size() > 0) + while(pdu_space > head_len + 1 && tx_sdu_queue.size() > 0) { if(last_li > 0) header.li[header.N_li++] = last_li; diff --git a/lib/src/upper/rlc_um.cc b/lib/src/upper/rlc_um.cc index b2697178c..d95e186e7 100644 --- a/lib/src/upper/rlc_um.cc +++ b/lib/src/upper/rlc_um.cc @@ -277,7 +277,7 @@ int rlc_um::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) int head_len = rlc_um_packed_length(&header); int pdu_space = nof_bytes; - if(pdu_space <= head_len) + if(pdu_space <= head_len + 1) { log->warning("%s Cannot build a PDU - %d bytes available, %d bytes required for header\n", rrc->get_rb_name(lcid).c_str(), nof_bytes, head_len); @@ -309,7 +309,7 @@ int rlc_um::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) } // Pull SDUs from queue - while(pdu_space > head_len && tx_sdu_queue.size() > 0) + while(pdu_space > head_len + 1 && tx_sdu_queue.size() > 0) { log->debug("pdu_space=%d, head_len=%d\n", pdu_space, head_len); if(last_li > 0) diff --git a/lib/test/CMakeLists.txt b/lib/test/CMakeLists.txt index a1af8153c..72e9e9100 100644 --- a/lib/test/CMakeLists.txt +++ b/lib/test/CMakeLists.txt @@ -18,5 +18,6 @@ # and at http://www.gnu.org/licenses/. # +add_subdirectory(asn1) add_subdirectory(common) add_subdirectory(upper) diff --git a/lib/test/asn1/CMakeLists.txt b/lib/test/asn1/CMakeLists.txt new file mode 100644 index 000000000..6c52d1972 --- /dev/null +++ b/lib/test/asn1/CMakeLists.txt @@ -0,0 +1,23 @@ +# +# Copyright 2013-2017 Software Radio Systems Limited +# +# This file is part of srsLTE +# +# srsLTE is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of +# the License, or (at your option) any later version. +# +# srsLTE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# A copy of the GNU Affero General Public License can be found in +# the LICENSE file in the top-level directory of this distribution +# and at http://www.gnu.org/licenses/. +# + +add_executable(rrc_meas_test rrc_meas_test.cc) +target_link_libraries(rrc_meas_test srslte_common srslte_phy srslte_asn1) +add_test(rrc_meas_test rrc_meas_test) diff --git a/lib/test/asn1/rrc_meas_test.cc b/lib/test/asn1/rrc_meas_test.cc new file mode 100644 index 000000000..63a4d61b6 --- /dev/null +++ b/lib/test/asn1/rrc_meas_test.cc @@ -0,0 +1,91 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsUE library. + * + * srsUE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsUE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#include +#include +#include +#include "srslte/common/log_filter.h" +#include "srslte/asn1/liblte_rrc.h" + + +void basic_test() { + srslte::log_filter log1("RRC"); + log1.set_level(srslte::LOG_LEVEL_DEBUG); + log1.set_hex_limit(128); + + LIBLTE_BIT_MSG_STRUCT bit_buf; + LIBLTE_BIT_MSG_STRUCT bit_buf2; + LIBLTE_BYTE_MSG_STRUCT byte_buf; + LIBLTE_RRC_UL_DCCH_MSG_STRUCT ul_dcch_msg; + + uint32_t rrc_message_len = 18; + uint8_t rrc_message[] = {0x08, 0x10, 0x49, 0x3C, 0x0D, 0x97, 0x89, 0x83, + 0xC0, 0x84, 0x20, 0x82, 0x08, 0x21, 0x00, 0x01, + 0xBC, 0x48}; + + srslte_bit_unpack_vector(rrc_message, bit_buf.msg, rrc_message_len*8); + bit_buf.N_bits = rrc_message_len*8; + liblte_rrc_unpack_ul_dcch_msg((LIBLTE_BIT_MSG_STRUCT*)&bit_buf, &ul_dcch_msg); + + assert(ul_dcch_msg.msg_type == LIBLTE_RRC_UL_DCCH_MSG_TYPE_MEASUREMENT_REPORT); + LIBLTE_RRC_MEASUREMENT_REPORT_STRUCT *rep = &ul_dcch_msg.msg.measurement_report; + assert(rep->meas_id == 1); + assert(rep->pcell_rsrp_result == 73); + assert(rep->pcell_rsrq_result == 15); + assert(rep->have_meas_result_neigh_cells); + assert(rep->meas_result_neigh_cells_choice == LIBLTE_RRC_MEAS_RESULT_LIST_EUTRA); + LIBLTE_RRC_MEAS_RESULT_LIST_EUTRA_STRUCT *eutra = &rep->meas_result_neigh_cells.eutra; + assert(eutra->n_result == 1); + assert(eutra->result_eutra_list[0].phys_cell_id == 357); + assert(eutra->result_eutra_list[0].have_cgi_info); + assert(eutra->result_eutra_list[0].cgi_info.have_plmn_identity_list); + assert(eutra->result_eutra_list[0].cgi_info.cell_global_id.plmn_id.mcc == 0xF898); + assert(eutra->result_eutra_list[0].cgi_info.cell_global_id.plmn_id.mnc == 0xFF78); + assert(eutra->result_eutra_list[0].cgi_info.cell_global_id.cell_id == 0x1084104); + assert(eutra->result_eutra_list[0].cgi_info.tracking_area_code == 0x1042); + assert(eutra->result_eutra_list[0].cgi_info.have_plmn_identity_list); + assert(eutra->result_eutra_list[0].cgi_info.n_plmn_identity_list == 1); + assert(eutra->result_eutra_list[0].cgi_info.plmn_identity_list[0].mcc == 0xFFFF); + assert(eutra->result_eutra_list[0].cgi_info.plmn_identity_list[0].mnc == 0xFF00); + assert(eutra->result_eutra_list[0].meas_result.have_rsrp); + assert(eutra->result_eutra_list[0].meas_result.rsrp_result == 60); + assert(eutra->result_eutra_list[0].meas_result.have_rsrp); + assert(eutra->result_eutra_list[0].meas_result.rsrq_result == 18); + + liblte_rrc_pack_ul_dcch_msg(&ul_dcch_msg, (LIBLTE_BIT_MSG_STRUCT*)&bit_buf2); + srslte_bit_pack_vector(bit_buf2.msg, byte_buf.msg, bit_buf2.N_bits); + byte_buf.N_bytes = (bit_buf2.N_bits+7)/8; + log1.info_hex(byte_buf.msg, byte_buf.N_bytes, "UL_DCCH Packed message\n"); + + for(uint32_t i=0; i pending_ack; diff --git a/srsenb/hdr/phy/phch_worker.h b/srsenb/hdr/phy/phch_worker.h index 2d773345c..29483d961 100644 --- a/srsenb/hdr/phy/phch_worker.h +++ b/srsenb/hdr/phy/phch_worker.h @@ -75,12 +75,12 @@ private: void work_imp(); - int encode_pdsch(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants, uint32_t sf_idx); - int decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, uint32_t tti_rx); - int encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks, uint32_t sf_idx); - int encode_pdcch_dl(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants, uint32_t sf_idx); - int encode_pdcch_ul(srslte_enb_ul_pusch_t *grants, uint32_t nof_grants, uint32_t sf_idx); - int decode_pucch(uint32_t tti_rx); + int encode_pdsch(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants); + int decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch); + int encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks); + int encode_pdcch_dl(srslte_enb_dl_pdsch_t *grants, uint32_t nof_grants); + int encode_pdcch_ul(srslte_enb_ul_pusch_t *grants, uint32_t nof_grants); + int decode_pucch(); /* Common objects */ @@ -91,8 +91,9 @@ private: cf_t *signal_buffer_rx[SRSLTE_MAX_PORTS]; cf_t *signal_buffer_tx[SRSLTE_MAX_PORTS]; - uint32_t tti_rx, tti_tx, tti_sched_ul, sf_rx, sf_tx, sf_sched_ul, tx_mutex_cnt; - + uint32_t tti_rx, tti_tx_dl, tti_tx_ul; + uint32_t sf_rx, sf_tx, tx_mutex_cnt; + uint32_t t_rx, t_tx_dl, t_tx_ul; srslte_enb_dl_t enb_dl; srslte_enb_ul_t enb_ul; diff --git a/srsenb/src/mac/mac.cc b/srsenb/src/mac/mac.cc index d1e6e01b4..0b6f65983 100644 --- a/srsenb/src/mac/mac.cc +++ b/srsenb/src/mac/mac.cc @@ -446,7 +446,7 @@ int mac::get_dl_sched(uint32_t tti, dl_sched_t *dl_sched_res) log_h->step(tti); if (!started) { - return 0; + return 0; } if (!dl_sched_res) { @@ -644,7 +644,7 @@ int mac::get_ul_sched(uint32_t tti, ul_sched_t *ul_sched_res) ul_sched_res->nof_phich = sched_result.nof_phich_elems; return SRSLTE_SUCCESS; } - + void mac::tti_clock() { timers_thread.tti_clock(); diff --git a/srsenb/src/mac/scheduler.cc b/srsenb/src/mac/scheduler.cc index dd787a28d..c8b6dcc6b 100644 --- a/srsenb/src/mac/scheduler.cc +++ b/srsenb/src/mac/scheduler.cc @@ -582,7 +582,7 @@ int sched::dl_sched_rar(dl_sched_rar_t rar[MAX_RAR_LIST]) pending_rar[j].rar_tti = 0; // Save UL resources - uint32_t pending_tti=(current_tti+6)%10; + uint32_t pending_tti=(current_tti+MSG3_DELAY_MS+HARQ_DELAY_MS)%10; pending_msg3[pending_tti].enabled = true; pending_msg3[pending_tti].rnti = pending_rar[j].rnti; pending_msg3[pending_tti].L = L_prb; @@ -734,17 +734,17 @@ int sched::ul_sched(uint32_t tti, srsenb::sched_interface::ul_sched_res_t* sched pthread_mutex_lock(&mutex); /* If dl_sched() not yet called this tti (this tti is +4ms advanced), reset CCE state */ - if ((current_tti+4)%10240 != tti) { + if (TTI_TX(current_tti) != tti) { bzero(used_cce, MAX_CCE*sizeof(bool)); } /* Initialize variables */ current_tti = tti; sfn = tti/10; - if (tti > 4) { - sf_idx = (tti-4)%10; + if (tti > HARQ_DELAY_MS) { + sf_idx = (tti-HARQ_DELAY_MS)%10; } else { - sf_idx = (tti+10240-4)%10; + sf_idx = (tti+10240-HARQ_DELAY_MS)%10; } int nof_dci_elems = 0; int nof_phich_elems = 0; diff --git a/srsenb/src/mac/scheduler_harq.cc b/srsenb/src/mac/scheduler_harq.cc index cbb66c669..85be81406 100644 --- a/srsenb/src/mac/scheduler_harq.cc +++ b/srsenb/src/mac/scheduler_harq.cc @@ -179,7 +179,7 @@ void dl_harq_proc::set_rbgmask(uint32_t new_mask) bool dl_harq_proc::has_pending_retx(uint32_t tb_idx, uint32_t current_tti) { - return srslte_tti_interval(current_tti, tti) >= 8 && has_pending_retx_common(tb_idx); + return srslte_tti_interval(current_tti, tti) >= (2*HARQ_DELAY_MS) && has_pending_retx_common(tb_idx); } int dl_harq_proc::get_tbs(uint32_t tb_idx) diff --git a/srsenb/src/mac/scheduler_metric.cc b/srsenb/src/mac/scheduler_metric.cc index ee3f44e6f..4e03d396c 100644 --- a/srsenb/src/mac/scheduler_metric.cc +++ b/srsenb/src/mac/scheduler_metric.cc @@ -142,8 +142,12 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) dl_harq_proc *h = user->get_pending_dl_harq(current_tti); // Time-domain RR scheduling +#if ASYNC_DL_SCHED if (pending_data || h) { - if (nof_users_with_data) { +#else + if (pending_data || (h && !h->is_empty())) { +#endif + if (nof_users_with_data) { if (nof_users_with_data == 2) { } if ((current_tti%nof_users_with_data) != user->ue_idx) { @@ -153,7 +157,11 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) } // Schedule retx if we have space +#if ASYNC_DL_SCHED if (h) { +#else + if (h && !h->is_empty()) { +#endif uint32_t retx_mask = h->get_rbgmask(); // If can schedule the same mask, do it if (!allocation_is_valid(retx_mask)) { @@ -170,10 +178,14 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) } } } - // If could not schedule the reTx, or there wasn't any pending retx, find an empty PID + // If could not schedule the reTx, or there wasn't any pending retx, find an empty PID +#if ASYNC_DL_SCHED h = user->get_empty_dl_harq(); if (h) { - // Allocate resources based on pending data +#else + if (h && h->is_empty()) { +#endif + // Allocate resources based on pending data if (pending_data) { uint32_t pending_rb = user->get_required_prb_dl(pending_data, nof_ctrl_symbols); uint32_t newtx_mask = 0; diff --git a/srsenb/src/mac/scheduler_ue.cc b/srsenb/src/mac/scheduler_ue.cc index 974791957..53d4c30cd 100644 --- a/srsenb/src/mac/scheduler_ue.cc +++ b/srsenb/src/mac/scheduler_ue.cc @@ -249,16 +249,16 @@ bool sched_ue::get_pucch_sched(uint32_t current_tti, uint32_t prb_idx[2]) // First check if it has pending ACKs for (int i=0;icell.nof_prb) * sizeof(cf_t)); signal_buffer_tx[p] = (cf_t *) srslte_vec_malloc(2 * SRSLTE_SF_LEN_PRB(phy->cell.nof_prb) * sizeof(cf_t)); if (!signal_buffer_tx[p]) { fprintf(stderr, "Error allocating memory\n"); @@ -101,7 +102,7 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_) } bzero(signal_buffer_tx[p], 2 * SRSLTE_SF_LEN_PRB(phy->cell.nof_prb) * sizeof(cf_t)); } - if (srslte_enb_dl_init(&enb_dl, phy->cell.nof_prb)) { + if (srslte_enb_dl_init(&enb_dl, signal_buffer_tx, phy->cell.nof_prb)) { fprintf(stderr, "Error initiating ENB DL\n"); return; } @@ -109,7 +110,7 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_) fprintf(stderr, "Error initiating ENB DL\n"); return; } - if (srslte_enb_ul_init(&enb_ul, phy->cell.nof_prb)) { + if (srslte_enb_ul_init(&enb_ul, signal_buffer_rx[0], phy->cell.nof_prb)) { fprintf(stderr, "Error initiating ENB UL\n"); return; } @@ -181,11 +182,16 @@ cf_t* phch_worker::get_buffer_rx(uint32_t antenna_idx) void phch_worker::set_time(uint32_t tti_, uint32_t tx_mutex_cnt_, srslte_timestamp_t tx_time_) { tti_rx = tti_; - tti_tx = (tti_ + 4)%10240; - tti_sched_ul = (tti_ + 8)%10240; + tti_tx_dl = TTI_TX(tti_rx); + tti_tx_ul = TTI_RX_ACK(tti_rx); + sf_rx = tti_rx%10; - sf_tx = tti_tx%10; - sf_sched_ul = tti_sched_ul%10; + sf_tx = tti_tx_dl%10; + + t_tx_dl = TTIMOD(tti_tx_dl); + t_rx = TTIMOD(tti_rx); + t_tx_ul = TTIMOD(tti_tx_ul); + tx_mutex_cnt = tx_mutex_cnt_; memcpy(&tx_time, &tx_time_, sizeof(srslte_timestamp_t)); } @@ -194,16 +200,16 @@ int phch_worker::add_rnti(uint16_t rnti) { if (srslte_enb_dl_add_rnti(&enb_dl, rnti)) { - return -1; + return -1; } if (srslte_enb_ul_add_rnti(&enb_ul, rnti)) { - return -1; + return -1; } - // Create user - ue_db[rnti].rnti = rnti; - - return SRSLTE_SUCCESS; + // Create user + ue_db[rnti].rnti = rnti; + + return SRSLTE_SUCCESS; } @@ -221,8 +227,8 @@ void phch_worker::set_conf_dedicated_ack(uint16_t rnti, bool ack){ pthread_mutex_unlock(&mutex); } -void phch_worker::set_config_dedicated(uint16_t rnti, - srslte_uci_cfg_t *uci_cfg, +void phch_worker::set_config_dedicated(uint16_t rnti, + srslte_uci_cfg_t *uci_cfg, srslte_pucch_sched_t *pucch_sched, srslte_refsignal_srs_cfg_t *srs_cfg, LIBLTE_RRC_PHYSICAL_CONFIG_DEDICATED_STRUCT* dedicated) @@ -236,17 +242,17 @@ void phch_worker::set_config_dedicated(uint16_t rnti, if (ue_db.count(rnti)) { pucch_sched->N_pucch_1 = phy->pucch_cfg.n1_pucch_an; srslte_enb_ul_cfg_ue(&enb_ul, rnti, uci_cfg, pucch_sched, srs_cfg); - - ue_db[rnti].I_sr = I_sr; + + ue_db[rnti].I_sr = I_sr; ue_db[rnti].I_sr_en = true; if (pucch_cqi) { - ue_db[rnti].pmi_idx = pmi_idx; - ue_db[rnti].cqi_en = true; - ue_db[rnti].pucch_cqi_ack = pucch_cqi_ack; + ue_db[rnti].pmi_idx = pmi_idx; + ue_db[rnti].cqi_en = true; + ue_db[rnti].pucch_cqi_ack = pucch_cqi_ack; } else { - ue_db[rnti].pmi_idx = 0; - ue_db[rnti].cqi_en = false; + ue_db[rnti].pmi_idx = 0; + ue_db[rnti].cqi_en = false; } /* Copy all dedicated RRC configuration to UE */ @@ -254,41 +260,39 @@ void phch_worker::set_config_dedicated(uint16_t rnti, } else { Error("Setting config dedicated: rnti=0x%x does not exist\n"); } - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } void phch_worker::rem_rnti(uint16_t rnti) { - pthread_mutex_lock(&mutex); + pthread_mutex_lock(&mutex); if (ue_db.count(rnti)) { ue_db.erase(rnti); - - srslte_enb_dl_rem_rnti(&enb_dl, rnti); + + srslte_enb_dl_rem_rnti(&enb_dl, rnti); srslte_enb_ul_rem_rnti(&enb_ul, rnti); - - // remove any pending grant for each subframe - for (uint32_t i=0;i<10;i++) { + + // remove any pending grant for each subframe + for (uint32_t i=0;iul_grants[i].nof_grants;j++) { if (phy->ul_grants[i].sched_grants[j].rnti == rnti) { - phy->ul_grants[i].sched_grants[j].rnti = 0; + phy->ul_grants[i].sched_grants[j].rnti = 0; } } for (uint32_t j=0;jdl_grants[i].nof_grants;j++) { if (phy->dl_grants[i].sched_grants[j].rnti == rnti) { - phy->dl_grants[i].sched_grants[j].rnti = 0; + phy->dl_grants[i].sched_grants[j].rnti = 0; } } } } else { Error("Removing user: rnti=0x%x does not exist\n", rnti); } - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } void phch_worker::work_imp() { - uint32_t sf_ack; - if (!running) { return; } @@ -296,69 +300,68 @@ void phch_worker::work_imp() pthread_mutex_lock(&mutex); mac_interface_phy::ul_sched_t *ul_grants = phy->ul_grants; - mac_interface_phy::dl_sched_t *dl_grants = phy->dl_grants; - mac_interface_phy *mac = phy->mac; - + mac_interface_phy::dl_sched_t *dl_grants = phy->dl_grants; + mac_interface_phy *mac = phy->mac; + log_h->step(tti_rx); - + Debug("Worker %d running\n", get_id()); - + for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; - ue_db[rnti].has_grant_tti = -1; + ue_db[rnti].has_grant_tti = -1; } // Process UL signal - srslte_enb_ul_fft(&enb_ul, signal_buffer_rx[0]); + srslte_enb_ul_fft(&enb_ul); // Decode pending UL grants for the tti they were scheduled - decode_pusch(ul_grants[sf_rx].sched_grants, ul_grants[sf_rx].nof_grants, sf_rx); - + decode_pusch(ul_grants[t_rx].sched_grants, ul_grants[t_rx].nof_grants); + // Decode remaining PUCCH ACKs not associated with PUSCH transmission and SR signals - decode_pucch(tti_rx); - + decode_pucch(); + // Get DL scheduling for the TX TTI from MAC - if (mac->get_dl_sched(tti_tx, &dl_grants[sf_tx]) < 0) { + if (mac->get_dl_sched(tti_tx_dl, &dl_grants[t_tx_dl]) < 0) { Error("Getting DL scheduling from MAC\n"); goto unlock; - } - - if (dl_grants[sf_tx].cfi < 1 || dl_grants[sf_tx].cfi > 3) { - Error("Invalid CFI=%d\n", dl_grants[sf_tx].cfi); + } + + if (dl_grants[t_tx_dl].cfi < 1 || dl_grants[t_tx_dl].cfi > 3) { + Error("Invalid CFI=%d\n", dl_grants[t_tx_dl].cfi); goto unlock; } - + // Get UL scheduling for the TX TTI from MAC - if (mac->get_ul_sched(tti_sched_ul, &ul_grants[sf_sched_ul]) < 0) { + if (mac->get_ul_sched(tti_tx_ul, &ul_grants[t_tx_ul]) < 0) { Error("Getting UL scheduling from MAC\n"); goto unlock; - } - + } + // Put base signals (references, PBCH, PCFICH and PSS/SSS) into the resource grid srslte_enb_dl_clear_sf(&enb_dl); - srslte_enb_dl_set_cfi(&enb_dl, dl_grants[sf_tx].cfi); - srslte_enb_dl_put_base(&enb_dl, tti_tx); + srslte_enb_dl_set_cfi(&enb_dl, dl_grants[t_tx_dl].cfi); + srslte_enb_dl_put_base(&enb_dl, tti_tx_dl); + + // Put UL/DL grants to resource grid. PDSCH data will be encoded as well. + encode_pdcch_dl(dl_grants[t_tx_dl].sched_grants, dl_grants[t_tx_dl].nof_grants); + encode_pdcch_ul(ul_grants[t_tx_ul].sched_grants, ul_grants[t_tx_ul].nof_grants); + encode_pdsch(dl_grants[t_tx_dl].sched_grants, dl_grants[t_tx_dl].nof_grants); - // Put UL/DL grants to resource grid. PDSCH data will be encoded as well. - encode_pdcch_dl(dl_grants[sf_tx].sched_grants, dl_grants[sf_tx].nof_grants, sf_tx); - encode_pdcch_ul(ul_grants[sf_sched_ul].sched_grants, ul_grants[sf_sched_ul].nof_grants, sf_tx); - encode_pdsch(dl_grants[sf_tx].sched_grants, dl_grants[sf_tx].nof_grants, sf_tx); - // Put pending PHICH HARQ ACK/NACK indications into subframe - encode_phich(ul_grants[sf_sched_ul].phich, ul_grants[sf_sched_ul].nof_phich, sf_tx); - - // Prepare for receive ACK for DL grants in sf_tx+4 - sf_ack = (sf_tx+4)%10; - phy->ack_clear(sf_ack); - for (uint32_t i=0;iack_clear(TTIMOD(TTI_TX(t_tx_dl))); + for (uint32_t i=0;i= SRSLTE_CRNTI_START && dl_grants[sf_tx].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { - phy->ack_set_pending(sf_ack, dl_grants[sf_tx].sched_grants[i].rnti, dl_grants[sf_tx].sched_grants[i].location.ncce); + if (dl_grants[t_tx_dl].sched_grants[i].rnti >= SRSLTE_CRNTI_START && dl_grants[t_tx_dl].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { + phy->ack_set_pending(TTIMOD(TTI_TX(t_tx_dl)), dl_grants[t_tx_dl].sched_grants[i].rnti, dl_grants[t_tx_dl].sched_grants[i].location.ncce); } } - + // Generate signal and transmit - srslte_enb_dl_gen_signal(&enb_dl, signal_buffer_tx); + srslte_enb_dl_gen_signal(&enb_dl); Debug("Sending to radio\n"); phy->worker_end(tx_mutex_cnt, signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); @@ -367,35 +370,35 @@ void phch_worker::work_imp() #endif #ifdef DEBUG_WRITE_FILE - if (tti_tx == 10) { + if (tti_tx_dl == 10) { fclose(f); exit(-1); } -#endif - +#endif + /* Tell the plotting thread to draw the plots */ #ifdef ENABLE_GUI if ((int) get_id() == plot_worker_id) { - sem_post(&plot_sem); + sem_post(&plot_sem); } #endif unlock: - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } -int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, uint32_t tti) +int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) { - srslte_uci_data_t uci_data; + srslte_uci_data_t uci_data; bzero(&uci_data, sizeof(srslte_uci_data_t)); - - uint32_t wideband_cqi_value = 0; - - uint32_t n_rb_ho = 0; + + uint32_t wideband_cqi_value = 0; + + uint32_t n_rb_ho = 0; for (uint32_t i=0;iack_is_pending(sf_rx, rnti)) { - uci_data.uci_ack_len = 1; + if (phy->ack_is_pending(t_rx, rnti)) { + uci_data.uci_ack_len = 1; } - // Configure PUSCH CQI channel + // Configure PUSCH CQI channel srslte_cqi_value_t cqi_value; - bool cqi_enabled = false; + bool cqi_enabled = false; if (ue_db[rnti].cqi_en && srslte_cqi_send(ue_db[rnti].pmi_idx, tti_rx)) { cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; - cqi_enabled = true; + cqi_enabled = true; } else if (grants[i].grant.cqi_request) { cqi_value.type = SRSLTE_CQI_TYPE_SUBBAND_HL; cqi_value.subband_hl.N = (phy->cell.nof_prb > 7) ? srslte_cqi_hl_get_no_subbands(phy->cell.nof_prb) : 0; - cqi_enabled = true; + cqi_enabled = true; } if (cqi_enabled) { uci_data.uci_cqi_len = srslte_cqi_size(&cqi_value); } - - // mark this tti as having an ul grant to avoid pucch - ue_db[rnti].has_grant_tti = tti_rx; - - srslte_ra_ul_grant_t phy_grant; + + // mark this tti as having an ul grant to avoid pucch + ue_db[rnti].has_grant_tti = tti_rx; + + srslte_ra_ul_grant_t phy_grant; int res = -1; - if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant, tti%8)) { + if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant)) { if (phy_grant.mcs.mod == SRSLTE_MOD_64QAM) { phy_grant.mcs.mod = SRSLTE_MOD_16QAM; } phy_grant.Qm = SRSLTE_MIN(phy_grant.Qm, 4); res = srslte_enb_ul_get_pusch(&enb_ul, &phy_grant, grants[i].softbuffer, - rnti, grants[i].rv_idx, - grants[i].current_tx_nb, - grants[i].data, - &uci_data, - tti); + rnti, grants[i].rv_idx, + grants[i].current_tx_nb, + grants[i].data, + &uci_data, + sf_rx); } else { Error("Computing PUSCH grant\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - + #ifdef LOG_EXECTIME gettimeofday(&t[2], NULL); get_time_interval(t); snprintf(timestr, 64, ", dec_time=%4d us", (int) t[0].tv_usec); #endif - - bool crc_res = (res == 0); - + + bool crc_res = (res == 0); + // Save PHICH scheduling for this user. Each user can have just 1 PUSCH grant per TTI - ue_db[rnti].phich_info.n_prb_lowest = enb_ul.pusch_cfg.grant.n_prb_tilde[0]; - ue_db[rnti].phich_info.n_dmrs = phy_grant.ncs_dmrs; + ue_db[rnti].phich_info.n_prb_lowest = enb_ul.pusch_cfg.grant.n_prb_tilde[0]; + ue_db[rnti].phich_info.n_dmrs = phy_grant.ncs_dmrs; char cqi_str[64]; if (cqi_enabled) { @@ -466,8 +469,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, } snprintf(cqi_str, 64, ", cqi=%d", wideband_cqi_value); } - - float snr_db = 10*log10(srslte_chest_ul_get_snr(&enb_ul.chest)); + + float snr_db = 10*log10(srslte_chest_ul_get_snr(&enb_ul.chest)); /* if (!crc_res && enb_ul.pusch_cfg.grant.L_prb == 1 && enb_ul.pusch_cfg.grant.n_prb[0] == 0 && snr_db > 5) { @@ -476,8 +479,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, srslte_vec_save_file("d", enb_ul.pusch.d, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); srslte_vec_save_file("ce2", enb_ul.pusch.ce, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); srslte_vec_save_file("z", enb_ul.pusch.z, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); - printf("saved sf_idx=%d, mcs=%d, tbs=%d, rnti=%d, rv=%d, snr=%.1f\n", tti%10, - grants[i].grant.mcs_idx, enb_ul.pusch_cfg.cb_segm.tbs, rnti, grants[i].rv_idx, snr_db); + printf("saved sf_idx=%d, mcs=%d, tbs=%d, rnti=%d, rv=%d, snr=%.1f\n", tti%10, + grants[i].grant.mcs_idx, enb_ul.pusch_cfg.cb_segm.tbs, rnti, grants[i].rv_idx, snr_db); exit(-1); } */ @@ -485,70 +488,70 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, "PUSCH: rnti=0x%x, prb=(%d,%d), tbs=%d, mcs=%d, rv=%d, snr=%.1f dB, n_iter=%d, crc=%s%s%s%s\n", rnti, phy_grant.n_prb[0], phy_grant.n_prb[0]+phy_grant.L_prb, phy_grant.mcs.tbs/8, phy_grant.mcs.idx, grants[i].grant.rv_idx, - snr_db, + snr_db, srslte_pusch_last_noi(&enb_ul.pusch), crc_res?"OK":"KO", uci_data.uci_ack_len>0?(uci_data.uci_ack?", ack=1":", ack=0"):"", - uci_data.uci_cqi_len>0?cqi_str:"", - timestr); - - // Notify MAC of RL status + uci_data.uci_cqi_len>0?cqi_str:"", + timestr); + + // Notify MAC of RL status if (grants[i].grant.rv_idx == 0) { if (res && snr_db < PUSCH_RL_SNR_DB_TH) { Debug("PUSCH: Radio-Link failure snr=%.1f dB\n", snr_db); phy->mac->rl_failure(rnti); } else { phy->mac->rl_ok(rnti); - } + } } - + // Notify MAC new received data and HARQ Indication value - phy->mac->crc_info(tti_rx, rnti, phy_grant.mcs.tbs/8, crc_res); + phy->mac->crc_info(tti_rx, rnti, phy_grant.mcs.tbs/8, crc_res); if (uci_data.uci_ack_len) { phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (crc_res || snr_db > PUSCH_RL_SNR_DB_TH)); } - - // Notify MAC of UL SNR and DL CQI + + // Notify MAC of UL SNR and DL CQI if (snr_db >= PUSCH_RL_SNR_DB_TH) { phy->mac->snr_info(tti_rx, rnti, snr_db); } if (uci_data.uci_cqi_len>0 && crc_res) { phy->mac->cqi_info(tti_rx, rnti, wideband_cqi_value); } - - // Save metrics stats + + // Save metrics stats ue_db[rnti].metrics_ul(phy_grant.mcs.idx, 0, snr_db, srslte_pusch_last_noi(&enb_ul.pusch)); - } + } } - return SRSLTE_SUCCESS; + return SRSLTE_SUCCESS; } -int phch_worker::decode_pucch(uint32_t tti_rx) +int phch_worker::decode_pucch() { - uint32_t sf_rx = tti_rx%10; - srslte_uci_data_t uci_data; - + srslte_uci_data_t uci_data; + for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; if (rnti >= SRSLTE_CRNTI_START && rnti <= SRSLTE_CRNTI_END && ue_db[rnti].has_grant_tti != (int) tti_rx) { - // Check if user needs to receive PUCCH + // Check if user needs to receive PUCCH bool needs_pucch = false, needs_ack=false, needs_sr=false, needs_cqi=false, needs_ri=false; uint32_t last_n_pdcch = 0; bzero(&uci_data, sizeof(srslte_uci_data_t)); - + if (ue_db[rnti].I_sr_en) { if (srslte_ue_ul_sr_send_tti(ue_db[rnti].I_sr, tti_rx)) { - needs_pucch = true; - needs_sr = true; - uci_data.scheduling_request = true; + needs_pucch = true; + needs_sr = true; + uci_data.scheduling_request = true; } - } - if (phy->ack_is_pending(sf_rx, rnti, &last_n_pdcch)) { - needs_pucch = true; - needs_ack = true; - uci_data.uci_ack_len = 1; + } + + if (phy->ack_is_pending(t_rx, rnti, &last_n_pdcch)) { + needs_pucch = true; + needs_ack = true; + uci_data.uci_ack_len = 1; } srslte_cqi_value_t cqi_value; LIBLTE_RRC_PHYSICAL_CONFIG_DEDICATED_STRUCT *dedicated = &ue_db[rnti].dedicated; @@ -563,9 +566,9 @@ int phch_worker::decode_pucch(uint32_t tti_rx) needs_ri = true; uci_data.uci_ri_len = 1; } else if (srslte_cqi_send(ue_db[rnti].pmi_idx, tti_rx)) { - needs_pucch = true; - needs_cqi = true; - cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; + needs_pucch = true; + needs_cqi = true; + cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; uci_data.uci_cqi_len = srslte_cqi_size(&cqi_value); if (tx_mode == LIBLTE_RRC_TRANSMISSION_MODE_4) { //uci_data.uci_dif_cqi_len = 3; @@ -573,17 +576,17 @@ int phch_worker::decode_pucch(uint32_t tti_rx) } } } - + if (needs_pucch) { if (srslte_enb_ul_get_pucch(&enb_ul, rnti, last_n_pdcch, sf_rx, &uci_data)) { fprintf(stderr, "Error getting PUCCH\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } if (uci_data.uci_ack_len > 0) { - phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (srslte_pucch_get_last_corr(&enb_ul.pucch) >= PUCCH_RL_CORR_TH)); + phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (srslte_pucch_get_last_corr(&enb_ul.pucch) >= PUCCH_RL_CORR_TH)); } if (uci_data.scheduling_request) { - phy->mac->sr_detected(tti_rx, rnti); + phy->mac->sr_detected(tti_rx, rnti); } char cqi_ri_str[64]; @@ -606,8 +609,8 @@ int phch_worker::decode_pucch(uint32_t tti_rx) } } - log_h->info("PUCCH: rnti=0x%x, corr=%.2f, n_pucch=%d, n_prb=%d%s%s%s\n", - rnti, + log_h->info("PUCCH: rnti=0x%x, corr=%.2f, n_pucch=%d, n_prb=%d%s%s%s\n", + rnti, srslte_pucch_get_last_corr(&enb_ul.pucch), enb_ul.pucch.last_n_pucch, enb_ul.pucch.last_n_prb, needs_ack?(uci_data.uci_ack?", ack=1":", ack=0"):"", @@ -615,109 +618,107 @@ int phch_worker::decode_pucch(uint32_t tti_rx) (needs_cqi || needs_ri)?cqi_ri_str:""); - // Notify MAC of RL status + // Notify MAC of RL status if (!needs_sr) { if (srslte_pucch_get_last_corr(&enb_ul.pucch) < PUCCH_RL_CORR_TH) { Debug("PUCCH: Radio-Link failure corr=%.1f\n", srslte_pucch_get_last_corr(&enb_ul.pucch)); phy->mac->rl_failure(rnti); } else { phy->mac->rl_ok(rnti); - } - } + } + } } } - } - return 0; + } + return 0; } -int phch_worker::encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks, uint32_t sf_idx) +int phch_worker::encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks) { for (uint32_t i=0;irnti; if (rnti) { - if (srslte_enb_dl_put_pdcch_dl(&enb_dl, &grants[i].grant, grant->dci_format, grants[i].location, rnti, sf_idx)) { + if (srslte_enb_dl_put_pdcch_dl(&enb_dl, &grants[i].grant, grant->dci_format, grants[i].location, rnti, sf_tx)) { fprintf(stderr, "Error putting PDCCH %d\n",i); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - + if (LOG_THIS(rnti)) { - Info("PDCCH: DL DCI %s rnti=0x%x, cce_index=%d, L=%d, tti_tx=%d\n", srslte_dci_format_string(grant->dci_format), - rnti, grants[i].location.ncce, (1<dci_format), + rnti, grants[i].location.ncce, (1<info_hex(ptr, len, - "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx=%d, tx_scheme=%s%s\n", - rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, - phy_grant.mcs[0].tbs/8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx, srslte_mimotype2str(mimo_type), pinfo_str); + "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx_dl=%d, tx_scheme=%s%s\n", + rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, + phy_grant.mcs[0].tbs / 8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx_dl, + srslte_mimotype2str(mimo_type), pinfo_str); } - int rv[SRSLTE_MAX_CODEWORDS] = {grants[i].grant.rv_idx, grants[i].grant.rv_idx_1}; + srslte_softbuffer_tx_t *sb[SRSLTE_MAX_CODEWORDS]; + uint8_t *d[SRSLTE_MAX_CODEWORDS]; + int rv[SRSLTE_MAX_CODEWORDS]; - if (srslte_enb_dl_put_pdsch(&enb_dl, &phy_grant, grants[i].softbuffers, rnti, rv, sf_idx, grants[i].data, mimo_type)) { - fprintf(stderr, "Error putting PDSCH %d\n",i); - return SRSLTE_ERROR; + for (int tb = 0; tb < 2; tb++) { + sb[tb] = grants[tb].softbuffers[tb]; + d[tb] = grants[tb].data[tb]; } + rv[0] = grants[i].grant.rv_idx; + rv[1] = grants[i].grant.rv_idx_1; + - // Save metrics stats + if (srslte_enb_dl_put_pdsch(&enb_dl, &phy_grant, sb, rnti, rv, sf_tx, d, mimo_type)) { + fprintf(stderr, "Error putting PDSCH %d\n", i); + return SRSLTE_ERROR; + } + + // Save metrics stats ue_db[rnti].metrics_dl(phy_grant.mcs[0].idx); } } - return SRSLTE_SUCCESS; + return SRSLTE_SUCCESS; } diff --git a/srsenb/src/phy/txrx.cc b/srsenb/src/phy/txrx.cc index 2a6929a20..acedd36c3 100644 --- a/srsenb/src/phy/txrx.cc +++ b/srsenb/src/phy/txrx.cc @@ -117,7 +117,7 @@ void txrx::run_thread() /* Compute TX time: Any transmission happens in TTI+4 thus advance 4 ms the reception time */ srslte_timestamp_copy(&tx_time, &rx_time); - srslte_timestamp_add(&tx_time, 0, 4e-3); + srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3); Debug("Settting TTI=%d, tx_mutex=%d, tx_time=%d:%f to worker %d\n", tti, tx_mutex_cnt, diff --git a/srsenb/src/upper/gtpu.cc b/srsenb/src/upper/gtpu.cc index d8f53d662..5d64cb4e5 100644 --- a/srsenb/src/upper/gtpu.cc +++ b/srsenb/src/upper/gtpu.cc @@ -82,6 +82,7 @@ bool gtpu::init(std::string gtp_bind_addr_, std::string mme_addr_, srsenb::pdcp_ #endif struct sockaddr_in bindaddr; + bzero(&bindaddr, sizeof(struct sockaddr_in)); bindaddr.sin_family = AF_INET; bindaddr.sin_addr.s_addr = inet_addr(gtp_bind_addr.c_str()); bindaddr.sin_port = htons(GTPU_PORT); @@ -137,7 +138,10 @@ void gtpu::write_pdu(uint16_t rnti, uint32_t lcid, srslte::byte_buffer_t* pdu) servaddr.sin_port = htons(GTPU_PORT); gtpu_write_header(&header, pdu); - sendto(snk_fd, pdu->msg, pdu->N_bytes, MSG_EOR, (struct sockaddr*)&servaddr, sizeof(struct sockaddr_in)); + if (sendto(snk_fd, pdu->msg, pdu->N_bytes, MSG_EOR, (struct sockaddr*)&servaddr, sizeof(struct sockaddr_in))<0) { + perror("sendto"); + } + pool->deallocate(pdu); } diff --git a/srsenb/src/upper/rrc.cc b/srsenb/src/upper/rrc.cc index 54a8b2663..1993f3444 100644 --- a/srsenb/src/upper/rrc.cc +++ b/srsenb/src/upper/rrc.cc @@ -223,9 +223,13 @@ void rrc::rem_user(uint16_t rnti) if (users.count(rnti) == 1) { rrc_log->console("Disconnecting rnti=0x%x.\n", rnti); rrc_log->info("Disconnecting rnti=0x%x.\n", rnti); - /* **Caution** order of removal here is imporant: from bottom to top */ + /* **Caution** order of removal here is important: from bottom to top */ mac->ue_rem(rnti); // MAC handles PHY + + pthread_mutex_unlock(&user_mutex); usleep(50000); + pthread_mutex_lock(&user_mutex); + rlc->rem_user(rnti); pdcp->rem_user(rnti); gtpu->rem_user(rnti); diff --git a/srsue/hdr/mac/demux.h b/srsue/hdr/mac/demux.h index 8b722e8c9..fb38990ec 100644 --- a/srsue/hdr/mac/demux.h +++ b/srsue/hdr/mac/demux.h @@ -41,14 +41,16 @@ namespace srsue { class demux : public srslte::pdu_queue::process_callback { public: - demux(uint8_t nof_harq_proc_); + demux(); void init(phy_interface_mac_common* phy_h_, rlc_interface_mac *rlc, srslte::log* log_h_, srslte::timers::timer* time_alignment_timer); bool process_pdus(); - uint8_t* request_buffer(uint32_t pid, uint32_t len); + uint8_t* request_buffer(uint32_t len); + uint8_t* request_buffer_bcch(uint32_t len); void deallocate(uint8_t* payload_buffer_ptr); - void push_pdu(uint32_t pid, uint8_t *buff, uint32_t nof_bytes, uint32_t tstamp); + void push_pdu(uint8_t *buff, uint32_t nof_bytes, uint32_t tstamp); + void push_pdu_bcch(uint8_t *buff, uint32_t nof_bytes, uint32_t tstamp); void push_pdu_temp_crnti(uint8_t *buff, uint32_t nof_bytes); void set_uecrid_callback(bool (*callback)(void*, uint64_t), void *arg); @@ -59,7 +61,8 @@ public: private: const static int MAX_PDU_LEN = 150*1024/8; // ~ 150 Mbps const static int NOF_BUFFER_PDUS = 64; // Number of PDU buffers per HARQ pid - uint8_t bcch_buffer[1024]; // BCCH PID has a dedicated buffer + const static int MAX_BCCH_PDU_LEN = 1024; + uint8_t bcch_buffer[MAX_BCCH_PDU_LEN]; // BCCH PID has a dedicated buffer bool (*uecrid_callback) (void*, uint64_t); void *uecrid_callback_arg; @@ -76,8 +79,7 @@ private: srslte::log *log_h; srslte::timers::timer *time_alignment_timer; rlc_interface_mac *rlc; - uint8_t nof_harq_proc; - + // Buffer of PDUs srslte::pdu_queue pdus; }; diff --git a/srsue/hdr/mac/dl_harq.h b/srsue/hdr/mac/dl_harq.h index ee925596f..521018d73 100644 --- a/srsue/hdr/mac/dl_harq.h +++ b/srsue/hdr/mac/dl_harq.h @@ -259,13 +259,16 @@ private: memcpy(&cur_grant, &grant, sizeof(Tgrant)); // If data has not yet been successfully decoded - if (!ack) { + if (!ack || (grant.rv[tid]==0 && grant.phy_grant.dl.mcs[tid].idx < 29)) { // Instruct the PHY To combine the received data and attempt to decode it - payload_buffer_ptr = harq_entity->demux_unit->request_buffer(pid * SRSLTE_MAX_TB + tid, - cur_grant.n_bytes[tid]); + if (pid == HARQ_BCCH_PID) { + payload_buffer_ptr = harq_entity->demux_unit->request_buffer_bcch(cur_grant.n_bytes[tid]); + } else { + payload_buffer_ptr = harq_entity->demux_unit->request_buffer(cur_grant.n_bytes[tid]); + } action->payload_ptr[tid] = payload_buffer_ptr; - if (!action->payload_ptr) { + if (!action->payload_ptr[tid]) { action->decode_enabled[tid] = false; Error("Can't get a buffer for TBS=%d\n", cur_grant.n_bytes[tid]); return; @@ -305,8 +308,7 @@ private: harq_entity->pcap->write_dl_sirnti(payload_buffer_ptr, cur_grant.n_bytes[tid], ack, cur_grant.tti); } Debug("Delivering PDU=%d bytes to Dissassemble and Demux unit (BCCH)\n", cur_grant.n_bytes[tid]); - harq_entity->demux_unit->push_pdu(pid * SRSLTE_MAX_TB + tid, payload_buffer_ptr, cur_grant.n_bytes[tid], - cur_grant.tti); + harq_entity->demux_unit->push_pdu_bcch(payload_buffer_ptr, cur_grant.n_bytes[tid], cur_grant.tti); } else { if (harq_entity->pcap) { harq_entity->pcap->write_dl_crnti(payload_buffer_ptr, cur_grant.n_bytes[tid], cur_grant.rnti, ack, @@ -318,8 +320,7 @@ private: harq_entity->demux_unit->push_pdu_temp_crnti(payload_buffer_ptr, cur_grant.n_bytes[tid]); } else { Debug("Delivering PDU=%d bytes to Dissassemble and Demux unit\n", cur_grant.n_bytes[tid]); - harq_entity->demux_unit->push_pdu(pid * SRSLTE_MAX_TB + tid, payload_buffer_ptr, cur_grant.n_bytes[tid], - cur_grant.tti); + harq_entity->demux_unit->push_pdu(payload_buffer_ptr, cur_grant.n_bytes[tid], cur_grant.tti); // Compute average number of retransmissions per packet harq_entity->average_retx = SRSLTE_VEC_CMA((float) n_retx, harq_entity->average_retx, @@ -346,9 +347,10 @@ private: // Determine if it's a new transmission 5.3.2.2 bool calc_is_new_transmission(Tgrant grant) { - if ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) - (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) - is_first_tb) // 3rd condition (first TB) + if (grant.phy_grant.dl.mcs[tid].idx <= 28 && // mcs 29,30,31 always retx regardless of rest + ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) + (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) + is_first_tb)) { is_first_tb = false; is_new_transmission = true; diff --git a/srsue/hdr/mac/mac.h b/srsue/hdr/mac/mac.h index a306af187..d19f668bf 100644 --- a/srsue/hdr/mac/mac.h +++ b/srsue/hdr/mac/mac.h @@ -109,7 +109,7 @@ private: static const int MAC_MAIN_THREAD_PRIO = 5; static const int MAC_PDU_THREAD_PRIO = 6; - static const int MAC_NOF_HARQ_PROC = 8; + static const int MAC_NOF_HARQ_PROC = 2*HARQ_DELAY_MS; // Interaction with PHY srslte::tti_sync_cv ttisync; diff --git a/srsue/hdr/mac/mux.h b/srsue/hdr/mac/mux.h index 1167af752..ebc61b5c6 100644 --- a/srsue/hdr/mac/mux.h +++ b/srsue/hdr/mac/mux.h @@ -82,8 +82,7 @@ private: const static int MIN_RLC_SDU_LEN = 0; const static int MAX_NOF_SUBHEADERS = 20; - const static int MAX_HARQ_PROC = 8; - + std::vector lch; // Keep track of the PIDs that transmitted BSR reports @@ -100,7 +99,7 @@ private: uint8_t nof_harq_proc; /* Msg3 Buffer */ - static const uint32_t MSG3_BUFF_SZ = 128; + static const uint32_t MSG3_BUFF_SZ = 1024; uint8_t msg3_buff[MSG3_BUFF_SZ]; /* PDU Buffer */ diff --git a/srsue/hdr/phy/phch_common.h b/srsue/hdr/phy/phch_common.h index aa64fe9ea..39e9e9685 100644 --- a/srsue/hdr/phy/phch_common.h +++ b/srsue/hdr/phy/phch_common.h @@ -138,7 +138,7 @@ namespace srsue { uint32_t I_lowest; uint32_t n_dmrs; } pending_ack_t; - pending_ack_t pending_ack[10]; + pending_ack_t pending_ack[TTIMOD_SZ]; bool is_first_tx; diff --git a/srsue/hdr/phy/phch_recv.h b/srsue/hdr/phy/phch_recv.h index 044960760..01a296094 100644 --- a/srsue/hdr/phy/phch_recv.h +++ b/srsue/hdr/phy/phch_recv.h @@ -53,6 +53,7 @@ public: void set_agc_enable(bool enable); void set_earfcn(std::vector earfcn); + void force_freq(float dl_freq, float ul_freq); void reset_sync(); void cell_search_start(); @@ -157,7 +158,7 @@ private: uint32_t current_earfcn; uint32_t sync_sfn_cnt; - const static uint32_t SYNC_SFN_TIMEOUT = 200; + const static uint32_t SYNC_SFN_TIMEOUT = 1000; float ul_dl_factor; int cur_earfcn_index; bool cell_search_in_progress; @@ -165,12 +166,16 @@ private: float measure_rsrp; srslte_ue_dl_t ue_dl_measure; - const static int RSRP_MEASURE_NOF_FRAMES = 5; + const static int RSRP_MEASURE_NOF_FRAMES = 10; int cell_sync_sfn(); int cell_meas_rsrp(); int cell_search(int force_N_id_2 = -1); bool set_cell(); + + float dl_freq; + float ul_freq; + }; } // namespace srsue diff --git a/srsue/hdr/phy/phch_worker.h b/srsue/hdr/phy/phch_worker.h index 74af27752..12d14756c 100644 --- a/srsue/hdr/phy/phch_worker.h +++ b/srsue/hdr/phy/phch_worker.h @@ -150,7 +150,12 @@ private: uint32_t I_sr; float cfo; bool rar_cqi_request; - + + // Save last TBS for mcs>28 cases + int last_dl_tbs[2*HARQ_DELAY_MS][SRSLTE_MAX_CODEWORDS]; + int last_ul_tbs[2*HARQ_DELAY_MS]; + srslte_mod_t last_ul_mod[2*HARQ_DELAY_MS]; + // Metrics dl_metrics_t dl_metrics; ul_metrics_t ul_metrics; diff --git a/srsue/hdr/phy/phy.h b/srsue/hdr/phy/phy.h index 0c77360b2..07a2713fa 100644 --- a/srsue/hdr/phy/phy.h +++ b/srsue/hdr/phy/phy.h @@ -76,6 +76,7 @@ public: void write_trace(std::string filename); void set_earfcn(std::vector earfcns); + void force_freq(float dl_freq, float ul_freq); /********** RRC INTERFACE ********************/ void reset(); @@ -167,7 +168,7 @@ private: /* Current time advance */ uint32_t n_ta; - + bool init_(srslte::radio *radio_handler, mac_interface_phy *mac, srslte::log *log_h, bool do_agc, uint32_t nof_workers); void set_default_args(phy_args_t *args); bool check_args(phy_args_t *args); diff --git a/srsue/hdr/ue_base.h b/srsue/hdr/ue_base.h index 411896f70..b547945a9 100644 --- a/srsue/hdr/ue_base.h +++ b/srsue/hdr/ue_base.h @@ -103,6 +103,7 @@ typedef struct { }gui_args_t; typedef struct { + std::string ip_netmask; phy_args_t phy; float metrics_period_secs; bool pregenerate_signals; diff --git a/srsue/hdr/upper/gw.h b/srsue/hdr/upper/gw.h index 800b31624..b97ceb6c5 100644 --- a/srsue/hdr/upper/gw.h +++ b/srsue/hdr/upper/gw.h @@ -57,8 +57,13 @@ public: // NAS interface srslte::error_t setup_if_addr(uint32_t ip_addr, char *err_str); + void set_netmask(std::string netmask); + private: + bool default_netmask; + std::string netmask; + static const int GW_THREAD_PRIO = 7; pdcp_interface_gw *pdcp; diff --git a/srsue/hdr/upper/rrc.h b/srsue/hdr/upper/rrc.h index 3643f76c3..3e2fb70dd 100644 --- a/srsue/hdr/upper/rrc.h +++ b/srsue/hdr/upper/rrc.h @@ -98,6 +98,8 @@ private: uint8_t transaction_id; bool drb_up; + bool reestablishment_in_progress; + // timeouts in ms uint32_t connecting_timeout; @@ -244,7 +246,8 @@ private: // Helpers void rrc_connection_release(); - void radio_link_failure(); + void con_restablish_cell_reselected(); + void radio_link_failure(); static void* start_sib_thread(void *rrc_); void sib_search(); void apply_sib2_configs(LIBLTE_RRC_SYS_INFO_BLOCK_TYPE_2_STRUCT *sib2); diff --git a/srsue/src/mac/demux.cc b/srsue/src/mac/demux.cc index 75bafd1b4..171e7bf45 100644 --- a/srsue/src/mac/demux.cc +++ b/srsue/src/mac/demux.cc @@ -36,7 +36,7 @@ namespace srsue { -demux::demux(uint8_t nof_harq_proc_) : mac_msg(20), pending_mac_msg(20), nof_harq_proc(nof_harq_proc_) +demux::demux() : mac_msg(20), pending_mac_msg(20), rlc(NULL) { } @@ -64,18 +64,18 @@ void demux::deallocate(uint8_t* payload_buffer_ptr) pdus.deallocate(payload_buffer_ptr); } } - -uint8_t* demux::request_buffer(uint32_t pid, uint32_t len) -{ - uint8_t *buff = NULL; - if (pid < nof_harq_proc) { - return pdus.request(len); - } else if (pid == nof_harq_proc) { - buff = bcch_buffer; +uint8_t* demux::request_buffer_bcch(uint32_t len) +{ + if (len < MAX_BCCH_PDU_LEN) { + return bcch_buffer; } else { - Error("Requested buffer for invalid PID=%d\n", pid); + return NULL; } - return buff; +} + +uint8_t* demux::request_buffer(uint32_t len) +{ + return pdus.request(len); } /* Demultiplexing of MAC PDU associated with a Temporal C-RNTI. The PDU will @@ -117,21 +117,17 @@ void demux::push_pdu_temp_crnti(uint8_t *buff, uint32_t nof_bytes) * This function enqueues the packet and returns quicly because ACK * deadline is important here. */ -void demux::push_pdu(uint32_t pid, uint8_t *buff, uint32_t nof_bytes, uint32_t tstamp) -{ - if (pid < nof_harq_proc) { - return pdus.push(buff, nof_bytes, tstamp); - } else if (pid == nof_harq_proc) { - /* Demultiplexing of MAC PDU associated with SI-RNTI. The PDU passes through - * the MAC in transparent mode. - * Warning: In this case function sends the message to RLC now, since SI blocks do not - * require ACK feedback to be transmitted quickly. - */ - Debug("Pushed BCCH MAC PDU in transparent mode\n"); - rlc->write_pdu_bcch_dlsch(buff, nof_bytes); - } else { - Error("Pushed buffer for invalid PID=%d\n", pid); - } +void demux::push_pdu(uint8_t *buff, uint32_t nof_bytes, uint32_t tstamp) { + return pdus.push(buff, nof_bytes, tstamp); +} + +/* Demultiplexing of MAC PDU associated with SI-RNTI. The PDU passes through +* the MAC in transparent mode. +* Warning: In this case function sends the message to RLC now, since SI blocks do not +* require ACK feedback to be transmitted quickly. +*/ +void demux::push_pdu_bcch(uint8_t *buff, uint32_t nof_bytes, uint32_t tstamp) { + rlc->write_pdu_bcch_dlsch(buff, nof_bytes); } bool demux::process_pdus() diff --git a/srsue/src/mac/mac.cc b/srsue/src/mac/mac.cc index b54ea5be6..f8d10bb34 100644 --- a/srsue/src/mac/mac.cc +++ b/srsue/src/mac/mac.cc @@ -44,7 +44,6 @@ namespace srsue { mac::mac() : ttisync(10240), timers(64), mux_unit(MAC_NOF_HARQ_PROC), - demux_unit(SRSLTE_MAX_TB*MAC_NOF_HARQ_PROC), pdu_process_thread(&demux_unit) { started = false; @@ -118,7 +117,8 @@ void mac::reset() Info("Resetting MAC\n"); - timers.stop_all(); + timers.get(timer_alignment)->stop(); + timers.get(contention_resolution_timer)->stop(); ul_harq.reset_ndi(); @@ -417,7 +417,7 @@ void mac::get_metrics(mac_metrics_t &m) metrics.rx_pkts?((float) 100*metrics.rx_errors/metrics.rx_pkts):0.0, dl_harq.get_average_retx(), metrics.tx_pkts?((float) 100*metrics.tx_errors/metrics.tx_pkts):0.0, - dl_harq.get_average_retx()); + ul_harq.get_average_retx()); metrics.ul_buffer = (int) bsr_procedure.get_buffer_state(); m = metrics; diff --git a/srsue/src/mac/mux.cc b/srsue/src/mac/mux.cc index 5ab58fb50..e38300b87 100644 --- a/srsue/src/mac/mux.cc +++ b/srsue/src/mac/mux.cc @@ -349,14 +349,19 @@ bool mux::msg3_is_transmitted() /* Returns a pointer to the Msg3 buffer */ uint8_t* mux::msg3_get(uint8_t *payload, uint32_t pdu_sz) { - uint8_t* msg3_buff_start_pdu = pdu_get(msg3_buff, pdu_sz, 0, 0); - if (!msg3_buff_start_pdu) { - Error("Moving PDU from Mux unit to Msg3 buffer\n"); + if (pdu_sz < MSG3_BUFF_SZ - 32) { + uint8_t* msg3_buff_start_pdu = pdu_get(msg3_buff, pdu_sz, 0, 0); + if (!msg3_buff_start_pdu) { + Error("Moving PDU from Mux unit to Msg3 buffer\n"); + return NULL; + } + memcpy(payload, msg3_buff_start_pdu, sizeof(uint8_t)*pdu_sz); + msg3_has_been_transmitted = true; + return payload; + } else { + Error("Msg3 size (%d) is longer than internal msg3_buff size=%d, (see mux.h)\n", pdu_sz, MSG3_BUFF_SZ-32); return NULL; - } - memcpy(payload, msg3_buff_start_pdu, sizeof(uint8_t)*pdu_sz); - msg3_has_been_transmitted = true; - return payload; + } } diff --git a/srsue/src/mac/proc_bsr.cc b/srsue/src/mac/proc_bsr.cc index 898943ab9..43694c1bc 100644 --- a/srsue/src/mac/proc_bsr.cc +++ b/srsue/src/mac/proc_bsr.cc @@ -368,7 +368,7 @@ bool bsr_proc::need_to_reset_sr() { bool bsr_proc::need_to_send_sr(uint32_t tti) { if (!sr_is_sent && triggered_bsr_type == REGULAR) { - if (srslte_tti_interval(tti,next_tx_tti)>0 && srslte_tti_interval(tti,next_tx_tti) < 10240-4) { + if (srslte_tti_interval(tti,next_tx_tti)>0 && srslte_tti_interval(tti,next_tx_tti) < 10240-HARQ_DELAY_MS) { reset_sr = false; sr_is_sent = true; Debug("BSR: Need to send sr: sr_is_sent=true, reset_sr=false, tti=%d, next_tx_tti=%d\n", tti, next_tx_tti); diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 569083998..b933b66f3 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -65,6 +65,8 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { common.add_options() ("rf.dl_earfcn", bpo::value(&args->rf.dl_earfcn)->default_value(3400), "Downlink EARFCN") ("rf.freq_offset", bpo::value(&args->rf.freq_offset)->default_value(0), "(optional) Frequency offset") + ("rf.dl_freq", bpo::value(&args->rf.dl_freq)->default_value(-1), "Downlink Frequency (if positive overrides EARFCN)") + ("rf.ul_freq", bpo::value(&args->rf.ul_freq)->default_value(-1), "Uplink Frequency (if positive overrides EARFCN)") ("rf.rx_gain", bpo::value(&args->rf.rx_gain)->default_value(-1), "Front-end receiver gain") ("rf.tx_gain", bpo::value(&args->rf.tx_gain)->default_value(-1), "Front-end transmitter gain") ("rf.nof_rx_ant", bpo::value(&args->rf.nof_rx_ant)->default_value(1), "Number of RX antennas") @@ -120,6 +122,10 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { /* Expert section */ + ("expert.ip_netmask", + bpo::value(&args->expert.ip_netmask)->default_value("255.255.255.0"), + "Netmask of the tun_srsue device") + ("expert.phy.worker_cpu_mask", bpo::value(&args->expert.phy.worker_cpu_mask)->default_value(-1), "cpu bit mask (eg 255 = 1111 1111)") @@ -196,6 +202,11 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { bpo::value(&args->expert.phy.cfo_correct_tol_hz)->default_value(50.0), "Tolerance (in Hz) for digial CFO compensation.") + ("expert.cfo_ema", + bpo::value(&args->expert.phy.cfo_ema)->default_value(0.4), + "CFO Exponential Moving Average coefficient. Lower makes it more robust to noise " + "but vulnerable to periodic interruptions due to VCO corrections.") + ("expert.time_correct_period", bpo::value(&args->expert.phy.time_correct_period)->default_value(5), "Period for sampling time offset correction.") diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index 4a2f8a441..be06b8e9c 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -136,12 +136,14 @@ srslte::radio* phch_common::get_radio() void phch_common::set_rar_grant(uint32_t tti, uint8_t grant_payload[SRSLTE_RAR_GRANT_LEN]) { srslte_dci_rar_grant_unpack(&rar_grant, grant_payload); - rar_grant_pending = true; - // PUSCH is at n+6 or n+7 and phch_worker assumes default delay of 4 ttis + rar_grant_pending = true; + if (MSG3_DELAY_MS < 0) { + fprintf(stderr, "Error MSG3_DELAY_MS can't be negative\n"); + } if (rar_grant.ul_delay) { - rar_grant_tti = (tti + 3) % 10240; + rar_grant_tti = (tti + MSG3_DELAY_MS + 1) % 10240; } else { - rar_grant_tti = (tti + 2) % 10240; + rar_grant_tti = (tti + MSG3_DELAY_MS) % 10240; } } @@ -195,13 +197,13 @@ void phch_common::set_dl_rnti(srslte_rnti_type_t type, uint16_t rnti_value, int } void phch_common::reset_pending_ack(uint32_t tti) { - pending_ack[tti%10].enabled = false; + pending_ack[TTIMOD(tti)].enabled = false; } void phch_common::set_pending_ack(uint32_t tti, uint32_t I_lowest, uint32_t n_dmrs) { - pending_ack[tti%10].enabled = true; - pending_ack[tti%10].I_lowest = I_lowest; - pending_ack[tti%10].n_dmrs = n_dmrs; + pending_ack[TTIMOD(tti)].enabled = true; + pending_ack[TTIMOD(tti)].I_lowest = I_lowest; + pending_ack[TTIMOD(tti)].n_dmrs = n_dmrs; Debug("Set pending ACK for tti=%d I_lowest=%d, n_dmrs=%d\n", tti, I_lowest, n_dmrs); } @@ -211,12 +213,12 @@ bool phch_common::get_pending_ack(uint32_t tti) { bool phch_common::get_pending_ack(uint32_t tti, uint32_t *I_lowest, uint32_t *n_dmrs) { if (I_lowest) { - *I_lowest = pending_ack[tti%10].I_lowest; + *I_lowest = pending_ack[TTIMOD(tti)].I_lowest; } if (n_dmrs) { - *n_dmrs = pending_ack[tti%10].n_dmrs; + *n_dmrs = pending_ack[TTIMOD(tti)].n_dmrs; } - return pending_ack[tti%10].enabled; + return pending_ack[TTIMOD(tti)].enabled; } /* The transmisison of UL subframes must be in sequence. Each worker uses this function to indicate @@ -334,6 +336,7 @@ void phch_common::reset_ul() pthread_mutex_trylock(&tx_mutex[i]); pthread_mutex_unlock(&tx_mutex[i]); } + radio_h->tx_end(); } } diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index c14141257..8a69a7f76 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -63,6 +63,8 @@ double callback_set_rx_gain(void *h, double gain) { phch_recv::phch_recv() { + dl_freq = -1; + ul_freq = -1; bzero(&cell, sizeof(srslte_cell_t)); running = false; } @@ -101,13 +103,13 @@ void phch_recv:: init(srslte::radio_multi *_radio_handler, mac_interface_phy *_ if (do_agc) { srslte_ue_sync_start_agc(&cs.ue_sync, callback_set_rx_gain, last_gain); } - - if (srslte_ue_dl_init(&ue_dl_measure, SRSLTE_MAX_PRB, nof_rx_antennas)) { + + if (srslte_ue_dl_init(&ue_dl_measure, sf_buffer, SRSLTE_MAX_PRB, nof_rx_antennas)) { Error("SYNC: Initiating ue_dl_measure\n"); return; } - if (srslte_ue_mib_init(&ue_mib, SRSLTE_MAX_PRB)) { + if (srslte_ue_mib_init(&ue_mib, sf_buffer, SRSLTE_MAX_PRB)) { Error("SYNC: Initiating UE MIB decoder\n"); return; } @@ -207,6 +209,7 @@ void phch_recv::set_ue_sync_opts(srslte_ue_sync_t *q) { srslte_ue_sync_cfo_i_detec_en(q, true); } + srslte_ue_sync_set_cfo_ema(q, worker_com->args->cfo_ema); srslte_ue_sync_set_cfo_tol(q, worker_com->args->cfo_correct_tol_hz); int time_correct_period = worker_com->args->time_correct_period; @@ -374,7 +377,7 @@ int phch_recv::cell_sync_sfn(void) { int sfn_offset = 0; Info("SYNC: Trying to decode MIB... SNR=%.1f dB\n", 10*log10(srslte_chest_dl_get_snr(&ue_mib.chest))); - int n = srslte_ue_mib_decode(&ue_mib, sf_buffer[0], bch_payload, NULL, &sfn_offset); + int n = srslte_ue_mib_decode(&ue_mib, bch_payload, NULL, &sfn_offset); if (n < 0) { Error("SYNC: Error decoding MIB while synchronising SFN"); return -1; @@ -445,6 +448,11 @@ void phch_recv::set_earfcn(std::vector earfcn) { this->earfcn = earfcn; } +void phch_recv::force_freq(float dl_freq, float ul_freq) { + this->dl_freq = dl_freq; + this->ul_freq = ul_freq; +} + bool phch_recv::stop_sync() { wait_radio_reset(); @@ -568,17 +576,25 @@ bool phch_recv::cell_select(uint32_t earfcn, srslte_cell_t cell) { bool phch_recv::set_frequency() { - double dl_freq = 1e6*srslte_band_fd(current_earfcn); - double ul_freq = 1e6*srslte_band_fu(srslte_band_ul_earfcn(current_earfcn)); - if (dl_freq > 0 && ul_freq > 0) { + double set_dl_freq = 0; + double set_ul_freq = 0; + + if (this->dl_freq > 0 && this->ul_freq > 0) { + set_dl_freq = this->dl_freq; + set_ul_freq = this->ul_freq; + } else { + set_dl_freq = 1e6*srslte_band_fd(current_earfcn); + set_ul_freq = 1e6*srslte_band_fu(srslte_band_ul_earfcn(current_earfcn)); + } + if (set_dl_freq > 0 && set_ul_freq > 0) { log_h->info("SYNC: Set DL EARFCN=%d, f_dl=%.1f MHz, f_ul=%.1f MHz\n", - current_earfcn, dl_freq / 1e6, ul_freq / 1e6); + current_earfcn, set_dl_freq / 1e6, set_ul_freq / 1e6); log_h->console("Searching cell in DL EARFCN=%d, f_dl=%.1f MHz, f_ul=%.1f MHz\n", - current_earfcn, dl_freq / 1e6, ul_freq / 1e6); + current_earfcn, set_dl_freq / 1e6, set_ul_freq / 1e6); - radio_h->set_rx_freq(dl_freq); - radio_h->set_tx_freq(ul_freq); + radio_h->set_rx_freq(set_dl_freq); + radio_h->set_tx_freq(set_ul_freq); ul_dl_factor = radio_h->get_tx_freq()/radio_h->get_rx_freq(); srslte_ue_sync_reset(&ue_sync); @@ -717,11 +733,11 @@ void phch_recv::run_thread() { worker->set_sample_offset(srslte_ue_sync_get_sfo(&ue_sync)/1000); - /* Compute TX time: Any transmission happens in TTI4 thus advance 4 ms the reception time */ + /* Compute TX time: Any transmission happens in TTI+4 thus advance 4 ms the reception time */ srslte_timestamp_t rx_time, tx_time, tx_time_prach; srslte_ue_sync_get_last_timestamp(&ue_sync, &rx_time); srslte_timestamp_copy(&tx_time, &rx_time); - srslte_timestamp_add(&tx_time, 0, 4e-3 - time_adv_sec); + srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3 - time_adv_sec); worker->set_tx_time(tx_time, next_offset); next_offset = 0; diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index 4ccbbae69..1a6c67ef2 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -65,8 +65,7 @@ phch_worker::phch_worker() : tr_exec(10240) cell_initiated = false; pregen_enabled = false; trace_enabled = false; - - reset(); + reset(); } @@ -97,7 +96,7 @@ void phch_worker::reset() bzero(&period_cqi, sizeof(srslte_cqi_periodic_cfg_t)); I_sr = 0; rnti_is_set = false; - rar_cqi_request = false; + rar_cqi_request = false; cfi = 0; } @@ -118,12 +117,12 @@ bool phch_worker::init(uint32_t max_prb, srslte::log *log_h) } } - if (srslte_ue_dl_init(&ue_dl, max_prb, phy->args->nof_rx_ant)) { + if (srslte_ue_dl_init(&ue_dl, signal_buffer, max_prb, phy->args->nof_rx_ant)) { Error("Initiating UE DL\n"); return false; } - if (srslte_ue_ul_init(&ue_ul, max_prb)) { + if (srslte_ue_ul_init(&ue_ul, signal_buffer[0], max_prb)) { Error("Initiating UE UL\n"); return false; } @@ -262,6 +261,13 @@ void phch_worker::work_imp() } } + // Process RAR before UL to enable zero-delay Msg3 + bool rar_delivered = false; + if (HARQ_DELAY_MS == MSG3_DELAY_MS && dl_mac_grant.rnti_type == SRSLTE_RNTI_RAR) { + rar_delivered = true; + phy->mac->tb_decoded(dl_ack[0], 0, dl_mac_grant.rnti_type, dl_mac_grant.pid); + } + // Decode PHICH bool ul_ack = false; bool ul_ack_available = decode_phich(&ul_ack); @@ -282,8 +288,8 @@ void phch_worker::work_imp() set_uci_periodic_cqi(); } - /* TTI offset for UL is always 4 for LTE */ - ul_action.tti_offset = 4; + /* TTI offset for UL */ + ul_action.tti_offset = HARQ_DELAY_MS; /* Send UL grant or HARQ information (from PHICH) to MAC */ if (ul_grant_available && ul_ack_available) { @@ -304,7 +310,7 @@ void phch_worker::work_imp() &ul_action.softbuffers[0], ul_action.rv[0], ul_action.rnti, ul_mac_grant.is_from_rar); signal_ready = true; if (ul_action.expect_ack) { - phy->set_pending_ack(tti + 8, ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); + phy->set_pending_ack(TTI_RX_ACK(tti), ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); } } else if (dl_action.generate_ack || uci_data.scheduling_request || uci_data.uci_cqi_len > 0 || uci_data.uci_ri_len > 0) { @@ -326,7 +332,7 @@ void phch_worker::work_imp() if (!dl_action.generate_ack_callback) { if (dl_mac_grant.rnti_type == SRSLTE_RNTI_PCH && dl_action.decode_enabled[0]) { phy->mac->pch_decoded_ok(dl_mac_grant.n_bytes[0]); - } else { + } else if (!rar_delivered) { for (uint32_t tb = 0; tb < SRSLTE_MAX_TB; tb++) { if (dl_action.decode_enabled[tb]) { phy->mac->tb_decoded(dl_ack[tb], tb, dl_mac_grant.rnti_type, dl_mac_grant.pid); @@ -476,10 +482,20 @@ bool phch_worker::decode_pdcch_dl(srsue::mac_interface_phy::mac_grant_t* grant) return false; } + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); + + // Set last TBS for this TB (pid) in case of mcs>29 (7.1.7.2 of 36.213) + for (int i=0;iphy_grant.dl.mcs[i].tbs < 0) { + grant->phy_grant.dl.mcs[i].tbs = last_dl_tbs[grant->pid%(2*HARQ_DELAY_MS)][i]; + } + // save it + last_dl_tbs[grant->pid%(2*HARQ_DELAY_MS)][i] = grant->phy_grant.dl.mcs[i].tbs; + } + /* Fill MAC grant structure */ grant->ndi[0] = dci_unpacked.ndi; grant->ndi[1] = dci_unpacked.ndi_1; - grant->pid = dci_unpacked.harq_process; grant->n_bytes[0] = grant->phy_grant.dl.mcs[0].tbs / (uint32_t) 8; grant->n_bytes[1] = grant->phy_grant.dl.mcs[1].tbs / (uint32_t) 8; grant->tti = tti; @@ -623,6 +639,7 @@ int phch_worker::decode_pdsch(srslte_ra_dl_grant_t *grant, uint8_t *payload[SRSL snprintf(commonstr, 128, "PDSCH: l_crb=%2d, harq=%d, snr=%.1f dB, tx_scheme=%s%s", grant->nof_prb, harq_pid, 10 * log10(srslte_chest_dl_get_snr(&ue_dl.chest)), srslte_mimotype2str(mimo_type), pinfo_str); + for (int i=0;itb_en[i]) { snprintf(tbstr[i], 128, ", TB%d: tbs=%d, mcs=%d, rv=%d, crc=%s, it=%d", @@ -671,7 +688,7 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) char timestr[64]; timestr[0]='\0'; - phy->reset_pending_ack(tti + 8); + phy->reset_pending_ack(TTI_RX_ACK(tti)); srslte_dci_msg_t dci_msg; srslte_ra_ul_dci_t dci_unpacked; @@ -720,14 +737,29 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) ue_dl.last_location_ul.ncce, (1<phy_grant.ul.mcs.tbs==0) { - srslte_vec_fprint_hex(stdout, dci_msg.data, dci_msg.nof_bits); + Info("Received PUSCH grant with empty data\n"); } } } - + + if (ret) { + + // Use last TBS for this TB in case of mcs>28 + if (grant->phy_grant.ul.mcs.tbs < 0) { + grant->phy_grant.ul.mcs.tbs = last_ul_tbs[tti%(2*HARQ_DELAY_MS)]; + } + last_ul_tbs[tti%(2*HARQ_DELAY_MS)] = grant->phy_grant.ul.mcs.tbs; + + if (grant->phy_grant.ul.mcs.mod == SRSLTE_MOD_LAST) { + grant->phy_grant.ul.mcs.mod = last_ul_mod[tti%(2*HARQ_DELAY_MS)]; + grant->phy_grant.ul.Qm = srslte_mod_bits_x_symbol(grant->phy_grant.ul.mcs.mod); + } + last_ul_mod[tti%(2*HARQ_DELAY_MS)] = grant->phy_grant.ul.mcs.mod; + } + /* Limit UL modulation if not supported by the UE or disabled by higher layers */ if (!phy->config->enable_64qam) { - if (grant->phy_grant.ul.mcs.mod == SRSLTE_MOD_64QAM) { + if (grant->phy_grant.ul.mcs.mod >= SRSLTE_MOD_64QAM) { grant->phy_grant.ul.mcs.mod = SRSLTE_MOD_16QAM; grant->phy_grant.ul.Qm = 4; } @@ -784,7 +816,7 @@ void phch_worker::set_uci_sr() { uci_data.scheduling_request = false; if (phy->sr_enabled) { - uint32_t sr_tx_tti = (tti+4)%10240; + uint32_t sr_tx_tti = TTI_TX(tti); // Get I_sr parameter if (srslte_ue_ul_sr_send_tti(I_sr, sr_tx_tti)) { Info("PUCCH: SR transmission at TTI=%d, I_sr=%d\n", sr_tx_tti, I_sr); @@ -801,7 +833,7 @@ void phch_worker::set_uci_periodic_cqi() int cqi_max = phy->args->cqi_max; if (period_cqi.configured && rnti_is_set) { - if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, (tti+4)%10240)) { + if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, TTI_TX(tti))) { /* If the RI is not computed, compute it */ if (uci_data.uci_ri_len < 1) { compute_ri(); @@ -810,7 +842,7 @@ void phch_worker::set_uci_periodic_cqi() uci_data.uci_dif_cqi_len = 0; uci_data.uci_pmi_len = 0; Info("PUCCH: Periodic RI=%d\n", uci_data.uci_ri); - } else if (srslte_cqi_send(period_cqi.pmi_idx, (tti+4)%10240)) { + } else if (srslte_cqi_send(period_cqi.pmi_idx, TTI_TX(tti))) { srslte_cqi_value_t cqi_report; if (period_cqi.format_is_subband) { // TODO: Implement subband periodic reports @@ -886,8 +918,8 @@ void phch_worker::set_uci_aperiodic_cqi() bool phch_worker::srs_is_ready_to_send() { if (srs_cfg.configured) { - if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, (tti+4)%10) == 1 && - srslte_refsignal_srs_send_ue(srs_cfg.I_srs, (tti+4)%10240) == 1) + if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, TTI_TX(tti)%10) == 1 && + srslte_refsignal_srs_send_ue(srs_cfg.I_srs, TTI_TX(tti)) == 1) { return true; } @@ -907,10 +939,10 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_cfg_grant(&ue_ul, grant, (tti+4)%10240, rv, current_tx_nb)) { + if (srslte_ue_ul_cfg_grant(&ue_ul, grant, TTI_TX(tti), rv, current_tx_nb)) { Error("Configuring UL grant\n"); } - + if (srslte_ue_ul_pusch_encode_rnti_softbuffer(&ue_ul, payload, uci_data, softbuffer, @@ -937,12 +969,12 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui #endif Info("PUSCH: tti_tx=%d, alloc=(%d,%d), tbs=%d, mcs=%d, rv=%d, ack=%s, ri=%s, cfo=%.1f KHz%s\n", - (tti+4)%10240, - grant->n_prb[0], grant->n_prb[0]+grant->L_prb, - grant->mcs.tbs/8, grant->mcs.idx, rv, - uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", - uci_data.uci_ri_len>0?(uci_data.uci_ri?"1":"0"):"no", - cfo*15, timestr); + (tti+HARQ_DELAY_MS)%10240, + grant->n_prb[0], grant->n_prb[0]+grant->L_prb, + grant->mcs.tbs/8, grant->mcs.idx, rv, + uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", + uci_data.uci_ri_len>0?(uci_data.uci_ri?"1":"0"):"no", + cfo*15, timestr); // Store metrics ul_metrics.mcs = grant->mcs.idx; @@ -968,7 +1000,7 @@ void phch_worker::encode_pucch() gettimeofday(&t[1], NULL); #endif - if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, (tti+4)%10240, signal_buffer[0])) { + if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, TTI_TX(tti), signal_buffer[0])) { Error("Encoding PUCCH\n"); } @@ -993,7 +1025,7 @@ void phch_worker::encode_pucch() uci_data.uci_pmi_len>0?(uci_data.uci_pmi[0]?"1":"0"):"", uci_data.scheduling_request?"yes":"no", cfo*15, timestr); - } + } if (uci_data.scheduling_request) { phy->sr_enabled = false; @@ -1005,7 +1037,7 @@ void phch_worker::encode_srs() char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_srs_encode(&ue_ul, (tti+4)%10240, signal_buffer[0])) + if (srslte_ue_ul_srs_encode(&ue_ul, TTI_TX(tti), signal_buffer[0])) { Error("Encoding SRS\n"); } @@ -1020,7 +1052,7 @@ void phch_worker::encode_srs() float gain = set_power(tx_power); uint32_t fi = srslte_vec_max_fi((float*) signal_buffer, SRSLTE_SF_LEN_PRB(cell.nof_prb)); float *f = (float*) signal_buffer; - Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, (tti+4)%10240, timestr); + Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, TTI_TX(tti), timestr); } diff --git a/srsue/src/phy/phy.cc b/srsue/src/phy/phy.cc index e74107661..9760ab374 100644 --- a/srsue/src/phy/phy.cc +++ b/srsue/src/phy/phy.cc @@ -200,8 +200,8 @@ void phy::set_timeadv_rar(uint32_t ta_cmd) { void phy::set_timeadv(uint32_t ta_cmd) { n_ta = srslte_N_ta_new(n_ta, ta_cmd); - //sf_recv.set_time_adv_sec(((float) n_ta)*SRSLTE_LTE_TS); - Warning("Not supported: Set TA: ta_cmd: %d, n_ta: %d, ta_usec: %.1f\n", ta_cmd, n_ta, ((float) n_ta)*SRSLTE_LTE_TS*1e6); + sf_recv.set_time_adv_sec(((float) n_ta)*SRSLTE_LTE_TS); + //Warning("Not supported: Set TA: ta_cmd: %d, n_ta: %d, ta_usec: %.1f\n", ta_cmd, n_ta, ((float) n_ta)*SRSLTE_LTE_TS*1e6); } void phy::configure_prach_params() @@ -308,7 +308,8 @@ void phy::reset() pdcch_dl_search_reset(); for(uint32_t i=0;i earfcns) sf_recv.set_earfcn(earfcns); } +void phy::force_freq(float dl_freq, float ul_freq) +{ + sf_recv.force_freq(dl_freq, ul_freq); +} + bool phy::sync_status() { return sf_recv.status_is_sync(); diff --git a/srsue/src/ue.cc b/srsue/src/ue.cc index 92adaf8dd..a99281e6f 100644 --- a/srsue/src/ue.cc +++ b/srsue/src/ue.cc @@ -185,6 +185,8 @@ bool ue::init(all_args_t *args_) nas.init(&usim, &rrc, &gw, &nas_log, 1 /* RB_ID_SRB1 */); gw.init(&pdcp, &nas, &gw_log, 3 /* RB_ID_DRB1 */); + gw.set_netmask(args->expert.ip_netmask); + rrc.init(&phy, &mac, &rlc, &pdcp, &nas, &usim, &mac, &rrc_log); rrc.set_ue_category(atoi(args->expert.ue_cateogry.c_str())); @@ -193,6 +195,10 @@ bool ue::init(all_args_t *args_) earfcn_list.push_back(args->rf.dl_earfcn); phy.set_earfcn(earfcn_list); + if (args->rf.dl_freq > 0 && args->rf.ul_freq > 0) { + phy.force_freq(args->rf.dl_freq, args->rf.ul_freq); + } + printf("Waiting PHY to initialize...\n"); phy.wait_initialize(); phy.configure_ul_params(); diff --git a/srsue/src/upper/gw.cc b/srsue/src/upper/gw.cc index 07ac36989..1e3e81999 100644 --- a/srsue/src/upper/gw.cc +++ b/srsue/src/upper/gw.cc @@ -44,6 +44,7 @@ gw::gw() :if_up(false) { current_ip_addr = 0; + default_netmask = true; } void gw::init(pdcp_interface_gw *pdcp_, nas_interface_gw *nas_, srslte::log *gw_log_, uint32_t lcid_) @@ -104,6 +105,12 @@ void gw::get_metrics(gw_metrics_t &m) ul_tput_bytes = 0; } +void gw::set_netmask(std::string netmask) { + default_netmask = false; + this->netmask = netmask; +} + + /******************************************************************************* PDCP interface *******************************************************************************/ @@ -152,7 +159,11 @@ srslte::error_t gw::setup_if_addr(uint32_t ip_addr, char *err_str) return(srslte::ERROR_CANT_START); } ifr.ifr_netmask.sa_family = AF_INET; - ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr = inet_addr("255.255.255.0"); + const char *mask = "255.255.255.0"; + if (!default_netmask) { + mask = netmask.c_str(); + } + ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr = inet_addr(mask); if(0 > ioctl(sock, SIOCSIFNETMASK, &ifr)) { err_str = strerror(errno); diff --git a/srsue/src/upper/nas.cc b/srsue/src/upper/nas.cc index f0fd8cf54..110c8c78e 100644 --- a/srsue/src/upper/nas.cc +++ b/srsue/src/upper/nas.cc @@ -281,7 +281,6 @@ void nas::parse_attach_accept(uint32_t lcid, byte_buffer_t *pdu) { LIBLTE_MME_ACTIVATE_DEFAULT_EPS_BEARER_CONTEXT_ACCEPT_MSG_STRUCT act_def_eps_bearer_context_accept; nas_log->info("Received Attach Accept\n"); - count_dl++; liblte_mme_unpack_attach_accept_msg((LIBLTE_BYTE_MSG_STRUCT *) pdu, &attach_accept); @@ -359,6 +358,8 @@ void nas::parse_attach_accept(uint32_t lcid, byte_buffer_t *pdu) { state = EMM_STATE_REGISTERED; current_plmn = selecting_plmn; + count_dl++; + // Send EPS bearer context accept and attach complete count_ul++; act_def_eps_bearer_context_accept.eps_bearer_id = eps_bearer_id; @@ -437,6 +438,9 @@ void nas::parse_authentication_request(uint32_t lcid, byte_buffer_t *pdu) { nas_log->console("Warning: Network authentication failure\n"); pool->deallocate(pdu); } + + // Reset DL counter (as per 24.301 5.4.3.2) + count_dl = 0; } void nas::parse_authentication_reject(uint32_t lcid, byte_buffer_t *pdu) { @@ -539,6 +543,8 @@ void nas::parse_security_mode_command(uint32_t lcid, byte_buffer_t *pdu) { } } + count_dl++; + if (!success) { // Reuse pdu for response pdu->reset(); diff --git a/srsue/src/upper/rrc.cc b/srsue/src/upper/rrc.cc index 679cd29ee..5806d200a 100644 --- a/srsue/src/upper/rrc.cc +++ b/srsue/src/upper/rrc.cc @@ -35,8 +35,6 @@ #include "srslte/common/security.h" #include "srslte/common/bcd_helpers.h" -#define TIMEOUT_RESYNC_REESTABLISH 100 - using namespace srslte; namespace srsue { @@ -92,6 +90,8 @@ void rrc::init(phy_interface_rrc *phy_, pthread_mutex_init(&mutex, NULL); + reestablishment_in_progress = false; + ue_category = SRSLTE_UE_CATEGORY; t301 = mac_timers->timer_get_unique_id(); t310 = mac_timers->timer_get_unique_id(); @@ -207,7 +207,11 @@ void rrc::run_thread() { break; case RRC_STATE_CELL_SELECTED: rrc_log->info("RRC Cell Selected: Sending connection request...\n"); - send_con_request(); + if (reestablishment_in_progress) { + con_restablish_cell_reselected(); + } else { + send_con_request(); + } state = RRC_STATE_CONNECTING; connecting_timeout = 0; break; @@ -226,6 +230,7 @@ void rrc::run_thread() { usleep(60000); rrc_log->info("Leaving RRC_CONNECTED state\n"); drb_up = false; + reestablishment_in_progress = false; pdcp->reset(); rlc->reset(); phy->reset(); @@ -233,7 +238,6 @@ void rrc::run_thread() { set_phy_default(); set_mac_default(); mac->pcch_start_rx(); - mac_timers->timer_get(t311)->run(); mac_timers->timer_get(t310)->stop(); mac_timers->timer_get(t311)->stop(); state = RRC_STATE_IDLE; @@ -485,12 +489,12 @@ void rrc::earfcn_end() { // Detection of physical layer problems (5.3.11.1) void rrc::out_of_sync() { - current_cell->in_sync = false; + current_cell->in_sync = false; if (!mac_timers->timer_get(t311)->is_running() && !mac_timers->timer_get(t310)->is_running()) { n310_cnt++; if (n310_cnt == N310) { // attempt resync - phy->sync_reset(); + //phy->sync_reset(); mac_timers->timer_get(t310)->reset(); mac_timers->timer_get(t310)->run(); @@ -664,6 +668,8 @@ void rrc::send_con_restablish_request() { ul_ccch_msg.msg.rrc_con_reest_req.cause = LIBLTE_RRC_CON_REEST_REQ_CAUSE_OTHER_FAILURE; liblte_rrc_pack_ul_ccch_msg(&ul_ccch_msg, (LIBLTE_BIT_MSG_STRUCT *) &bit_buf); + reestablishment_in_progress = true; + rrc_log->info("Initiating RRC Connection Reestablishment Procedure\n"); rrc_log->console("RRC Connection Reestablishment\n"); mac_timers->timer_get(t310)->stop(); @@ -674,19 +680,16 @@ void rrc::send_con_restablish_request() { set_phy_default(); mac->reset(); set_mac_default(); +} - // FIXME: Cell selection should be different?? - - // Wait for cell re-synchronization - uint32_t timeout_cnt = 0; - while (!phy->sync_status() && timeout_cnt < TIMEOUT_RESYNC_REESTABLISH) { - usleep(10000); - timeout_cnt++; - } +// Actions following cell reselection 5.3.7.3 +void rrc::con_restablish_cell_reselected() +{ + reestablishment_in_progress = false; + rrc_log->info("Cell Selection finished. Initiating transmission of RRC Connection Reestablishment Request\n"); mac_timers->timer_get(t301)->reset(); mac_timers->timer_get(t301)->run(); mac_timers->timer_get(t311)->stop(); - rrc_log->info("Cell Selection finished. Initiating transmission of RRC Connection Reestablishment Request\n"); // Byte align and pack the message bits for PDCP if ((bit_buf.N_bits % 8) != 0) { diff --git a/srsue/src/upper/usim.cc b/srsue/src/upper/usim.cc index 9be69383e..e1d2c204f 100644 --- a/srsue/src/upper/usim.cc +++ b/srsue/src/upper/usim.cc @@ -105,20 +105,18 @@ void usim::stop() void usim::get_imsi_vec(uint8_t* imsi_, uint32_t n) { - if (!initiated) - { - usim_log->error("Getting IMSI: USIM not initiated\n"); + if (!initiated) { + fprintf(stderr, "USIM not initiated!\n"); return; } - if(NULL == imsi_ || n < 15) - { + + if(NULL == imsi_ || n < 15) { usim_log->error("Invalid parameters to get_imsi_vec"); return; } uint64_t temp = imsi; - for(int i=14;i>=0;i--) - { + for(int i=14;i>=0;i--) { imsi_[i] = temp % 10; temp /= 10; } @@ -126,13 +124,12 @@ void usim::get_imsi_vec(uint8_t* imsi_, uint32_t n) void usim::get_imei_vec(uint8_t* imei_, uint32_t n) { - if (!initiated) - { - usim_log->error("Getting IMEI: USIM not initiated\n"); + if (!initiated) { + fprintf(stderr, "USIM not initiated!\n"); return; } - if(NULL == imei_ || n < 15) - { + + if(NULL == imei_ || n < 15) { usim_log->error("Invalid parameters to get_imei_vec"); return; } @@ -147,9 +144,8 @@ void usim::get_imei_vec(uint8_t* imei_, uint32_t n) int usim::get_home_plmn_id(LIBLTE_RRC_PLMN_IDENTITY_STRUCT *home_plmn_id) { - if (!initiated) - { - usim_log->error("Getting Home PLMN Id: USIM not initiated\n"); + if (!initiated) { + fprintf(stderr, "USIM not initiated!\n"); return -1; } diff --git a/srsue/ue.conf.example b/srsue/ue.conf.example index 30c05069a..5b3559ae9 100644 --- a/srsue/ue.conf.example +++ b/srsue/ue.conf.example @@ -9,6 +9,8 @@ # rx_gain: Optional receive gain (dB). If disabled, AGC if enabled # # Optional parameters: +# dl_freq: Override DL frequency corresponding to dl_earfcn +# ul_freq: Override UL frequency corresponding to dl_earfcn # nof_rx_ant: Number of RX antennas (Default 1, supported 1 or 2) # device_name: Device driver family. Supported options: "auto" (uses first found), "UHD" or "bladeRF" # device_args: Arguments for the device driver. Options are "auto" or any string. @@ -96,7 +98,11 @@ enable = false ##################################################################### # Expert configuration options # -# ue_category: Sets UE category (range 1-5). Default: 4 +# ue_category: Sets UE category (range 1-5). Default: 4 +# ip_netmask: Netmask of the tun_srsue device. Default: 255.255.255.0 +# rssi_sensor_enabled: Enable or disable RF frontend RSSI sensor. Required for RSRP metrics but +# can cause UHD instability for long-duration testing. Default true. +# ue_category: Sets UE category (range 1-5). Default: 4 # # prach_gain: PRACH gain (dB). If defined, forces a gain for the tranmsission of PRACH only., # Default is to use tx_gain in [rf] section. @@ -113,7 +119,9 @@ enable = false # nof_phy_threads: Selects the number of PHY threads (maximum 4, minimum 1, default 2) # equalizer_mode: Selects equalizer mode. Valid modes are: "mmse", "zf" or any # non-negative real number to indicate a regularized zf coefficient. -# Default is MMSE. +# Default is MMSE. +# cfo_ema: CFO Exponential Moving Average coefficient. Lower makes it more robust to noise +# but vulnerable to periodic interruptions due to VCO corrections. # cfo_integer_enabled: Enables integer CFO estimation and correction. This needs improvement # and may lead to incorrect synchronization. Use with caution. # cfo_correct_tol_hz: Tolerance (in Hz) for digial CFO compensation. Lower tolerance means that @@ -136,6 +144,8 @@ enable = false # ##################################################################### [expert] +#ip_netmask = 255.255.255.0 +#rssi_sensor_enabled = false #ue_category = 4 #prach_gain = 30 #cqi_max = 15 @@ -146,6 +156,7 @@ enable = false #attach_enable_64qam = false #nof_phy_threads = 2 #equalizer_mode = mmse +#cfo_ema = 0.4 #cfo_integer_enabled = false #cfo_correct_tol_hz = 50 #time_correct_period = 5