diff --git a/CMakeLists.txt b/CMakeLists.txt index 0788ec335..ccc1ce54d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,8 @@ option(ENABLE_HARDSIM "Enable support for SIM cards" ON) option(BUILD_STATIC "Attempt to statically link external deps" OFF) option(RPATH "Enable RPATH" OFF) -option(ENABLE_ASAN "Enable gcc address sanitizer" OFF) +option(ENABLE_ASAN "Enable gcc/clang address sanitizer" OFF) +option(ENABLE_MSAN "Enable clang memory sanitizer" OFF) option(USE_LTE_RATES "Use standard LTE sampling rates" OFF) @@ -279,8 +280,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb -DBUILD_TYPE_RELWITHDEBINFO") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -DBUILD_TYPE_RELWITHDEBINFO") else(${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -DBUILD_TYPE_RELEASE") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DBUILD_TYPE_RELEASE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fno-trapping-math -fno-math-errno -DBUILD_TYPE_RELEASE") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fno-trapping-math -fno-math-errno -DBUILD_TYPE_RELEASE") endif(${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") endif(${CMAKE_BUILD_TYPE} STREQUAL "Debug") @@ -315,7 +316,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif(HAVE_SSE) endif(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") - if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=native -DIS_ARM -DHAVE_NEON") message(STATUS "have ARM") @@ -325,17 +325,28 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") set(CMAKE_REQUIRED_FLAGS ${CMAKE_C_FLAGS}) - if(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD) - message(FATAL_ERROR "no SIMD instructions found") - endif(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD) + if(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD) 
+ message(FATAL_ERROR "no SIMD instructions found") + endif(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD) if(NOT WIN32) ADD_CXX_COMPILER_FLAG_IF_AVAILABLE(-fvisibility=hidden HAVE_VISIBILITY_HIDDEN) endif(NOT WIN32) - if (ENABLE_ASAN) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") - endif (ENABLE_ASAN) + + if (ENABLE_ASAN AND ENABLE_MSAN) + message(FATAL_ERROR "ASAN and MSAN cannot be enabled at the same time.") + endif (ENABLE_ASAN AND ENABLE_MSAN) + + if (ENABLE_ASAN) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") + endif (ENABLE_ASAN) + + if (ENABLE_MSAN AND CMAKE_C_COMPILER_ID MATCHES "Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=memory -fno-omit-frame-pointer -fPIE -pie") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=memory -fno-omit-frame-pointer -fPIE -pie") + endif (ENABLE_MSAN AND CMAKE_C_COMPILER_ID MATCHES "Clang") + endif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") @@ -346,6 +357,23 @@ endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") message(STATUS "CMAKE_C_FLAGS is ${CMAKE_C_FLAGS}") message(STATUS "CMAKE_CXX_FLAGS is ${CMAKE_CXX_FLAGS}") + +######################################################################## +# clang-tidy check +######################################################################## +find_program( + CLANG_TIDY_BIN + NAMES "clang-tidy" + DOC "Path to clang-tidy executable" +) +if(NOT CLANG_TIDY_BIN) + message(STATUS "clang-tidy not found.") +else() + message(STATUS "clang-tidy found: ${CLANG_TIDY_BIN}") + set(DO_CLANG_TIDY "${CLANG_TIDY_BIN}" "-checks=*,-clang-analyzer-alpha.*,-modernize-*,-cppcoreguidelines-pro-type-vararg,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-cppcoreguidelines-pro-bounds-constant-array-index") +endif() + + 
######################################################################## # Create uninstall targets ######################################################################## diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake index 4c9673a9d..e5101deff 100644 --- a/cmake/modules/FindSSE.cmake +++ b/cmake/modules/FindSSE.cmake @@ -14,7 +14,7 @@ if (ENABLE_SSE) # # Check compiler for SSE4_1 intrinsics # - if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) set(CMAKE_REQUIRED_FLAGS "-msse4.1") check_c_source_runs(" #include @@ -38,7 +38,7 @@ if (ENABLE_SSE) # # Check compiler for AVX intrinsics # - if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) set(CMAKE_REQUIRED_FLAGS "-mavx") check_c_source_runs(" #include @@ -72,7 +72,7 @@ if (ENABLE_SSE) # # Check compiler for AVX intrinsics # - if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) set(CMAKE_REQUIRED_FLAGS "-mavx2") check_c_source_runs(" #include @@ -106,7 +106,7 @@ if (ENABLE_SSE) # # Check compiler for AVX intrinsics # - if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) set(CMAKE_REQUIRED_FLAGS "-mfma") check_c_source_runs(" #include @@ -141,7 +141,7 @@ if (ENABLE_SSE) # # Check compiler for AVX intrinsics # - if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) set(CMAKE_REQUIRED_FLAGS "-mavx512f") check_c_source_runs(" #include diff --git a/lib/include/srslte/common/bcd_helpers.h 
b/lib/include/srslte/common/bcd_helpers.h index b145a2d5a..abd596860 100644 --- a/lib/include/srslte/common/bcd_helpers.h +++ b/lib/include/srslte/common/bcd_helpers.h @@ -150,9 +150,9 @@ inline void s1ap_plmn_to_mccmnc(uint32_t plmn, uint16_t *mcc, uint16_t *mnc) *mnc |= nibbles[4]; // MNC digit 3 } else { // 3-digit MNC - *mnc |= nibbles[5] << 8; // MNC digit 1 - *mnc |= nibbles[4] << 4; // MNC digit 2 - *mnc |= nibbles[2] ; // MNC digit 3 + *mnc |= nibbles[2] << 8; // MNC digit 1 + *mnc |= nibbles[5] << 4; // MNC digit 2 + *mnc |= nibbles[4] ; // MNC digit 3 } } @@ -177,9 +177,9 @@ inline void s1ap_mccmnc_to_plmn(uint16_t mcc, uint16_t mnc, uint32_t *plmn) nibbles[4] = (mnc & 0x000F); // MNC digit 3 } else { // 3-digit MNC - nibbles[5] = (mnc & 0x0F00) >> 8; // MNC digit 1 - nibbles[4] = (mnc & 0x00F0) >> 4; // MNC digit 2 - nibbles[2] = (mnc & 0x000F); // MNC digit 3 + nibbles[2] = (mnc & 0x0F00) >> 8; // MNC digit 1 + nibbles[5] = (mnc & 0x00F0) >> 4; // MNC digit 2 + nibbles[4] = (mnc & 0x000F); // MNC digit 3 } *plmn = 0x000000; diff --git a/lib/include/srslte/common/block_queue.h b/lib/include/srslte/common/block_queue.h index 63b0e4177..2e476dd65 100644 --- a/lib/include/srslte/common/block_queue.h +++ b/lib/include/srslte/common/block_queue.h @@ -122,7 +122,7 @@ public: } void clear() { // remove all items - myobj item; + myobj *item = NULL; while (try_pop(item)); } diff --git a/lib/include/srslte/common/common.h b/lib/include/srslte/common/common.h index ed5c9acc1..0f53c4a40 100644 --- a/lib/include/srslte/common/common.h +++ b/lib/include/srslte/common/common.h @@ -122,8 +122,10 @@ public: byte_buffer_t():N_bytes(0) { bzero(buffer, SRSLTE_MAX_BUFFER_SIZE_BYTES); +#ifdef ENABLE_TIMESTAMP timestamp_is_set = false; - msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; +#endif + msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; next = NULL; #ifdef SRSLTE_BUFFER_POOL_LOG_ENABLED bzero(debug_name, SRSLTE_BUFFER_POOL_LOG_NAME_LEN); @@ -132,6 +134,9 @@ public: 
byte_buffer_t(const byte_buffer_t& buf) { bzero(buffer, SRSLTE_MAX_BUFFER_SIZE_BYTES); + msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; + next = NULL; + // copy actual contents N_bytes = buf.N_bytes; memcpy(msg, buf.msg, N_bytes); } @@ -141,6 +146,8 @@ public: if (&buf == this) return *this; bzero(buffer, SRSLTE_MAX_BUFFER_SIZE_BYTES); + msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; + next = NULL; N_bytes = buf.N_bytes; memcpy(msg, buf.msg, N_bytes); return *this; @@ -149,7 +156,9 @@ public: { msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; N_bytes = 0; - timestamp_is_set = false; +#ifdef ENABLE_TIMESTAMP + timestamp_is_set = false; +#endif } uint32_t get_headroom() { @@ -183,8 +192,10 @@ public: private: +#ifdef ENABLE_TIMESTAMP struct timeval timestamp[3]; bool timestamp_is_set; +#endif byte_buffer_t *next; }; @@ -199,6 +210,9 @@ struct bit_buffer_t{ bit_buffer_t():N_bits(0) { msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; +#ifdef ENABLE_TIMESTAMP + timestamp_is_set = false; +#endif } bit_buffer_t(const bit_buffer_t& buf){ N_bits = buf.N_bits; @@ -216,7 +230,9 @@ struct bit_buffer_t{ { msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET]; N_bits = 0; - timestamp_is_set = false; +#ifdef ENABLE_TIMESTAMP + timestamp_is_set = false; +#endif } uint32_t get_headroom() { @@ -241,10 +257,11 @@ struct bit_buffer_t{ #endif } -private: +private: +#ifdef ENABLE_TIMESTAMP struct timeval timestamp[3]; bool timestamp_is_set; - +#endif }; } // namespace srslte diff --git a/lib/include/srslte/common/int_helpers.h b/lib/include/srslte/common/int_helpers.h new file mode 100644 index 000000000..e6ac48155 --- /dev/null +++ b/lib/include/srslte/common/int_helpers.h @@ -0,0 +1,66 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsUE library. 
+ * + * srsUE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsUE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#ifndef SRSLTE_INT_HELPERS_H +#define SRSLTE_INT_HELPERS_H + +namespace srslte { + +/****************************************************************************** + * Safe conversions between byte buffers and integer types. + * Note: these don't perform endian conversion - use e.g. htonl/ntohl if required + *****************************************************************************/ +inline void uint8_to_uint32(uint8_t *buf, uint32_t *i) +{ + *i = (uint32_t)buf[0] << 24 | + (uint32_t)buf[1] << 16 | + (uint32_t)buf[2] << 8 | + (uint32_t)buf[3]; +} + +inline void uint32_to_uint8(uint32_t i, uint8_t *buf) +{ + buf[0] = (i >> 24) & 0xFF; + buf[1] = (i >> 16) & 0xFF; + buf[2] = (i >> 8) & 0xFF; + buf[3] = i & 0xFF; +} + +inline void uint8_to_uint16(uint8_t *buf, uint16_t *i) +{ + *i = (uint32_t)buf[0] << 8 | + (uint32_t)buf[1]; +} + +inline void uint16_to_uint8(uint16_t i, uint8_t *buf) +{ + buf[0] = (i >> 8) & 0xFF; + buf[1] = i & 0xFF; +} + +}; //namespace + +#endif // SRSLTE_INT_HELPERS_H diff --git a/lib/include/srslte/common/log_filter.h b/lib/include/srslte/common/log_filter.h index 1296fbbbd..e0dbf4b5b 100644 --- a/lib/include/srslte/common/log_filter.h +++ b/lib/include/srslte/common/log_filter.h @@ -54,6 +54,7 @@ public: log_filter(); log_filter(std::string layer); 
log_filter(std::string layer, logger *logger_, bool tti=false); + ~log_filter(); void init(std::string layer, logger *logger_, bool tti=false); diff --git a/lib/include/srslte/config.h b/lib/include/srslte/config.h index 02c43777f..8ba918d20 100644 --- a/lib/include/srslte/config.h +++ b/lib/include/srslte/config.h @@ -52,6 +52,14 @@ #endif +// Useful macros for templates +#define CONCAT(a, b) a##b +#define CONCAT2(a, b) CONCAT(a,b) + +#define STRING2(x) #x +#define STRING(x) STRING2(x) + + // Common error codes #define SRSLTE_SUCCESS 0 #define SRSLTE_ERROR -1 diff --git a/lib/include/srslte/interfaces/ue_interfaces.h b/lib/include/srslte/interfaces/ue_interfaces.h index 959635afc..2e02050c3 100644 --- a/lib/include/srslte/interfaces/ue_interfaces.h +++ b/lib/include/srslte/interfaces/ue_interfaces.h @@ -240,6 +240,7 @@ public: virtual void reset() = 0; virtual void write_sdu(uint32_t lcid, srslte::byte_buffer_t *sdu, bool blocking = true) = 0; virtual void add_bearer(uint32_t lcid, srslte::srslte_pdcp_config_t cnfg = srslte::srslte_pdcp_config_t()) = 0; + virtual void change_lcid(uint32_t old_lcid, uint32_t new_lcid) = 0; virtual void config_security(uint32_t lcid, uint8_t *k_enc_, uint8_t *k_int_, @@ -273,10 +274,12 @@ class rlc_interface_rrc public: virtual void reset() = 0; virtual void reestablish() = 0; + virtual void reestablish(uint32_t lcid) = 0; virtual void add_bearer(uint32_t lcid) = 0; virtual void add_bearer(uint32_t lcid, srslte::srslte_rlc_config_t cnfg) = 0; virtual void add_bearer_mrb(uint32_t lcid) = 0; virtual void del_bearer(uint32_t lcid) = 0; + virtual void change_lcid(uint32_t old_lcid, uint32_t new_lcid) = 0; }; // RLC interface for PDCP @@ -545,6 +548,7 @@ typedef struct { bool sic_pss_enabled; float rx_gain_offset; bool pdsch_csi_enabled; + bool pdsch_8bit_decoder; uint32_t intra_freq_meas_len_ms; uint32_t intra_freq_meas_period_ms; } phy_args_t; diff --git a/lib/include/srslte/phy/common/sequence.h 
b/lib/include/srslte/phy/common/sequence.h index 346be1cda..96d18c008 100644 --- a/lib/include/srslte/phy/common/sequence.h +++ b/lib/include/srslte/phy/common/sequence.h @@ -44,6 +44,7 @@ typedef struct SRSLTE_API { uint8_t *c_bytes; float *c_float; short *c_short; + int8_t *c_char; uint32_t cur_len; uint32_t max_len; } srslte_sequence_t; diff --git a/lib/include/srslte/phy/fec/rm_turbo.h b/lib/include/srslte/phy/fec/rm_turbo.h index 26f98944c..51519ae72 100644 --- a/lib/include/srslte/phy/fec/rm_turbo.h +++ b/lib/include/srslte/phy/fec/rm_turbo.h @@ -36,6 +36,7 @@ #define SRSLTE_RM_TURBO_H #include "srslte/config.h" +#include "srslte/phy/fec/turbodecoder.h" #ifndef SRSLTE_RX_NULL #define SRSLTE_RX_NULL 10000 @@ -47,7 +48,6 @@ #include "srslte/config.h" - SRSLTE_API int srslte_rm_turbo_tx(uint8_t *w_buff, uint32_t buff_len, uint8_t *input, @@ -82,7 +82,19 @@ SRSLTE_API int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, - uint32_t rv_idx); + uint32_t rv_idx); + +SRSLTE_API int srslte_rm_turbo_rx_lut_(int16_t *input, + int16_t *output, + uint32_t in_len, + uint32_t cb_idx, + uint32_t rv_idx, + bool enable_input_tdec); +SRSLTE_API int srslte_rm_turbo_rx_lut_8bit(int8_t *input, + int8_t *output, + uint32_t in_len, + uint32_t cb_idx, + uint32_t rv_idx); #endif // SRSLTE_RM_TURBO_H diff --git a/lib/include/srslte/phy/fec/tc_interl.h b/lib/include/srslte/phy/fec/tc_interl.h index c0ffaae58..6cb1ad4c9 100644 --- a/lib/include/srslte/phy/fec/tc_interl.h +++ b/lib/include/srslte/phy/fec/tc_interl.h @@ -1,58 +1,59 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. 
- * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -/********************************************************************************************** - * File: tc_interl.h - * - * Description: Turbo code interleaver. - * - * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2.3 - *********************************************************************************************/ - -#ifndef SRSLTE_TC_INTERL_H -#define SRSLTE_TC_INTERL_H - -#include "srslte/config.h" -#include - -typedef struct SRSLTE_API { - uint16_t *forward; - uint16_t *reverse; - uint32_t max_long_cb; -} srslte_tc_interl_t; - -SRSLTE_API int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, - uint32_t long_cb); - -SRSLTE_API int srslte_tc_interl_UMTS_gen(srslte_tc_interl_t *h, - uint32_t long_cb); - -SRSLTE_API int srslte_tc_interl_init(srslte_tc_interl_t *h, - uint32_t max_long_cb); - -SRSLTE_API void srslte_tc_interl_free(srslte_tc_interl_t *h); - -#endif // SRSLTE_TC_INTERL_H +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. 
+ * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +/********************************************************************************************** + * File: tc_interl.h + * + * Description: Turbo code interleaver. + * + * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2.3 + *********************************************************************************************/ + +#ifndef SRSLTE_TC_INTERL_H +#define SRSLTE_TC_INTERL_H + +#include "srslte/config.h" +#include + +typedef struct SRSLTE_API { + uint16_t *forward; + uint16_t *reverse; + uint32_t max_long_cb; +} srslte_tc_interl_t; + +SRSLTE_API int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, + uint32_t long_cb); + +SRSLTE_API int srslte_tc_interl_LTE_gen_interl(srslte_tc_interl_t *h, + uint32_t long_cb, + uint32_t interl_win); + +SRSLTE_API int srslte_tc_interl_init(srslte_tc_interl_t *h, + uint32_t max_long_cb); + +SRSLTE_API void srslte_tc_interl_free(srslte_tc_interl_t *h); + +#endif // SRSLTE_TC_INTERL_H diff --git a/lib/include/srslte/phy/fec/turbocoder.h b/lib/include/srslte/phy/fec/turbocoder.h index 4da697461..84abf5b28 100644 --- a/lib/include/srslte/phy/fec/turbocoder.h +++ b/lib/include/srslte/phy/fec/turbocoder.h @@ -70,10 +70,12 @@ SRSLTE_API int srslte_tcod_encode(srslte_tcod_t *h, uint32_t long_cb); SRSLTE_API int 
srslte_tcod_encode_lut(srslte_tcod_t *h, - srslte_crc_t *crc, - uint8_t *input, + srslte_crc_t *crc_tb, + srslte_crc_t *crc_cb, + uint8_t *input, uint8_t *parity, - uint32_t cblen_idx); + uint32_t cblen_idx, + bool last_cb); SRSLTE_API void srslte_tcod_gentable(); diff --git a/lib/include/srslte/phy/fec/turbodecoder.h b/lib/include/srslte/phy/fec/turbodecoder.h index d882118aa..4c4de0082 100644 --- a/lib/include/srslte/phy/fec/turbodecoder.h +++ b/lib/include/srslte/phy/fec/turbodecoder.h @@ -1,120 +1,143 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -/********************************************************************************************** - * File: turbodecoder.h - * - * Description: Turbo Decoder. - * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent - * encoders and one turbo code internal interleaver. The coding rate of turbo - * encoder is 1/3. - * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. - * - * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 
5.1.3.2 - *********************************************************************************************/ - -#ifndef SRSLTE_TURBODECODER_H -#define SRSLTE_TURBODECODER_H - -#include "srslte/config.h" -#include "srslte/phy/fec/tc_interl.h" -#include "srslte/phy/fec/cbsegm.h" - -#define SRSLTE_TCOD_RATE 3 -#define SRSLTE_TCOD_TOTALTAIL 12 - -#define SRSLTE_TCOD_MAX_LEN_CB 6144 -#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL) - -#include "srslte/phy/fec/turbodecoder_gen.h" -#include "srslte/phy/fec/turbodecoder_simd.h" - -typedef struct SRSLTE_API { - float *input_conv; - union { - srslte_tdec_simd_t tdec_simd; - srslte_tdec_gen_t tdec_gen; - }; -} srslte_tdec_t; - -SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h, - uint32_t max_long_cb); - -SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h); - -SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_reset_cb(srslte_tdec_t * h, - uint32_t cb_idx); - -SRSLTE_API int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, - uint32_t cb_idx); - -SRSLTE_API uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t * h); - -SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h, - int16_t* input, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h, - uint8_t *output, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h, - uint8_t *output, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h, - int16_t * input, - uint8_t *output, - uint32_t nof_iterations, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h, - int16_t* input[SRSLTE_TDEC_MAX_NPAR], - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h, - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h, - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t long_cb); - 
-SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, - uint8_t *output, - uint32_t cb_idx, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h, - int16_t * input[SRSLTE_TDEC_MAX_NPAR], - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_iterations, - uint32_t long_cb); - -#endif // SRSLTE_TURBODECODER_H +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +/********************************************************************************************** + * File: turbodecoder.h + * + * Description: Turbo Decoder. + * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent + * encoders and one turbo code internal interleaver. The coding rate of turbo + * encoder is 1/3. + * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. + * + * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 
5.1.3.2 + *********************************************************************************************/ + +#ifndef SRSLTE_TURBODECODER_H +#define SRSLTE_TURBODECODER_H + +#include "srslte/config.h" +#include "srslte/phy/fec/tc_interl.h" +#include "srslte/phy/fec/cbsegm.h" + +#define SRSLTE_TCOD_RATE 3 +#define SRSLTE_TCOD_TOTALTAIL 12 + +#define SRSLTE_TCOD_MAX_LEN_CB 6144 + +// Expect the input to be aligned for sub-block window processing. +#define SRSLTE_TDEC_EXPECT_INPUT_SB 1 + +// Include interfaces for 8 and 16 bit decoder implementations +#define LLR_IS_8BIT +#include "srslte/phy/fec/turbodecoder_impl.h" +#undef LLR_IS_8BIT + +#define LLR_IS_16BIT +#include "srslte/phy/fec/turbodecoder_impl.h" +#undef LLR_IS_16BIT + +#define SRSLTE_TDEC_NOF_AUTO_MODES_8 2 +#define SRSLTE_TDEC_NOF_AUTO_MODES_16 3 + +typedef enum {SRSLTE_TDEC_8, SRSLTE_TDEC_16} srslte_tdec_llr_type_t; + +typedef struct SRSLTE_API { + uint32_t max_long_cb; + + void *dec8_hdlr[SRSLTE_TDEC_NOF_AUTO_MODES_8]; + void *dec16_hdlr[SRSLTE_TDEC_NOF_AUTO_MODES_16]; + srslte_tdec_8bit_impl_t *dec8[SRSLTE_TDEC_NOF_AUTO_MODES_8]; + srslte_tdec_16bit_impl_t *dec16[SRSLTE_TDEC_NOF_AUTO_MODES_16]; + int nof_blocks8[SRSLTE_TDEC_NOF_AUTO_MODES_8]; + int nof_blocks16[SRSLTE_TDEC_NOF_AUTO_MODES_16]; + + // Declare as void types as can be int8 or int16 + void *app1; + void *app2; + void *ext1; + void *ext2; + void *syst0; + void *parity0; + void *parity1; + + void *input_conv; + + bool force_not_sb; + + srslte_tdec_impl_type_t dec_type; + + srslte_tdec_llr_type_t current_llr_type; + uint32_t current_dec; + uint32_t current_long_cb; + uint32_t current_inter_idx; + int current_cbidx; + srslte_tc_interl_t interleaver[4][SRSLTE_NOF_TC_CB_SIZES]; + int n_iter; +} srslte_tdec_t; + +SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h, + uint32_t max_long_cb); + +SRSLTE_API int srslte_tdec_init_manual(srslte_tdec_t * h, + uint32_t max_long_cb, + srslte_tdec_impl_type_t dec_type); + +SRSLTE_API void 
srslte_tdec_free(srslte_tdec_t * h); + +SRSLTE_API void srslte_tdec_force_not_sb(srslte_tdec_t *h); + +SRSLTE_API int srslte_tdec_new_cb(srslte_tdec_t * h, + uint32_t long_cb); + +SRSLTE_API int srslte_tdec_get_nof_iterations(srslte_tdec_t * h); + +SRSLTE_API uint32_t srslte_tdec_autoimp_get_subblocks(uint32_t long_cb); + +SRSLTE_API uint32_t srslte_tdec_autoimp_get_subblocks_8bit(uint32_t long_cb); + +SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h, + int16_t* input, + uint8_t *output); + +SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h, + int16_t * input, + uint8_t *output, + uint32_t nof_iterations, + uint32_t long_cb); + +SRSLTE_API void srslte_tdec_iteration_8bit(srslte_tdec_t * h, + int8_t* input, + uint8_t *output); + +SRSLTE_API int srslte_tdec_run_all_8bit(srslte_tdec_t * h, + int8_t * input, + uint8_t *output, + uint32_t nof_iterations, + uint32_t long_cb); + + +#endif // SRSLTE_TURBODECODER_H diff --git a/lib/include/srslte/phy/fec/turbodecoder_gen.h b/lib/include/srslte/phy/fec/turbodecoder_gen.h index 2fefc22cc..4d61c5e23 100644 --- a/lib/include/srslte/phy/fec/turbodecoder_gen.h +++ b/lib/include/srslte/phy/fec/turbodecoder_gen.h @@ -1,99 +1,62 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -/********************************************************************************************** - * File: turbodecoder.h - * - * Description: Turbo Decoder. - * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent - * encoders and one turbo code internal interleaver. The coding rate of turbo - * encoder is 1/3. - * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. - * - * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2 - *********************************************************************************************/ - -#ifndef SRSLTE_TURBODECODER_GEN_H -#define SRSLTE_TURBODECODER_GEN_H - -#include "srslte/config.h" -#include "srslte/phy/fec/tc_interl.h" -#include "srslte/phy/fec/cbsegm.h" - -#define SRSLTE_TCOD_RATE 3 -#define SRSLTE_TCOD_TOTALTAIL 12 - -#define SRSLTE_TCOD_MAX_LEN_CB 6144 -#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL) - -typedef struct SRSLTE_API { - int max_long_cb; - float *beta; -} srslte_map_gen_vl_t; - -typedef struct SRSLTE_API { - int max_long_cb; - - srslte_map_gen_vl_t dec; - - float *llr1; - float *llr2; - float *w; - float *syst; - float *parity; - - int current_cbidx; - uint32_t current_cb_len; - uint32_t n_iter; - srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; -} srslte_tdec_gen_t; - -SRSLTE_API int srslte_tdec_gen_init(srslte_tdec_gen_t * h, - uint32_t max_long_cb); - -SRSLTE_API void srslte_tdec_gen_free(srslte_tdec_gen_t * h); - -SRSLTE_API int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb); - -SRSLTE_API void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h, - float * input, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_gen_decision(srslte_tdec_gen_t * h, - uint8_t *output, - uint32_t long_cb); - 
-SRSLTE_API void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h, - uint8_t *output, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, - float * input, - uint8_t *output, - uint32_t nof_iterations, - uint32_t long_cb); - -#endif // SRSLTE_TURBODECODER_GEN_H +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +/********************************************************************************************** + * File: turbodecoder.h + * + * Description: Turbo Decoder. + * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent + * encoders and one turbo code internal interleaver. The coding rate of turbo + * encoder is 1/3. + * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. + * + * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 
5.1.3.2 + *********************************************************************************************/ + +#ifndef SRSLTE_TURBODECODER_GEN_H +#define SRSLTE_TURBODECODER_GEN_H + +#include "srslte/config.h" +#include "srslte/phy/fec/tc_interl.h" +#include "srslte/phy/fec/cbsegm.h" + +#define SRSLTE_TCOD_RATE 3 +#define SRSLTE_TCOD_TOTALTAIL 12 + +#define SRSLTE_TCOD_MAX_LEN_CB 6144 + +typedef struct SRSLTE_API { + uint32_t max_long_cb; + int16_t *beta; +} tdec_gen_t; + +int tdec_gen_init(void **h, uint32_t max_long_cb); +void tdec_gen_free(void *h); +void tdec_gen_dec(void *h, int16_t * input, int16_t *app, int16_t * parity, int16_t *output, uint32_t long_cb); +void tdec_gen_extract_input(int16_t *input, int16_t *syst, int16_t *app2, int16_t *parity0, int16_t *parity1, uint32_t long_cb); /* parameter names follow the positional order used by the tdec_extract_input call in turbodecoder_iter.h */ +void tdec_gen_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb); + +#endif // SRSLTE_TURBODECODER_GEN_H diff --git a/lib/include/srslte/phy/fec/turbodecoder_impl.h b/lib/include/srslte/phy/fec/turbodecoder_impl.h new file mode 100644 index 000000000..1fe0a5321 --- /dev/null +++ b/lib/include/srslte/phy/fec/turbodecoder_impl.h @@ -0,0 +1,68 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#ifndef SRSLTE_TURBODECODER_IMPL_H +#define SRSLTE_TURBODECODER_IMPL_H + +#include "srslte/config.h" + +/* Interface for internal decoder implementation */ +typedef enum SRSLTE_API { + SRSLTE_TDEC_AUTO = 0, + SRSLTE_TDEC_GENERIC, + SRSLTE_TDEC_SSE, + SRSLTE_TDEC_SSE_WINDOW, + SRSLTE_TDEC_AVX_WINDOW, + SRSLTE_TDEC_SSE8_WINDOW, + SRSLTE_TDEC_AVX8_WINDOW, + SRSLTE_TDEC_NOF_IMP +} srslte_tdec_impl_type_t; + +#endif + +#ifdef LLR_IS_8BIT +#define llr_t int8_t +#define type_name srslte_tdec_8bit_impl_t +#else + #ifdef LLR_IS_16BIT + #define llr_t int16_t +#define type_name srslte_tdec_16bit_impl_t + #else + #error "Unsupported LLR mode" + #endif +#endif + + +typedef struct SRSLTE_API { + int (*tdec_init)(void **h, uint32_t max_long_cb); + void (*tdec_free)(void *h); + void (*tdec_dec)(void *h, llr_t * input, llr_t *app, llr_t * parity, llr_t *output, uint32_t long_cb); + void (*tdec_extract_input)(llr_t *input, llr_t *syst, llr_t *parity0, llr_t *parity1, llr_t *app2, uint32_t long_cb); + void (*tdec_decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb); +} type_name; + +#undef llr_t +#undef type_name \ No newline at end of file diff --git a/lib/include/srslte/phy/fec/turbodecoder_iter.h b/lib/include/srslte/phy/fec/turbodecoder_iter.h new file mode 100644 index 000000000..7864829b5 --- /dev/null +++ b/lib/include/srslte/phy/fec/turbodecoder_iter.h @@ -0,0 +1,158 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. 
+ * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#include "srslte/config.h" + +#define MAKE_CALL(a) CONCAT2(a,type_name) +#define MAKE_VEC(a) CONCAT2(a,vec_suffix) +#define PRINT CONCAT2(srslte_vec_fprint,print_suffix) + +#ifdef LLR_IS_8BIT +#define llr_t int8_t +#define type_name _8bit +#define vec_suffix _bbb +#define print_suffix _bs +#define decptr h->dec8[h->current_dec] +#define dechdlr h->dec8_hdlr[h->current_dec] +#define input_is_interleaved 1 +#else +#ifdef LLR_IS_16BIT + #define llr_t int16_t + #define vec_suffix _sss + #define print_suffix _s + #define decptr h->dec16[h->current_dec] + #define dechdlr h->dec16_hdlr[h->current_dec] + #define input_is_interleaved (h->current_dec > 0) +#define type_name _16bit + #else + #warning "Unsupported LLR mode" + #endif +#endif + +#define debug_enabled_iter 0 +#define debug_len 20 + +#define debug_vec(a) if (debug_enabled_iter) {printf("%s it=%d: ", STRING(a), n_iter);PRINT(stdout, a, debug_len);} + + +static void MAKE_CALL(extract_input_tail_sb)(llr_t *input, llr_t *syst, llr_t *app2, llr_t *parity0, llr_t *parity1, uint32_t long_cb) +{ + for (int i = long_cb; i < long_cb + 3; i++) { + syst[i] = input[3*(long_cb+32) + 2*(i - long_cb)]; + parity0[i] = input[3*(long_cb+32)+ 2*(i - long_cb) + 1]; + + app2[i] = input[3*(long_cb+32) + 6 + 2*(i - long_cb)]; + parity1[i] = 
input[3*(long_cb+32) + 6 + 2*(i - long_cb) + 1]; + } +} + +/* Runs 1 turbo decoder iteration */ +void MAKE_CALL(run_tdec_iteration)(srslte_tdec_t * h, llr_t * input) +{ + + if (h->current_cbidx >= 0) { + uint16_t *inter = h->interleaver[h->current_inter_idx][h->current_cbidx].forward; + uint16_t *deinter = h->interleaver[h->current_inter_idx][h->current_cbidx].reverse; + llr_t *syst = (llr_t*) h->syst0; + llr_t *parity0 = (llr_t*) h->parity0; + llr_t *parity1 = (llr_t*) h->parity1; + + llr_t *app1 = (llr_t*) h->app1; + llr_t *app2 = (llr_t*) h->app2; + llr_t *ext1 = (llr_t*) h->ext1; + llr_t *ext2 = (llr_t*) h->ext2; + + uint32_t long_cb = h->current_long_cb; + uint32_t n_iter = h->n_iter; + + if (SRSLTE_TDEC_EXPECT_INPUT_SB && !h->force_not_sb && input_is_interleaved) { + syst = input; + // align to 32 bytes (warning: must be same alignment as in rm_turbo.c) + parity0 = &input[long_cb+32]; + parity1 = &input[2*(long_cb+32)]; + if (n_iter == 0) { + MAKE_CALL(extract_input_tail_sb)(input, syst, app2, parity0, parity1, long_cb); + } + } else { + if (n_iter == 0) { + decptr->tdec_extract_input(input, syst, app2, parity0, parity1, long_cb); + } + } + + if ((n_iter%2) == 0) { + + // Add apriori information to decoder 1 + if (n_iter) { + MAKE_VEC(srslte_vec_sub)(app1, ext1, app1, long_cb); + } + + // Run MAP DEC #1 + decptr->tdec_dec(dechdlr, syst, n_iter ? app1 : NULL, parity0, ext1, long_cb); + + } + // Interleave extrinsic output of DEC1 to form apriori info for decoder 2 + if (n_iter%2) { + // Convert aposteriori information into extrinsic information + if (n_iter > 1) { + MAKE_VEC(srslte_vec_sub)(ext1, app1, ext1, long_cb); + } + + MAKE_VEC(srslte_vec_lut)(ext1, deinter, app2, long_cb); + + // Run MAP DEC #2. 
2nd decoder uses apriori information as systematic bits + decptr->tdec_dec(dechdlr, app2, NULL, parity1, ext2, long_cb); + + // Deinterleaved extrinsic bits become apriori info for decoder 1 + MAKE_VEC(srslte_vec_lut)(ext2, inter, app1, long_cb); + + } + + if (h->n_iter == 0) { + debug_vec(syst); + debug_vec(parity0); + debug_vec(parity1); + } + debug_vec(ext1); + debug_vec(ext2); + debug_vec(app1); + debug_vec(app2); + + h->n_iter++; + } else { + fprintf(stderr, "Error CB index not set (call srslte_tdec_new_cb() first\n"); + } +} + +#undef debug_enabled +#undef debug_len +#undef debug_vec +#undef llr_t +#undef vec_suffix +#undef print_suffix +#undef decptr +#undef dechdlr +#undef type_name +#undef input_is_interleaved \ No newline at end of file diff --git a/lib/include/srslte/phy/fec/turbodecoder_simd.h b/lib/include/srslte/phy/fec/turbodecoder_simd.h deleted file mode 100644 index 35c8beef9..000000000 --- a/lib/include/srslte/phy/fec/turbodecoder_simd.h +++ /dev/null @@ -1,122 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. 
- * - */ - -/********************************************************************************************** - * File: turbodecoder.h - * - * Description: Turbo Decoder. - * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent - * encoders and one turbo code internal interleaver. The coding rate of turbo - * encoder is 1/3. - * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. - * - * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2 - *********************************************************************************************/ - -#ifndef SRSLTE_TURBODECODER_SIMD_H -#define SRSLTE_TURBODECODER_SIMD_H - -#include "srslte/config.h" -#include "srslte/phy/fec/tc_interl.h" -#include "srslte/phy/fec/cbsegm.h" - -// Define maximum number of CB decoded in parallel (2 for AVX2) -#define SRSLTE_TDEC_MAX_NPAR 2 - -#define SRSLTE_TCOD_RATE 3 -#define SRSLTE_TCOD_TOTALTAIL 12 - -#define SRSLTE_TCOD_MAX_LEN_CB 6144 -#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL) - -typedef struct SRSLTE_API { - uint32_t max_long_cb; - uint32_t max_par_cb; - int16_t *alpha; - int16_t *branch; -} map_gen_t; - -typedef struct SRSLTE_API { - uint32_t max_long_cb; - uint32_t max_par_cb; - - map_gen_t dec; - - int16_t *app1[SRSLTE_TDEC_MAX_NPAR]; - int16_t *app2[SRSLTE_TDEC_MAX_NPAR]; - int16_t *ext1[SRSLTE_TDEC_MAX_NPAR]; - int16_t *ext2[SRSLTE_TDEC_MAX_NPAR]; - int16_t *syst[SRSLTE_TDEC_MAX_NPAR]; - int16_t *parity0[SRSLTE_TDEC_MAX_NPAR]; - int16_t *parity1[SRSLTE_TDEC_MAX_NPAR]; - - int cb_mask; - int current_cbidx; - srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; - int n_iter[SRSLTE_TDEC_MAX_NPAR]; -} srslte_tdec_simd_t; - -SRSLTE_API int srslte_tdec_simd_init(srslte_tdec_simd_t * h, - uint32_t max_par_cb, - uint32_t max_long_cb); - -SRSLTE_API void srslte_tdec_simd_free(srslte_tdec_simd_t * h); - -SRSLTE_API int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, - uint32_t long_cb); - 
-SRSLTE_API - -SRSLTE_API int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h, - uint32_t cb_idx); - -SRSLTE_API int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h, - uint32_t cb_idx); - -SRSLTE_API void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, - int16_t * input[SRSLTE_TDEC_MAX_NPAR], - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, - uint8_t *output, - uint32_t cbidx, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, - int16_t * input[SRSLTE_TDEC_MAX_NPAR], - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_iterations, - uint32_t long_cb); - -#endif // SRSLTE_TURBODECODER_SIMD_H diff --git a/lib/include/srslte/phy/fec/turbodecoder_simd_inter.h b/lib/include/srslte/phy/fec/turbodecoder_simd_inter.h deleted file mode 100644 index 054fbc3cb..000000000 --- a/lib/include/srslte/phy/fec/turbodecoder_simd_inter.h +++ /dev/null @@ -1,119 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -/********************************************************************************************** - * File: turbodecoder.h - * - * Description: Turbo Decoder. - * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent - * encoders and one turbo code internal interleaver. The coding rate of turbo - * encoder is 1/3. - * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. - * - * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2 - *********************************************************************************************/ - -#ifndef SRSLTE_TURBODECODER_SIMD_INTER_H -#define SRSLTE_TURBODECODER_SIMD_INTER_H - - -/** This is an simd inter-frame parallel turbo decoder. Parallizes 8 code-blocks using SSE - * This implementation is currently not functional and not used by the rest of the code - */ - -#include "srslte/config.h" -#include "srslte/phy/fec/tc_interl.h" -#include "srslte/phy/fec/cbsegm.h" - -#if LV_HAVE_AVX2 - #define SRSLTE_TDEC_MAX_NPAR 16 -#else - #define SRSLTE_TDEC_MAX_NPAR 8 -#endif - - -typedef struct SRSLTE_API { - int max_long_cb; - - int16_t *syst0; - int16_t *parity0; - int16_t *syst1; - int16_t *parity1; - int16_t *llr1; - int16_t *llr2; - int16_t *w; - int16_t *alpha; - - uint32_t max_par_cb; - int current_cbidx; - uint32_t current_long_cb; - srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; - int n_iter[SRSLTE_TDEC_MAX_NPAR]; -} srslte_tdec_simd_inter_t; - -SRSLTE_API int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h, - uint32_t max_par_cb, - uint32_t max_long_cb); - -SRSLTE_API void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h); - -SRSLTE_API int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h, - uint32_t long_cb); - -SRSLTE_API int 
srslte_tdec_simd_inter_get_nof_iterations_cb(srslte_tdec_simd_inter_t * h, - uint32_t cb_idx); - -SRSLTE_API int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h, - uint32_t cb_idx); - -SRSLTE_API void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, - int16_t * input[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_cb, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_cb, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_cb, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h, - uint8_t *output, - uint32_t cbidx, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h, - int16_t *input[SRSLTE_TDEC_MAX_NPAR], - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_iterations, - uint32_t nof_cb, - uint32_t long_cb); - -#endif // SRSLTE_TURBODECODER_SIMD_INTER_H diff --git a/lib/include/srslte/phy/fec/turbodecoder_sse.h b/lib/include/srslte/phy/fec/turbodecoder_sse.h index 9678fba9a..a02654238 100644 --- a/lib/include/srslte/phy/fec/turbodecoder_sse.h +++ b/lib/include/srslte/phy/fec/turbodecoder_sse.h @@ -1,101 +1,45 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -/********************************************************************************************** - * File: turbodecoder.h - * - * Description: Turbo Decoder. - * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent - * encoders and one turbo code internal interleaver. The coding rate of turbo - * encoder is 1/3. - * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder. - * - * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2 - *********************************************************************************************/ - -#ifndef SRSLTE_TURBODECODER_SSE_ -#define SRSLTE_TURBODECODER_SSE_ - -#include "srslte/config.h" -#include "srslte/phy/fec/tc_interl.h" -#include "srslte/phy/fec/cbsegm.h" - -#define SRSLTE_TCOD_RATE 3 -#define SRSLTE_TCOD_TOTALTAIL 12 - -#define SRSLTE_TCOD_MAX_LEN_CB 6144 -#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL) - -typedef struct SRSLTE_API { - int max_long_cb; - int16_t *alpha; - int16_t *branch; -} map_gen_t; - -typedef struct SRSLTE_API { - int max_long_cb; - - map_gen_t dec; - - int16_t *app1; - int16_t *app2; - int16_t *ext1; - int16_t *ext2; - int16_t *syst; - int16_t *parity0; - int16_t *parity1; - - int current_cbidx; - srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; - int n_iter; -} srslte_tdec_sse_t; - -SRSLTE_API int srslte_tdec_sse_init(srslte_tdec_sse_t * h, - uint32_t max_long_cb); - -SRSLTE_API void srslte_tdec_sse_free(srslte_tdec_sse_t * h); - -SRSLTE_API int srslte_tdec_sse_reset(srslte_tdec_sse_t * h, uint32_t long_cb); - -SRSLTE_API void srslte_tdec_sse_iteration(srslte_tdec_sse_t * h, - int16_t * input, - uint32_t long_cb); - -SRSLTE_API void 
srslte_tdec_sse_decision(srslte_tdec_sse_t * h, - uint8_t *output, - uint32_t long_cb); - -SRSLTE_API void srslte_tdec_sse_decision_byte(srslte_tdec_sse_t * h, - uint8_t *output, - uint32_t long_cb); - -SRSLTE_API int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h, - int16_t * input, - uint8_t *output, - uint32_t nof_iterations, - uint32_t long_cb); - -#endif // SRSLTE_TURBODECODER_SSE_ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. 
+ * + */ + +#ifndef SRSLTE_TURBODECODER_SSE_H +#define SRSLTE_TURBODECODER_SSE_H + +#include "srslte/config.h" + +typedef struct SRSLTE_API { + uint32_t max_long_cb; + int16_t *alpha; + int16_t *branch; +} tdec_sse_t; + +int tdec_sse_init(void **h, uint32_t max_long_cb); +void tdec_sse_free(void *h); +void tdec_sse_dec(void *h, int16_t * input, int16_t *app, int16_t * parity, + int16_t *output, uint32_t long_cb); +void tdec_sse_extract_input(int16_t *input, int16_t *syst, int16_t *app2, int16_t *parity0, int16_t *parity1, uint32_t long_cb); /* parameter names follow the positional order used by the tdec_extract_input call in turbodecoder_iter.h */ +void tdec_sse_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb); + +#endif // SRSLTE_TURBODECODER_SSE_H diff --git a/lib/include/srslte/phy/fec/turbodecoder_win.h b/lib/include/srslte/phy/fec/turbodecoder_win.h new file mode 100644 index 000000000..bd52e6284 --- /dev/null +++ b/lib/include/srslte/phy/fec/turbodecoder_win.h @@ -0,0 +1,752 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. 
+ * + */ + +#include "srslte/config.h" + +#define MAKE_FUNC(a) CONCAT2(CONCAT2(tdec_win,WINIMP),CONCAT2(_,a)) +#define MAKE_TYPE CONCAT2(CONCAT2(tdec_win_,WINIMP),_t) + +#ifdef WINIMP_IS_SSE16 + + #ifndef LV_HAVE_SSE + #error "Selected SSE window decoder but instruction set not supported" + #endif + + #include + + #define WINIMP sse16 + #define nof_blocks 8 + + #define llr_t int16_t + + #define simd_type_t __m128i + #define simd_load _mm_load_si128 + #define simd_store _mm_store_si128 + #define simd_add _mm_adds_epi16 + #define simd_sub _mm_subs_epi16 + #define simd_max _mm_max_epi16 + #define simd_set1 _mm_set1_epi16 + #define simd_insert _mm_insert_epi16 + #define simd_shuffle _mm_shuffle_epi8 + #define move_right _mm_set_epi8(15,14,15,14,13,12,11,10,9,8,7,6,5,4,3,2) + #define move_left _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0) + #define simd_rb_shift _mm_srai_epi16 + + #define normalize_period 2 + #define win_overlap_len 40 + +#define divide_output 1 + +#define INF 10000 + +#else +#ifdef WINIMP_IS_AVX16 + + #ifndef LV_HAVE_AVX2 + #error "Selected AVX2 window decoder but instruction set not supported" + #endif + + #include + + #define WINIMP avx16 + #define nof_blocks 16 + + #define llr_t int16_t + + #define simd_type_t __m256i + #define simd_load _mm256_load_si256 + #define simd_store _mm256_store_si256 + #define simd_add _mm256_adds_epi16 + #define simd_sub _mm256_subs_epi16 + #define simd_max _mm256_max_epi16 + #define simd_set1 _mm256_set1_epi16 + #define simd_insert _mm256_insert_epi16 + #define simd_shuffle _mm256_shuffle_epi8 + #define move_right _mm256_set_epi8(31,30,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2) + #define move_left _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0) + + #define normalize_period 2 + #define win_overlap_len 40 + + #define INF 10000 +#else + +#ifdef WINIMP_IS_SSE8 + + #ifndef LV_HAVE_SSE + #error "Selected SSE window decoder but 
instruction set not supported" + #endif + + #include + + #define WINIMP sse8 + #define nof_blocks 16 + + #define llr_t int8_t + + #define simd_type_t __m128i + #define simd_load _mm_load_si128 + #define simd_store _mm_store_si128 + #define simd_add _mm_adds_epi8 + #define simd_sub _mm_subs_epi8 + #define simd_max _mm_max_epi8 + #define simd_set1 _mm_set1_epi8 + #define simd_insert _mm_insert_epi8 + #define simd_shuffle _mm_shuffle_epi8 + #define move_right _mm_set_epi8(15,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1) + #define move_left _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0) + #define simd_rb_shift simd_rb_shift_128 + + #define normalize_max + #define normalize_period 1 + #define win_overlap_len 40 + #define use_saturated_add + #define divide_output 1 + + #define INF 0 + + inline static simd_type_t simd_rb_shift_128(simd_type_t v, const int l) { + __m128i low = _mm_srai_epi16(_mm_slli_epi16(v,8), l+8); + __m128i hi = _mm_srai_epi16(v,l); + return _mm_blendv_epi8(hi, low, _mm_set1_epi32(0x00FF00FF)); + } + + +#else + +#ifdef WINIMP_IS_AVX8 + + #ifndef LV_HAVE_AVX2 + #error "Selected AVX2 window decoder but instruction set not supported" + #endif + + #include + + #define WINIMP avx8 + #define nof_blocks 32 + + #define llr_t int8_t + + #define simd_type_t __m256i + #define simd_load _mm256_load_si256 + #define simd_store _mm256_store_si256 + #define simd_add _mm256_adds_epi8 + #define simd_sub _mm256_subs_epi8 + #define simd_max _mm256_max_epi8 + #define simd_set1 _mm256_set1_epi8 + #define simd_insert _mm256_insert_epi8 + #define simd_shuffle _mm256_shuffle_epi8 + #define move_right _mm256_set_epi8(31,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1) + #define move_left _mm256_set_epi8(30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0) + #define simd_rb_shift simd_rb_shift_256 + + #define INF 0 + + #define normalize_max + #define normalize_period 1 + #define win_overlap_len 40 + #define 
use_saturated_add + #define divide_output 1 + + inline static simd_type_t simd_rb_shift_256(simd_type_t v, const int l) { + __m256i low = _mm256_srai_epi16(_mm256_slli_epi16(v,8), l+8); + __m256i hi = _mm256_srai_epi16(v,l); + return _mm256_blendv_epi8(hi, low, _mm256_set1_epi32(0x00FF00FF)); + } + + +#else + #error "Unknown WINIMP value" +#endif +#endif +#endif +#endif + +typedef struct SRSLTE_API { + uint32_t max_long_cb; + llr_t *beta; +} MAKE_TYPE; + + +#define long_sb (long_cb/nof_blocks) + + + +#define debug_enabled_win 0 + +#if debug_enabled_win +#define debug_state(d) printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=[", d*long_sb+k+1, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d), MAKE_FUNC(get_simd)(out,d)); \ + for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d)); \ + printf("], beta=["); \ + for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(beta_save[j], d));printf("\n"); + +#define debug_state_pre(d) printf("pre-window k=%5d, in=%5d, pa=%3d, alpha=[", (d+1)*long_sb-loop_len+k+1, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d)); \ + for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d)); \ + printf("]\n"); + +#define debug_state_beta(d) printf("k=%5d, in=%5d, pa=%3d, beta=[", d*long_sb+k, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d)); \ + for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d));\ + printf("\n"); + +static llr_t MAKE_FUNC(get_simd)(simd_type_t x, uint32_t pos) { + llr_t *s = (llr_t*) &x; + return s[pos]; +} + + +#else +#define debug_state(a) +#define debug_state_pre(a) +#define debug_state_beta(a) +#endif +/* +static void MAKE_FUNC(print_simd)(simd_type_t x) { + llr_t *s = (llr_t*) &x; + printf("["); + for (int i=0;i127?127:(int8_t) z; +#endif +} + +inline static void MAKE_FUNC(normalize)(uint32_t k, simd_type_t old[8]) { + if ((k % normalize_period) == 0 && k != 0) { +#ifdef normalize_max + simd_type_t m = simd_max(old[0],old[1]); + for (int i=2;i<8;i++) { + m = 
simd_max(m,old[i]); + } + for (int i=0;i<8;i++) { + old[i] = simd_sub(old[i], m); + } +#else + for (int i = 1; i < 8; i++) { + old[i] = simd_sub(old[i], old[0]); + } + old[0] = simd_set1(0); +#endif + } +} + +static void MAKE_FUNC(beta_trellis)(llr_t *input, llr_t *parity, uint32_t long_cb, llr_t old[8]) +{ + llr_t m_b[8], new[8]; + llr_t x, y, xy; + + /* Calculate last state using Tail. No need to use SIMD here */ + old[0] = 0; + for (int i = 1; i < 8; i++) { + old[i] = -INF; + } + for (int k=long_cb+2;k >= long_cb; k--) { + x = input[k]; + y = parity[k]; + + xy = MAKE_FUNC(sadd)(x, y); + + m_b[0] = MAKE_FUNC(sadd)(old[4],xy); + m_b[1] = old[4]; + m_b[2] = MAKE_FUNC(sadd)(old[5], y); + m_b[3] = MAKE_FUNC(sadd)(old[5], x); + m_b[4] = MAKE_FUNC(sadd)(old[6], x); + m_b[5] = MAKE_FUNC(sadd)(old[6], y); + m_b[6] = old[7]; + m_b[7] = MAKE_FUNC(sadd)(old[7], xy); + + new[0] = old[0]; + new[1] = MAKE_FUNC(sadd)(old[0], xy); + new[2] = MAKE_FUNC(sadd)(old[1], x); + new[3] = MAKE_FUNC(sadd)(old[1], y); + new[4] = MAKE_FUNC(sadd)(old[2], y); + new[5] = MAKE_FUNC(sadd)(old[2], x); + new[6] = MAKE_FUNC(sadd)(old[3], xy); + new[7] = old[3]; + +#if debug_enabled_win + printf("trellis: k=%d, in=%d, pa=%d, beta: ", k, x, y); for (int i=0;i<8;i++) {printf("%d,", old[i]);} printf("\n"); +#endif + + for (int i = 0; i < 8; i++) { + if (m_b[i] > new[i]) + new[i] = m_b[i]; + old[i] = new[i]; + } + } +} + +/* Computes beta values */ +static void MAKE_FUNC(beta)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb) +{ + simd_type_t m_b[8], new[8], old[8]; + simd_type_t x, y, xy, ap; + + simd_type_t *inputPtr; + simd_type_t *appPtr; + simd_type_t *parityPtr; + simd_type_t *betaPtr = (simd_type_t*) s->beta; + + uint32_t loop_len; + for (int j=0;j<2;j++) { + + // First run L states to find initial state for all sub-blocks after first + if (j==0) { + loop_len = win_overlap_len; + } else { + loop_len = long_sb; + } + + // When passing through all window pick estimated 
initial states (known state for sb=0) + if (loop_len == long_sb) { + + // shuffle across 128-bit boundary manually +#ifdef WINIMP_IS_AVX16 + llr_t tmp[8]; + for (int i = 0; i < 8; i++) { + tmp[i] = _mm256_extract_epi16(old[i], 8); + } +#endif +#ifdef WINIMP_IS_AVX8 + llr_t tmp[8]; + for (int i = 0; i < 8; i++) { + tmp[i] = _mm256_extract_epi8(old[i], 16); + } +#endif + + for (int i = 0; i < 8; i++) { + old[i] = simd_shuffle(old[i], move_right); + } + // last sub-block state is calculated from the trellis + llr_t trellis_old[8]; + MAKE_FUNC(beta_trellis)(input, parity, long_cb, trellis_old); + for (int i = 0; i < 8; i++) { + old[i] = simd_insert(old[i], trellis_old[i], nof_blocks-1); + } + +#ifdef WINIMP_IS_AVX16 + for (int i = 0; i < 8; i++) { + old[i] = _mm256_insert_epi16(old[i], tmp[i], 7); + } +#endif +#ifdef WINIMP_IS_AVX8 + for (int i = 0; i < 8; i++) { + old[i] = _mm256_insert_epi8(old[i], tmp[i], 15); + } +#endif + + inputPtr = (simd_type_t*) &input[long_cb-nof_blocks]; + appPtr = (simd_type_t*) &app[long_cb-nof_blocks]; + parityPtr = (simd_type_t*) &parity[long_cb-nof_blocks]; + + for (int i = 0; i < 8; i++) { + simd_store(&betaPtr[8*long_sb + i], old[i]); + } + + } else { + // when estimating states, just set all to unknown + for (int i = 0; i < 8; i++) { + old[i] = simd_set1(-INF); + } + inputPtr = (simd_type_t*) &input[nof_blocks*(loop_len-1)]; + appPtr = (simd_type_t*) &app[nof_blocks*(loop_len-1)]; + parityPtr = (simd_type_t*) &parity[nof_blocks*(loop_len-1)]; + } + + for (int k = loop_len - 1; k >= 0; k--) { + x = simd_load(inputPtr--); + y = simd_load(parityPtr--); + + if (app) { + ap = simd_load(appPtr--); + x = simd_add(ap, x); + } + + xy = simd_add(x, y); + + m_b[0] = simd_add(old[4], xy); + m_b[1] = old[4]; + m_b[2] = simd_add(old[5], y); + m_b[3] = simd_add(old[5], x); + m_b[4] = simd_add(old[6], x); + m_b[5] = simd_add(old[6], y); + m_b[6] = old[7]; + m_b[7] = simd_add(old[7], xy); + + new[0] = old[0]; + new[1] = simd_add(old[0], xy); + new[2] 
= simd_add(old[1], x); + new[3] = simd_add(old[1], y); + new[4] = simd_add(old[2], y); + new[5] = simd_add(old[2], x); + new[6] = simd_add(old[3], xy); + new[7] = old[3]; + + // Calculate maximum metric + for (int i = 0; i < 8; i++) { + old[i] = simd_max(m_b[i], new[i]); + } + // Store metric only when doing the final pass + if (loop_len == long_sb) { + for (int i = 0; i < 8; i++) { + simd_store(&betaPtr[8*k + i], old[i]); + } + } + if (loop_len!=long_sb) { + debug_state_beta(0); + } else { + debug_state_beta(0); + } + + // normalize + MAKE_FUNC(normalize)(k, old); + } + } +} + +/* Computes alpha metrics */ +static void MAKE_FUNC(alpha)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, llr_t * output, uint32_t long_cb) +{ + simd_type_t m_b[8], new[8], old[8], max1[8], max0[8]; + simd_type_t x, y, xy, ap; + simd_type_t m1, m0; + + simd_type_t *inputPtr; + simd_type_t *appPtr; + simd_type_t *parityPtr; + simd_type_t *betaPtr = (simd_type_t*) s->beta; + simd_type_t *outputPtr = (simd_type_t*) output; + +#if debug_enabled_win + simd_type_t beta_save[8]; +#endif + + // Skip state 0 + betaPtr+=8; + + uint32_t loop_len; + + for (int j=0;j<2;j++) { + + // First run L states to find initial state for all sub-blocks after first + if (j==0) { + loop_len = win_overlap_len; + } else { + loop_len = long_sb; + } + + // When passing through all window pick estimated initial states (known state for sb=0) + if (loop_len == long_sb) { + +#ifdef WINIMP_IS_AVX16 + llr_t tmp[8]; + for (int i=0;i<8;i++) { + tmp[i] = _mm256_extract_epi16(old[i], 7); + } +#endif +#ifdef WINIMP_IS_AVX8 + llr_t tmp[8]; + for (int i=0;i<8;i++) { + tmp[i] = _mm256_extract_epi8(old[i], 15); + } +#endif + for (int i = 0; i < 8; i++) { + old[i] = simd_shuffle(old[i], move_left); + } +#ifdef WINIMP_IS_AVX16 + for (int i=0;i<8;i++) { + old[i] = _mm256_insert_epi16(old[i], tmp[i], 8); + } +#endif +#ifdef WINIMP_IS_AVX8 + for (int i=0;i<8;i++) { + old[i] = _mm256_insert_epi8(old[i], tmp[i], 16); + } +#endif + 
// 1st sub-block state is known + old[0] = simd_insert(old[0], 0, 0); + for (int i = 1; i < 8; i++) { + old[i] = simd_insert(old[i], -INF, 0); + } + } else { + // when estimating states, just set all to unknown + for (int i = 0; i < 8; i++) { + old[i] = simd_set1(-INF); + } + } + + inputPtr = (simd_type_t*) &input[nof_blocks*(long_sb-loop_len)]; + appPtr = (simd_type_t*) &app[nof_blocks*(long_sb-loop_len)]; + parityPtr = (simd_type_t*) &parity[nof_blocks*(long_sb-loop_len)]; + + for (int k = 0; k < loop_len; k++) { + x = simd_load(inputPtr++); + y = simd_load(parityPtr++); + + if (app) { + ap = simd_load(appPtr++); + x = simd_add(ap, x); + } + + xy = simd_add(x,y); + + m_b[0] = old[0]; + m_b[1] = simd_add(old[3], y); + m_b[2] = simd_add(old[4], y); + m_b[3] = old[7]; + m_b[4] = old[1]; + m_b[5] = simd_add(old[2], y); + m_b[6] = simd_add(old[5], y); + m_b[7] = old[6]; + + new[0] = simd_add(old[1], xy); + new[1] = simd_add(old[2], x); + new[2] = simd_add(old[5], x); + new[3] = simd_add(old[6], xy); + new[4] = simd_add(old[0], xy); + new[5] = simd_add(old[3], x); + new[6] = simd_add(old[4], x); + new[7] = simd_add(old[7], xy); + + // Load beta and compute output only when passing through all window + if (loop_len == long_sb) { + simd_type_t beta; + for (int i = 0; i < 8; i++) { + beta = simd_load(betaPtr++); + max0[i] = simd_add(beta, m_b[i]); + max1[i] = simd_add(beta, new[i]); + +#if debug_enabled_win + beta_save[i] = beta; +#endif + } + + m1 = simd_max(max1[0], max1[1]); + m0 = simd_max(max0[0], max0[1]); + + for (int i = 2; i < 8; i++) { + m1 = simd_max(m1, max1[i]); + m0 = simd_max(m0, max0[i]); + } + + simd_type_t out = simd_sub(m1, m0); + + // Divide output when using 8-bit arithmetic +#ifdef divide_output + out = simd_rb_shift(out, divide_output); +#endif + + simd_store(outputPtr++, out); + + debug_state(0); + } + + for (int i = 0; i < 8; i++) { + old[i] = simd_max(m_b[i], new[i]); + } + + // normalize + MAKE_FUNC(normalize)(k, old); + + if (loop_len != 
long_sb) { + debug_state_pre(0); + } + } + } +} + +int MAKE_FUNC(init)(void **hh, uint32_t max_long_cb) +{ + *hh = calloc(1, sizeof(MAKE_TYPE)); + + MAKE_TYPE *h = (MAKE_TYPE*) *hh; + + h->beta = srslte_vec_malloc(sizeof(llr_t) * 8 * max_long_cb * nof_blocks); + if (!h->beta) { + perror("srslte_vec_malloc"); + return -1; + } + h->max_long_cb = max_long_cb; + return nof_blocks; +} + +void MAKE_FUNC(free)(void *hh) +{ + MAKE_TYPE *h = (MAKE_TYPE*) hh; + if (h) { + if (h->beta) { + free(h->beta); + } + free(h); + } +} + +void MAKE_FUNC(dec)(void *hh, llr_t *input, llr_t *app, llr_t *parity, llr_t *output, uint32_t long_cb) +{ + MAKE_TYPE *h = (MAKE_TYPE*) hh; + MAKE_FUNC(beta)(h, input, app, parity, long_cb); + MAKE_FUNC(alpha)(h, input, app, parity, output, long_cb); +#if debug_enabled_win + printf("running win decoder: %s\n", STRING(WINIMP)); +#endif +} + +#define INSERT8_INPUT(reg, st, off) reg = simd_insert(reg, input[3*(i+(st+0)*long_sb)+off], st+0);\ + reg = simd_insert(reg, input[3*(i+(st+1)*long_sb)+off], st+1);\ + reg = simd_insert(reg, input[3*(i+(st+2)*long_sb)+off], st+2);\ + reg = simd_insert(reg, input[3*(i+(st+3)*long_sb)+off], st+3);\ + reg = simd_insert(reg, input[3*(i+(st+4)*long_sb)+off], st+4);\ + reg = simd_insert(reg, input[3*(i+(st+5)*long_sb)+off], st+5);\ + reg = simd_insert(reg, input[3*(i+(st+6)*long_sb)+off], st+6);\ + reg = simd_insert(reg, input[3*(i+(st+7)*long_sb)+off], st+7); + + +void MAKE_FUNC(extract_input)(llr_t *input, llr_t *systematic, llr_t *app2, llr_t *parity_0, llr_t *parity_1, uint32_t long_cb) +{ + simd_type_t *systPtr = (simd_type_t*) systematic; + simd_type_t *parity0Ptr = (simd_type_t*) parity_0; + simd_type_t *parity1Ptr = (simd_type_t*) parity_1; + + simd_type_t syst, parity0, parity1; + + for (int i=0;i= 16 + INSERT8_INPUT(syst, 8, 0); + INSERT8_INPUT(parity0, 8, 1); + INSERT8_INPUT(parity1, 8, 2); +#endif + +#if nof_blocks >= 32 + INSERT8_INPUT(syst, 16, 0); + INSERT8_INPUT(parity0, 16, 1); + INSERT8_INPUT(parity1, 
16, 2); + INSERT8_INPUT(syst, 24, 0); + INSERT8_INPUT(parity0, 24, 1); + INSERT8_INPUT(parity1, 24, 2); +#endif + + simd_store(systPtr++, syst); + simd_store(parity0Ptr++, parity0); + simd_store(parity1Ptr++, parity1); + } + + for (int i = long_cb; i < long_cb + 3; i++) { + systematic[i] = input[3*long_cb + 2*(i - long_cb)]; + parity_0[i] = input[3*long_cb + 2*(i - long_cb) + 1]; + + app2[i] = input[3*long_cb + 6 + 2*(i - long_cb)]; + parity_1[i] = input[3*long_cb + 6 + 2*(i - long_cb) + 1]; + } +} + +#define deinter(x,win) ((x%(long_cb/win))*(win)+x/(long_cb/win)) + +#define reset_cnt(a,b) if(!((a+1)%b)) { \ + k+=b*nof_blocks; \ + if (k >= long_cb) { \ + k -= (long_cb-1);\ + }\ + } +#define insert_bit(a,b) ap = _mm_insert_epi16(ap, app1[k+(a%b)*nof_blocks], 7-a); \ + reset_cnt(a,b); \ + + +#define decide_for(b) for (uint32_t i = 0; i < long_cb/8; i++) { \ + insert_bit(0,b);\ + insert_bit(1,b);\ + insert_bit(2,b);\ + insert_bit(3,b);\ + insert_bit(4,b);\ + insert_bit(5,b);\ + insert_bit(6,b);\ + insert_bit(7,b);\ + output[i] = (uint8_t) _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_packs_epi16(ap,zeros),zeros));\ + } + +/* No improvement to use AVX here */ +void MAKE_FUNC(decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb) +{ + uint32_t k=0; + __m128i zeros = _mm_setzero_si128(); + __m128i ap; + + if ((long_cb%(nof_blocks*8)) == 0) { + decide_for(8); + } else if ((long_cb%(nof_blocks*4)) == 0) { + decide_for(4); + } else if ((long_cb%(nof_blocks*2)) == 0) { + decide_for(2); + } else { + decide_for(1); + } +} + + +#undef WINIMP +#undef nof_blocks +#undef llr_t +#undef normalize_period +#undef INF +#undef win_overlap_len +#undef simd_type_t +#undef simd_load +#undef simd_store +#undef simd_add +#undef simd_sub +#undef simd_max +#undef simd_set1 +#undef simd_insert +#undef simd_shuffle +#undef move_right +#undef move_left +#undef debug_enabled_win + +#ifdef normalize_max +#undef normalize_max +#endif + +#ifdef use_saturated_add +#undef use_saturated_add +#endif + 
+#ifdef simd_rb_shift +#undef simd_rb_shift +#endif + +#ifdef divide_output +#undef divide_output +#endif \ No newline at end of file diff --git a/lib/include/srslte/phy/modem/demod_soft.h b/lib/include/srslte/phy/modem/demod_soft.h index 9ad065edc..eb1924cae 100644 --- a/lib/include/srslte/phy/modem/demod_soft.h +++ b/lib/include/srslte/phy/modem/demod_soft.h @@ -53,4 +53,9 @@ SRSLTE_API int srslte_demod_soft_demodulate_s(srslte_mod_t modulation, short* llr, int nsymbols); +SRSLTE_API int srslte_demod_soft_demodulate_b(srslte_mod_t modulation, + const cf_t* symbols, + int8_t* llr, + int nsymbols); + #endif // SRSLTE_DEMOD_SOFT_H diff --git a/lib/include/srslte/phy/phch/pdsch.h b/lib/include/srslte/phy/phch/pdsch.h index f1ecbc348..787853d0a 100644 --- a/lib/include/srslte/phy/phch/pdsch.h +++ b/lib/include/srslte/phy/phch/pdsch.h @@ -65,6 +65,8 @@ typedef struct SRSLTE_API { uint16_t ue_rnti; bool is_ue; + bool llr_is_8bit; + /* Power allocation parameter 3GPP 36.213 Clause 5.2 Rho_b */ float rho_a; diff --git a/lib/include/srslte/phy/phch/pucch.h b/lib/include/srslte/phy/phch/pucch.h index fbb185c91..20b1b2ed4 100644 --- a/lib/include/srslte/phy/phch/pucch.h +++ b/lib/include/srslte/phy/phch/pucch.h @@ -81,6 +81,7 @@ typedef struct SRSLTE_API { typedef struct { srslte_sequence_t seq_f2[SRSLTE_NSUBFRAMES_X_FRAME]; + uint32_t cell_id; bool sequence_generated; } srslte_pucch_user_t; @@ -111,11 +112,16 @@ typedef struct SRSLTE_API { float last_corr; uint32_t last_n_prb; uint32_t last_n_pucch; - + + srslte_sequence_t tmp_seq; + uint16_t ue_rnti; + bool is_ue; }srslte_pucch_t; -SRSLTE_API int srslte_pucch_init(srslte_pucch_t *q); +SRSLTE_API int srslte_pucch_init_ue(srslte_pucch_t *q); + +SRSLTE_API int srslte_pucch_init_enb(srslte_pucch_t *q); SRSLTE_API void srslte_pucch_free(srslte_pucch_t *q); diff --git a/lib/include/srslte/phy/phch/pusch.h b/lib/include/srslte/phy/phch/pusch.h index a5b8f04ae..027fc02b2 100644 --- a/lib/include/srslte/phy/phch/pusch.h +++ 
b/lib/include/srslte/phy/phch/pusch.h @@ -74,6 +74,8 @@ typedef struct SRSLTE_API { uint16_t ue_rnti; uint32_t max_re; + bool llr_is_8bit; + srslte_dft_precoding_t dft_precoding; /* buffers */ diff --git a/lib/include/srslte/phy/phch/sch.h b/lib/include/srslte/phy/phch/sch.h index 003e9b7df..50077f417 100644 --- a/lib/include/srslte/phy/phch/sch.h +++ b/lib/include/srslte/phy/phch/sch.h @@ -59,6 +59,8 @@ typedef struct SRSLTE_API { uint32_t max_iterations; uint32_t nof_iterations; + bool llr_is_8bit; + /* buffers */ uint8_t *cb_in; uint8_t *parity_bits; diff --git a/lib/include/srslte/phy/scrambling/scrambling.h b/lib/include/srslte/phy/scrambling/scrambling.h index c38bbe8c9..66b4b1431 100644 --- a/lib/include/srslte/phy/scrambling/scrambling.h +++ b/lib/include/srslte/phy/scrambling/scrambling.h @@ -68,6 +68,11 @@ SRSLTE_API void srslte_scrambling_s_offset(srslte_sequence_t *s, int offset, int len); +SRSLTE_API void srslte_scrambling_sb_offset(srslte_sequence_t *s, + int8_t *data, + int offset, + int len); + SRSLTE_API void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data); diff --git a/lib/include/srslte/phy/utils/mat.h b/lib/include/srslte/phy/utils/mat.h index 8db0205f9..c8465b34e 100644 --- a/lib/include/srslte/phy/utils/mat.h +++ b/lib/include/srslte/phy/utils/mat.h @@ -166,8 +166,13 @@ static inline void srslte_mat_2x2_mmse_csi_simd(simd_cf_t y0, simd_cf_t _noise_estimate; simd_f_t _norm = srslte_simd_f_set1(norm); +#if HAVE_NEON + _noise_estimate.val[0] = srslte_simd_f_set1(noise_estimate); + _noise_estimate.val[1] = srslte_simd_f_zero(); +#else /* HAVE_NEON */ _noise_estimate.re = srslte_simd_f_set1(noise_estimate); _noise_estimate.im = srslte_simd_f_zero(); +#endif /* HAVE_NEON */ /* 1. 
A = H' x H + No*/ simd_cf_t a00 = diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 2a7566e18..491c1f661 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -100,6 +100,7 @@ #define SRSLTE_SIMD_I_SIZE 16 +#define SRSLTE_SIMD_B_SIZE 64 #define SRSLTE_SIMD_S_SIZE 32 #define SRSLTE_SIMD_C16_SIZE 0 @@ -111,6 +112,7 @@ #define SRSLTE_SIMD_I_SIZE 8 +#define SRSLTE_SIMD_B_SIZE 32 #define SRSLTE_SIMD_S_SIZE 16 #define SRSLTE_SIMD_C16_SIZE 16 @@ -122,6 +124,7 @@ #define SRSLTE_SIMD_I_SIZE 4 +#define SRSLTE_SIMD_B_SIZE 16 #define SRSLTE_SIMD_S_SIZE 8 #define SRSLTE_SIMD_C16_SIZE 8 @@ -132,16 +135,16 @@ #define SRSLTE_SIMD_CF_SIZE 4 #define SRSLTE_SIMD_I_SIZE 4 - +#define SRSLTE_SIMD_B_SIZE 16 #define SRSLTE_SIMD_S_SIZE 8 #define SRSLTE_SIMD_C16_SIZE 8 -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ #define SRSLTE_SIMD_F_SIZE 0 #define SRSLTE_SIMD_CF_SIZE 0 #define SRSLTE_SIMD_I_SIZE 0 - +#define SRSLTE_SIMD_B_SIZE 0 #define SRSLTE_SIMD_S_SIZE 0 #define SRSLTE_SIMD_C16_SIZE 0 @@ -511,7 +514,7 @@ static inline simd_f_t srslte_simd_f_abs(simd_f_t a) { return _mm_andnot_ps(_mm_set1_ps(-0.0f), a); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vqabsq_s32(a); + return vabsq_f32(a); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -987,13 +990,13 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { static inline simd_cf_t srslte_simd_cf_neg (simd_cf_t a) { simd_cf_t ret; -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = srslte_simd_f_neg(a.val[0]); ret.val[1] = srslte_simd_f_neg(a.val[1]); -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = srslte_simd_f_neg(a.re); ret.im = srslte_simd_f_neg(a.im); -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } @@ -1004,37 +1007,37 @@ static inline simd_cf_t srslte_simd_cf_neg_mask (simd_cf_t a, simd_f_t mask) { mask = _mm256_permutevar8x32_ps(mask, _mm256_setr_epi32(0,4,1,5,2,6,3,7)); #endif /* 
LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = srslte_simd_f_neg_mask(a.val[0], mask); ret.val[1] = srslte_simd_f_neg_mask(a.val[1], mask); -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = srslte_simd_f_neg_mask(a.re, mask); ret.im = srslte_simd_f_neg_mask(a.im, mask); -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } static inline simd_cf_t srslte_simd_cf_conj (simd_cf_t a) { simd_cf_t ret; -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = a.val[0]; ret.val[1] = srslte_simd_f_neg(a.val[1]); -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = a.re; ret.im = srslte_simd_f_neg(a.im); -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } static inline simd_cf_t srslte_simd_cf_mulj (simd_cf_t a) { simd_cf_t ret; -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = srslte_simd_f_neg(a.val[1]); ret.val[1] = a.val[0]; -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = srslte_simd_f_neg(a.im); ret.im = a.re; -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } @@ -1336,6 +1339,24 @@ static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { #endif /* LV_HAVE_AVX512 */ } +static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 +#error sign instruction not available in avx512 +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sign_epi16(a, b); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_sign_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + #error sign instruction not available in Neon +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) { #ifdef LV_HAVE_AVX512 return _mm512_add_epi16(a, b); @@ -1681,7 +1702,7 @@ typedef int8x16_t simd_b_t; -static inline simd_b_t srslte_simd_b_load(int8_t *ptr){ +static inline simd_b_t srslte_simd_b_load(const int8_t *ptr){ #ifdef LV_HAVE_AVX512 
return _mm512_load_si512(ptr); #else /* LV_HAVE_AVX512 */ @@ -1699,7 +1720,7 @@ static inline simd_b_t srslte_simd_b_load(int8_t *ptr){ #endif /* LV_HAVE_AVX512 */ } -static inline simd_b_t srslte_simd_b_loadu(int8_t *ptr){ +static inline simd_b_t srslte_simd_b_loadu(const int8_t *ptr){ #ifdef LV_HAVE_AVX512 return _mm512_loadu_si512(ptr); #else /* LV_HAVE_AVX512 */ @@ -1773,6 +1794,44 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) { #endif /* LV_HAVE_AVX512 */ } +static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_subs_epi8(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_subs_epi8(a, b); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_subs_epi8(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubqs_s8(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) { +#ifdef LV_HAVE_AVX512 +#error sign instruction not available in avx512 +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sign_epi8(a, b); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_sign_epi8(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + #error sign instruction not available in Neon +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + + + #endif /*SRSLTE_SIMD_B_SIZE */ diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 32629da3e..c9e491027 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -69,6 +69,7 @@ SRSLTE_API void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_s SRSLTE_API void srslte_vec_fprint_c(FILE *stream, cf_t *x, const uint32_t len); SRSLTE_API void srslte_vec_fprint_f(FILE *stream, float *x, const uint32_t len); SRSLTE_API void 
srslte_vec_fprint_b(FILE *stream, uint8_t *x, const uint32_t len); +SRSLTE_API void srslte_vec_fprint_bs(FILE *stream, int8_t *x, const uint32_t len); SRSLTE_API void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, const uint32_t len); SRSLTE_API void srslte_vec_fprint_i(FILE *stream, int *x, const uint32_t len); SRSLTE_API void srslte_vec_fprint_s(FILE *stream, short *x, const uint32_t len); @@ -82,12 +83,13 @@ SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, const uint32_ /* sum two vectors */ SRSLTE_API void srslte_vec_sum_fff(const float *x, const float *y, float *z, const uint32_t len); SRSLTE_API void srslte_vec_sum_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len); -SRSLTE_API void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len); SRSLTE_API void srslte_vec_sum_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len); /* substract two vectors z=x-y */ SRSLTE_API void srslte_vec_sub_fff(const float *x, const float *y, float *z, const uint32_t len); SRSLTE_API void srslte_vec_sub_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len); +SRSLTE_API void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len); +SRSLTE_API void srslte_vec_sub_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len); /* scalar product */ SRSLTE_API void srslte_vec_sc_prod_cfc(const cf_t *x, const float h, cf_t *z, const uint32_t len); @@ -97,8 +99,10 @@ SRSLTE_API void srslte_vec_sc_prod_fff(const float *x, const float h, float *z, SRSLTE_API void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const uint32_t len); SRSLTE_API void srslte_vec_convert_if(const int16_t *x, const float scale, float *z, const uint32_t len); +SRSLTE_API void srslte_vec_convert_fb(const float *x, const float scale, int8_t *z, const uint32_t len); SRSLTE_API void srslte_vec_lut_sss(const short *x, const unsigned short *lut, short *y, const uint32_t 
len); +SRSLTE_API void srslte_vec_lut_bbb(const int8_t *x, const unsigned short *lut, int8_t *y, const uint32_t len); SRSLTE_API void srslte_vec_lut_sis(const short *x, const unsigned int *lut, short *y, const uint32_t len); /* vector product (element-wise) */ @@ -115,6 +119,10 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(const cf_t *x, const cf_t *y, cf_t *z, SRSLTE_API void srslte_vec_prod_fff(const float *x, const float *y, float *z, const uint32_t len); SRSLTE_API void srslte_vec_prod_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len); +// Negate sign (scrambling) +SRSLTE_API void srslte_vec_neg_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len); +SRSLTE_API void srslte_vec_neg_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len); + /* Dot-product */ SRSLTE_API cf_t srslte_vec_dot_prod_cfc(const cf_t *x, const float *y, const uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc(const cf_t *x, const cf_t *y, const uint32_t len); diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 68ddbdee0..cd198df8f 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -62,6 +62,8 @@ SRSLTE_API void srslte_vec_sum_sss_simd(const int16_t *x, const int16_t *y, int1 SRSLTE_API void srslte_vec_sub_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, int len); +SRSLTE_API void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, int len); + SRSLTE_API float srslte_vec_acc_ff_simd(const float *x, int len); SRSLTE_API cf_t srslte_vec_acc_cc_simd(const cf_t *x, int len); @@ -86,6 +88,10 @@ SRSLTE_API void srslte_vec_prod_ccc_c16_simd(const int16_t *a_re, const int16_t SRSLTE_API void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len); +SRSLTE_API void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len); + +SRSLTE_API void 
srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len); + SRSLTE_API void srslte_vec_prod_cfc_simd(const cf_t *x, const float *y, cf_t *z, const int len); SRSLTE_API void srslte_vec_prod_fff_simd(const float *x, const float *y, float *z, const int len); @@ -120,10 +126,14 @@ SRSLTE_API void srslte_vec_abs_square_cf_simd(const cf_t *x, float *z, const int /* Other Functions */ SRSLTE_API void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y, const int len); +SRSLTE_API void srslte_vec_lut_bbb_simd(const int8_t *x, const unsigned short *lut, int8_t *y, const int len); + SRSLTE_API void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len); SRSLTE_API void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, const int len); +SRSLTE_API void srslte_vec_convert_fb_simd(const float *x, int8_t *z, const float scale, const int len); + SRSLTE_API void srslte_vec_cp_simd(const cf_t *src, cf_t *dst, int len); SRSLTE_API void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len); diff --git a/lib/include/srslte/radio/radio.h b/lib/include/srslte/radio/radio.h index 658bf7367..2b14b30cd 100644 --- a/lib/include/srslte/radio/radio.h +++ b/lib/include/srslte/radio/radio.h @@ -140,7 +140,7 @@ class radio { srslte_rf_t rf_device; - const static uint32_t burst_preamble_max_samples = 30720000; // 30.72 MHz is maximum frequency + const static uint32_t burst_preamble_max_samples = 13824; double burst_preamble_sec;// Start of burst preamble time (off->on RF transition time) srslte_timestamp_t end_of_burst_time; bool is_start_of_burst; diff --git a/lib/include/srslte/upper/pdcp.h b/lib/include/srslte/upper/pdcp.h index e4be9d548..7272f5079 100644 --- a/lib/include/srslte/upper/pdcp.h +++ b/lib/include/srslte/upper/pdcp.h @@ -61,6 +61,7 @@ public: void add_bearer(uint32_t lcid, srslte_pdcp_config_t cnfg = srslte_pdcp_config_t()); void 
add_bearer_mrb(uint32_t lcid, srslte_pdcp_config_t cnfg = srslte_pdcp_config_t()); void del_bearer(uint32_t lcid); + void change_lcid(uint32_t old_lcid, uint32_t new_lcid); void config_security(uint32_t lcid, uint8_t *k_enc, uint8_t *k_int, diff --git a/lib/include/srslte/upper/rlc.h b/lib/include/srslte/upper/rlc.h index a56f48eff..f211edf72 100644 --- a/lib/include/srslte/upper/rlc.h +++ b/lib/include/srslte/upper/rlc.h @@ -56,7 +56,7 @@ public: log *rlc_log_, mac_interface_timers *mac_timers_, uint32_t lcid_, - int buffer_size = -1); // -1 to use default buffer sizes + int buffer_size_ = -1); // -1 to use default buffer sizes void stop(); void get_metrics(rlc_metrics_t &m); @@ -81,12 +81,15 @@ public: // RRC interface void reestablish(); + void reestablish(uint32_t lcid); void reset(); void empty_queue(); void add_bearer(uint32_t lcid); void add_bearer(uint32_t lcid, srslte_rlc_config_t cnfg); void add_bearer_mrb(uint32_t lcid); void del_bearer(uint32_t lcid); + void del_bearer_mrb(uint32_t lcid); + void change_lcid(uint32_t old_lcid, uint32_t new_lcid); private: void reset_metrics(); diff --git a/lib/include/srslte/upper/rlc_am.h b/lib/include/srslte/upper/rlc_am.h index ab2c6cd63..ac04c991a 100644 --- a/lib/include/srslte/upper/rlc_am.h +++ b/lib/include/srslte/upper/rlc_am.h @@ -69,14 +69,14 @@ struct rlc_amd_retx_t{ class rlc_am : public rlc_common { public: - rlc_am(uint32_t queue_len = 16); + rlc_am(uint32_t queue_len = 128); ~rlc_am(); - void init(log *rlc_entity_log_, + void init(log *log_, uint32_t lcid_, srsue::pdcp_interface_rlc *pdcp_, srsue::rrc_interface_rlc *rrc_, - mac_interface_timers *mac_timers); - bool configure(srslte_rlc_config_t cnfg); + mac_interface_timers *mac_timers_); + bool configure(srslte_rlc_config_t cfg_); void reestablish(); void stop(); @@ -100,109 +100,205 @@ public: private: - byte_buffer_pool *pool; - srslte::log *log; - uint32_t lcid; - srsue::pdcp_interface_rlc *pdcp; - srsue::rrc_interface_rlc *rrc; + // Transmitter 
sub-class + class rlc_am_tx : public timer_callback + { + public: + rlc_am_tx(rlc_am *parent_, uint32_t queue_len_); + ~rlc_am_tx(); - // TX SDU buffers - rlc_tx_queue tx_sdu_queue; - byte_buffer_t *tx_sdu; - - // PDU being resegmented - rlc_amd_tx_pdu_t tx_pdu_segments; - - // Tx and Rx windows - std::map tx_window; - std::deque retx_queue; - std::map rx_window; - std::map rx_segments; - - // RX SDU buffers - byte_buffer_t *rx_sdu; - - // Mutexes - pthread_mutex_t mutex; - - bool tx_enabled; - bool poll_received; - bool do_status; - rlc_status_pdu_t status; - - // Metrics - uint32_t num_tx_bytes; - uint32_t num_rx_bytes; - - /**************************************************************************** - * Configurable parameters - * Ref: 3GPP TS 36.322 v10.0.0 Section 7 - ***************************************************************************/ - - srslte_rlc_am_config_t cfg; - - /**************************************************************************** - * State variables and counters - * Ref: 3GPP TS 36.322 v10.0.0 Section 7 - ***************************************************************************/ - - // Tx state variables - uint32_t vt_a; // ACK state. SN of next PDU in sequence to be ACKed. Low edge of tx window. - uint32_t vt_ms; // Max send state. High edge of tx window. vt_a + window_size. - uint32_t vt_s; // Send state. SN to be assigned for next PDU. - uint32_t poll_sn; // Poll send state. SN of most recent PDU txed with poll bit set. - - // Tx counters - uint32_t pdu_without_poll; - uint32_t byte_without_poll; - - // Rx state variables - uint32_t vr_r; // Receive state. SN following last in-sequence received PDU. Low edge of rx window - uint32_t vr_mr; // Max acceptable receive state. High edge of rx window. vr_r + window size. - uint32_t vr_x; // t_reordering state. SN following PDU which triggered t_reordering. - uint32_t vr_ms; // Max status tx state. Highest possible value of SN for ACK_SN in status PDU. 
- uint32_t vr_h; // Highest rx state. SN following PDU with highest SN among rxed PDUs. - - /**************************************************************************** - * Timers - * Ref: 3GPP TS 36.322 v10.0.0 Section 7 - ***************************************************************************/ - timeout poll_retx_timeout; - timeout reordering_timeout; - timeout status_prohibit_timeout; - - static const int reordering_timeout_id = 1; + void init(); + bool configure(srslte_rlc_am_config_t cfg_); - static const int poll_periodicity = 8; // After how many data PDUs a status PDU shall be requested + void empty_queue(); + void reestablish(); + void stop(); + + void write_sdu(byte_buffer_t *sdu, bool blocking); + int read_pdu(uint8_t *payload, uint32_t nof_bytes); + + uint32_t get_buffer_state(); + uint32_t get_total_buffer_state(); + uint32_t get_num_tx_bytes(); + void reset_metrics(); + + // Timeout callback interface + void timer_expired(uint32_t timeout_id); + + // Interface for Rx subclass + void handle_control_pdu(uint8_t *payload, uint32_t nof_bytes); + + private: + + int build_status_pdu(uint8_t *payload, uint32_t nof_bytes); + int build_retx_pdu(uint8_t *payload, uint32_t nof_bytes); + int build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t retx); + int build_data_pdu(uint8_t *payload, uint32_t nof_bytes); + + void debug_state(); + + bool retx_queue_has_sn(uint32_t sn); + int required_buffer_size(rlc_amd_retx_t retx); + void retransmit_random_pdu(); + + // Timer checks + bool status_prohibited; + + // Helpers + bool poll_required(); + bool do_status(); + + rlc_am *parent; + byte_buffer_pool *pool; + srslte::log *log; + + /**************************************************************************** + * Configurable parameters + * Ref: 3GPP TS 36.322 v10.0.0 Section 7 + ***************************************************************************/ + + srslte_rlc_am_config_t cfg; + + // TX SDU buffers + rlc_tx_queue tx_sdu_queue; + byte_buffer_t 
*tx_sdu;; + + bool tx_enabled; - // Timer checks - bool status_prohibited(); - bool poll_retx(); - void check_reordering_timeout(); + /**************************************************************************** + * State variables and counters + * Ref: 3GPP TS 36.322 v10.0.0 Section 7 + ***************************************************************************/ - // Helpers - bool poll_required(); + // Tx state variables + uint32_t vt_a; // ACK state. SN of next PDU in sequence to be ACKed. Low edge of tx window. + uint32_t vt_ms; // Max send state. High edge of tx window. vt_a + window_size. + uint32_t vt_s; // Send state. SN to be assigned for next PDU. + uint32_t poll_sn; // Poll send state. SN of most recent PDU txed with poll bit set. - int prepare_status(); - int build_status_pdu(uint8_t *payload, uint32_t nof_bytes); - int build_retx_pdu(uint8_t *payload, uint32_t nof_bytes); - int build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t retx); - int build_data_pdu(uint8_t *payload, uint32_t nof_bytes); + // Tx counters + uint32_t pdu_without_poll; + uint32_t byte_without_poll; - void handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header); - void handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header); - void handle_control_pdu(uint8_t *payload, uint32_t nof_bytes); + rlc_status_pdu_t tx_status; - void reassemble_rx_sdus(); + /**************************************************************************** + * Timers + * Ref: 3GPP TS 36.322 v10.0.0 Section 7 + ***************************************************************************/ - bool inside_tx_window(uint16_t sn); - bool inside_rx_window(uint16_t sn); - void debug_state(); - void print_rx_segments(); + srslte::timers::timer *poll_retx_timer; + uint32_t poll_retx_timer_id; - bool add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pdu_t *segment); - int required_buffer_size(rlc_amd_retx_t retx); - bool 
retx_queue_has_sn(uint32_t sn); + srslte::timers::timer *status_prohibit_timer; + uint32_t status_prohibit_timer_id; + + // Tx windows + std::map tx_window; + std::deque retx_queue; + + // Mutexes + pthread_mutex_t mutex; + + // Metrics + uint32_t num_tx_bytes; + }; + + // Receiver sub-class + class rlc_am_rx : public timer_callback + { + public: + rlc_am_rx(rlc_am* parent_); + ~rlc_am_rx(); + + void init(); + bool configure(srslte_rlc_am_config_t cfg_); + void reestablish(); + void stop(); + + void write_pdu(uint8_t *payload, uint32_t nof_bytes); + + uint32_t get_num_rx_bytes(); + void reset_metrics(); + + // Timeout callback interface + void timer_expired(uint32_t timeout_id); + + // Functions needed by Tx subclass to query rx state + int get_status(rlc_status_pdu_t* status); + bool get_do_status(); + void reset_status(); // called when status PDU has been sent + + private: + void handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header); + void handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header); + void reassemble_rx_sdus(); + bool inside_rx_window(uint16_t sn); + void debug_state(); + void print_rx_segments(); + bool add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pdu_t *segment); + + rlc_am *parent; + byte_buffer_pool *pool; + srslte::log *log; + + /**************************************************************************** + * Configurable parameters + * Ref: 3GPP TS 36.322 v10.0.0 Section 7 + ***************************************************************************/ + srslte_rlc_am_config_t cfg; + + // RX SDU buffers + byte_buffer_t *rx_sdu; + + /**************************************************************************** + * State variables and counters + * Ref: 3GPP TS 36.322 v10.0.0 Section 7 + ***************************************************************************/ + + // Rx state variables + uint32_t vr_r; // Receive state. 
SN following last in-sequence received PDU. Low edge of rx window + uint32_t vr_mr; // Max acceptable receive state. High edge of rx window. vr_r + window size. + uint32_t vr_x; // t_reordering state. SN following PDU which triggered t_reordering. + uint32_t vr_ms; // Max status tx state. Highest possible value of SN for ACK_SN in status PDU. + uint32_t vr_h; // Highest rx state. SN following PDU with highest SN among rxed PDUs. + + // Mutexes + pthread_mutex_t mutex; + + // Rx windows + std::map rx_window; + std::map rx_segments; + + // Metrics + uint32_t num_rx_bytes; + + bool poll_received; + bool do_status; + + /**************************************************************************** + * Timers + * Ref: 3GPP TS 36.322 v10.0.0 Section 7 + ***************************************************************************/ + + srslte::timers::timer *reordering_timer; + uint32_t reordering_timer_id; + }; + + // Rx and Tx objects + rlc_am_tx tx; + rlc_am_rx rx; + + // Common variables needed/provided by parent class + srsue::rrc_interface_rlc *rrc; + srslte::log *log; + srsue::pdcp_interface_rlc *pdcp; + mac_interface_timers *mac_timers; + uint32_t lcid; + srslte_rlc_am_config_t cfg; + std::string rb_name; + + static const int poll_periodicity = 8; // After how many data PDUs a status PDU shall be requested }; /**************************************************************************** diff --git a/lib/include/srslte/upper/rlc_metrics.h b/lib/include/srslte/upper/rlc_metrics.h index ab6e83c02..153fba57b 100644 --- a/lib/include/srslte/upper/rlc_metrics.h +++ b/lib/include/srslte/upper/rlc_metrics.h @@ -27,13 +27,15 @@ #ifndef SRSLTE_RLC_METRICS_H #define SRSLTE_RLC_METRICS_H +#include "srslte/common/common.h" namespace srslte { struct rlc_metrics_t { - float dl_tput_mbps; - float ul_tput_mbps; + float dl_tput_mbps[SRSLTE_N_RADIO_BEARERS]; + float ul_tput_mbps[SRSLTE_N_RADIO_BEARERS]; + float dl_tput_mrb_mbps[SRSLTE_N_MCH_LCIDS]; }; } // namespace srslte diff --git 
a/lib/include/srslte/upper/rlc_um.h b/lib/include/srslte/upper/rlc_um.h index f866417d2..e47df8e73 100644 --- a/lib/include/srslte/upper/rlc_um.h +++ b/lib/include/srslte/upper/rlc_um.h @@ -48,7 +48,7 @@ class rlc_um :public rlc_common { public: - rlc_um(uint32_t queue_len = 32); + rlc_um(uint32_t queue_len = 128); ~rlc_um(); void init(log *rlc_entity_log_, uint32_t lcid_, diff --git a/lib/src/common/log_filter.cc b/lib/src/common/log_filter.cc index 9602e202a..f496c1895 100644 --- a/lib/src/common/log_filter.cc +++ b/lib/src/common/log_filter.cc @@ -59,6 +59,10 @@ log_filter::log_filter(std::string layer, logger *logger_, bool tti) init(layer, logger_, tti); } +log_filter::~log_filter() +{ +} + void log_filter::init(std::string layer, logger *logger_, bool tti) { service_name = layer; diff --git a/lib/src/common/threads.c b/lib/src/common/threads.c index b6fa31778..024faa64a 100644 --- a/lib/src/common/threads.c +++ b/lib/src/common/threads.c @@ -53,8 +53,9 @@ bool threads_new_rt_cpu(pthread_t *thread, void *(*start_routine) (void*), void cpu_set_t cpuset; bool attr_enable = false; +#ifdef PER_THREAD_PRIO if (prio_offset >= 0) { - param.sched_priority = sched_get_priority_max(SCHED_FIFO) - prio_offset; + param.sched_priority = sched_get_priority_max(SCHED_FIFO) - prio_offset; pthread_attr_init(&attr); if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) { perror("pthread_attr_setinheritsched"); @@ -82,6 +83,25 @@ bool threads_new_rt_cpu(pthread_t *thread, void *(*start_routine) (void*), void } attr_enable = true; } else if (prio_offset == -2) { +#else + // All threads have normal priority except prio_offset=0,1,2,3,4 + if (prio_offset >= 0 && prio_offset < 5) { + param.sched_priority = 50; + pthread_attr_init(&attr); + if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) { + perror("pthread_attr_setinheritsched"); + } + if (pthread_attr_setschedpolicy(&attr, SCHED_FIFO)) { + perror("pthread_attr_setschedpolicy"); + } + if 
(pthread_attr_setschedparam(&attr, &param)) { + perror("pthread_attr_setschedparam"); + fprintf(stderr, "Error not enough privileges to set Scheduling priority\n"); + } + attr_enable = true; + + } else { +#endif param.sched_priority = 0; pthread_attr_init(&attr); if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) { diff --git a/lib/src/phy/common/sequence.c b/lib/src/phy/common/sequence.c index 0ecf9f159..52497f5de 100644 --- a/lib/src/phy/common/sequence.c +++ b/lib/src/phy/common/sequence.c @@ -139,6 +139,7 @@ int srslte_sequence_LTE_pr(srslte_sequence_t *q, uint32_t len, uint32_t seed) { for (int i=0;ic_float[i] = (1-2*q->c[i]); q->c_short[i] = (int16_t) q->c_float[i]; + q->c_char[i] = (int8_t) q->c_float[i];; } return SRSLTE_SUCCESS; } @@ -164,6 +165,10 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) { if (!q->c_short) { return SRSLTE_ERROR; } + q->c_char = srslte_vec_malloc(len * sizeof(int8_t)); + if (!q->c_char) { + return SRSLTE_ERROR; + } q->max_len = len; } return SRSLTE_SUCCESS; @@ -182,6 +187,9 @@ void srslte_sequence_free(srslte_sequence_t *q) { if (q->c_short) { free(q->c_short); } + if (q->c_char) { + free(q->c_char); + } bzero(q, sizeof(srslte_sequence_t)); } diff --git a/lib/src/phy/enb/enb_dl.c b/lib/src/phy/enb/enb_dl.c index 0040b65a9..b5e2cd283 100644 --- a/lib/src/phy/enb/enb_dl.c +++ b/lib/src/phy/enb/enb_dl.c @@ -417,7 +417,8 @@ int srslte_enb_dl_put_pdcch_dl(srslte_enb_dl_t *q, srslte_ra_dl_dci_t *grant, srslte_dci_format_t format, srslte_dci_location_t location, uint16_t rnti, uint32_t sf_idx) { - srslte_dci_msg_t dci_msg = {}; + srslte_dci_msg_t dci_msg; + bzero(&dci_msg, sizeof(dci_msg)); bool rnti_is_user = true; if (rnti == SRSLTE_SIRNTI || rnti == SRSLTE_PRNTI || (rnti >= SRSLTE_RARNTI_START && rnti <= SRSLTE_RARNTI_END)) { @@ -439,7 +440,8 @@ int srslte_enb_dl_put_pdcch_ul(srslte_enb_dl_t *q, srslte_ra_ul_dci_t *grant, srslte_dci_location_t location, uint16_t rnti, uint32_t sf_idx) { - srslte_dci_msg_t
dci_msg = {}; + srslte_dci_msg_t dci_msg; + bzero(&dci_msg, sizeof(dci_msg)); srslte_dci_msg_pack_pusch(grant, &dci_msg, q->cell.nof_prb); if (srslte_pdcch_encode(&q->pdcch, &dci_msg, location, rnti, q->sf_symbols, sf_idx, q->cfi)) { diff --git a/lib/src/phy/enb/enb_ul.c b/lib/src/phy/enb/enb_ul.c index 0c561ef5a..155b67598 100644 --- a/lib/src/phy/enb/enb_ul.c +++ b/lib/src/phy/enb/enb_ul.c @@ -76,7 +76,7 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, srslte_ofdm_set_normalize(&q->fft, false); srslte_ofdm_set_freq_shift(&q->fft, -0.5); - if (srslte_pucch_init(&q->pucch)) { + if (srslte_pucch_init_enb(&q->pucch)) { fprintf(stderr, "Error creating PUCCH object\n"); goto clean_exit; } diff --git a/lib/src/phy/fec/rm_turbo.c b/lib/src/phy/fec/rm_turbo.c index 3e06a9de2..0e83cda5b 100644 --- a/lib/src/phy/fec/rm_turbo.c +++ b/lib/src/phy/fec/rm_turbo.c @@ -43,13 +43,15 @@ #endif #ifdef LV_HAVE_SSE -#include -int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx); +#include +int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx); +int srslte_rm_turbo_rx_lut_sse_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx); #endif #ifdef LV_HAVE_AVX -#include -int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx); +#include +int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx); +int srslte_rm_turbo_rx_lut_avx_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx); #endif #define NCOLS 32 @@ -66,12 +68,31 @@ static uint16_t interleaver_parity_bits[192][2*6160]; static srslte_bit_interleaver_t bit_interleavers_parity_bits[192]; static uint16_t deinterleaver[192][4][18448]; static int 
k0_vec[SRSLTE_NOF_TC_CB_SIZES][4][2]; -static bool rm_turbo_tables_generated = false; - +static bool rm_turbo_tables_generated = false; + + +// Store deinterleaver version for sub-block turbo decoder +#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1 +// Prepare bit for sub-block decoder processing. These are the nof subblock sizes +#define NOF_DEINTER_TABLE_SB_IDX 3 +const static int deinter_table_sb_idx[NOF_DEINTER_TABLE_SB_IDX] = {8, 16, 32}; +int deinter_table_idx_from_sb_len(uint32_t nof_subblocks) { + for (int i=0;i= out_len && inputCnt < in_len - 16) { + /* Copy last elements */ + if ((out_len%16) == 12) { + for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) { + output[deinter[j%out_len]] += input[j]; + inputCnt++; + } + } else { + for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) { + output[deinter[j%out_len]] += input[j]; + inputCnt++; + } + } + /* And wrap pointers */ + nwrapps++; + intCnt = 16; + xPtr = (const __m128i*) &input[nwrapps*out_len]; + lutPtr = (const __m128i*) deinter; + } + } + for (int i=inputCnt;i= out_len && inputCnt < in_len - 16) { /* Copy last elements */ if ((out_len%16) == 12) { - for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) { + for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) { output[deinter[j%out_len]] += input[j]; inputCnt++; } } else { - for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) { + for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) { output[deinter[j%out_len]] += input[j]; inputCnt++; } } /* And wrap pointers */ nwrapps++; - intCnt = 16; + intCnt = 16; xPtr = (const __m256i*) &input[nwrapps*out_len]; lutPtr = (const __m256i*) deinter; } - } - for (int i=inputCnt;i= out_len && inputCnt < in_len - 32) { + printf("warning rate matching wrapping remainder %d\n", out_len%32); + /* Copy last elements */ + for (int j=(nwrapps+1)*out_len-(out_len%32) ;j<(nwrapps+1)*out_len;j++) { + output[deinter[j%out_len]] += input[j]; + inputCnt++; + } + /* And wrap pointers */ + 
nwrapps++; + intCnt = 32; + xPtr = (const __m256i*) &input[nwrapps*out_len]; + lutPtr = (const __m256i*) deinter; + } + } + for (int i=inputCnt;iforward[i] = (uint32_t) j; h->reverse[j] = (uint32_t) i; } + if (interl_win != 1) { + uint16_t *f = malloc(long_cb*sizeof(uint16_t)); + uint16_t *r = malloc(long_cb*sizeof(uint16_t)); + memcpy(f, h->forward, long_cb*sizeof(uint16_t)); + memcpy(r, h->reverse, long_cb*sizeof(uint16_t)); + for (i = 0; i < long_cb; i++) { + h->forward[i] = deinter(f[inter(i,interl_win)],interl_win); + h->reverse[i] = deinter(r[inter(i,interl_win)],interl_win); + } + free(f); + free(r); + } + return 0; } diff --git a/lib/src/phy/fec/tc_interl_umts.c b/lib/src/phy/fec/tc_interl_umts.c index d7f9ecdb7..261b4d7d3 100644 --- a/lib/src/phy/fec/tc_interl_umts.c +++ b/lib/src/phy/fec/tc_interl_umts.c @@ -30,7 +30,6 @@ #include #include "srslte/phy/fec/tc_interl.h" -#include "srslte/phy/fec/turbocoder.h" #define TURBO_SRSLTE_TCOD_RATE 3 diff --git a/lib/src/phy/fec/test/rm_turbo_test.c b/lib/src/phy/fec/test/rm_turbo_test.c index c8a0a95b5..f911da1d2 100644 --- a/lib/src/phy/fec/test/rm_turbo_test.c +++ b/lib/src/phy/fec/test/rm_turbo_test.c @@ -183,7 +183,7 @@ int main(int argc, char **argv) { srslte_rm_turbo_rx(buff_f, BUFFSZ, rm_bits_f, nof_e_bits, bits_f, long_cb_enc, rv_idx, 0); bzero(bits2_s, long_cb_enc*sizeof(short)); - srslte_rm_turbo_rx_lut(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx); + srslte_rm_turbo_rx_lut_(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx, false); for (int i=0;iorder - ((last_cb) ? 
crc_tb->order : 0)) / 8; /* if CRC pointer is given */ - for (int i = 0; i < (long_cb - crc->order) / 8; i++) { + for (int i = 0; i < block_size_nocrc; i++) { uint8_t in = input[i]; - /* Put byte in CRC and save latest checksum */ - srslte_crc_checksum_put_byte(crc, in); + /* Put byte in TB CRC and save latest checksum */ + srslte_crc_checksum_put_byte(crc_tb, in); + + /* Put byte in CB CRC and save latest checksum */ + srslte_crc_checksum_put_byte(crc_cb, in); /* Run actual encoder */ tcod_lut_t l = tcod_lut[state0][in]; @@ -225,10 +235,27 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input, state0 = l.next_state; } - uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc); - for (int i = 0; i < crc->order / 8; i++) { - int mask_shift = 8 * (crc->order / 8 - i - 1); - int idx = (long_cb - crc->order) / 8 + i; + if (last_cb) { + uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_tb); + for (int i = 0; i < crc_tb->order / 8; i++) { + int mask_shift = 8 * (crc_tb->order / 8 - i - 1); + int idx = block_size_nocrc + i; + uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff); + + /* Put byte in CB CRC and save latest checksum */ + srslte_crc_checksum_put_byte(crc_cb, in); + + input[idx] = in; + tcod_lut_t l = tcod_lut[state0][in]; + parity[idx] = l.output; + state0 = l.next_state; + } + } + + uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_cb); + for (int i = 0; i < crc_cb->order / 8; i++) { + int mask_shift = 8 * (crc_cb->order / 8 - i - 1); + int idx = (long_cb - crc_cb->order) / 8 + i; uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff); input[idx] = in; @@ -239,11 +266,31 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input, } else { /* No CRC given */ - for (uint32_t i = 0; i < long_cb / 8; i++) { - tcod_lut_t l = tcod_lut[state0][input[i]]; + int block_size_nocrc = (long_cb - ((last_cb) ? 
crc_tb->order : 0)) / 8; + + for (uint32_t i = 0; i < block_size_nocrc; i++) { + uint8_t in = input[i]; + + srslte_crc_checksum_put_byte(crc_tb, in); + + tcod_lut_t l = tcod_lut[state0][in]; parity[i] = l.output; state0 = l.next_state; } + + if (last_cb) { + uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_tb); + for (int i = 0; i < crc_tb->order / 8; i++) { + int mask_shift = 8 * (crc_tb->order / 8 - i - 1); + int idx = block_size_nocrc + i; + uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff); + + input[idx] = in; + tcod_lut_t l = tcod_lut[state0][in]; + parity[idx] = l.output; + state0 = l.next_state; + } + } } parity[long_cb / 8] = 0; // will put tail here later diff --git a/lib/src/phy/fec/turbodecoder.c b/lib/src/phy/fec/turbodecoder.c index 54a0ed109..a97180745 100644 --- a/lib/src/phy/fec/turbodecoder.c +++ b/lib/src/phy/fec/turbodecoder.c @@ -24,151 +24,543 @@ * */ - -#include #include -#include #include +#include +#include +#include +#include "srslte/phy/utils/vector.h" #include "srslte/phy/fec/turbodecoder.h" + +#define debug_enabled 0 + +/* Generic (no SSE) implementation */ #include "srslte/phy/fec/turbodecoder_gen.h" +srslte_tdec_16bit_impl_t gen_impl = { + tdec_gen_init, + tdec_gen_free, + tdec_gen_dec, + tdec_gen_extract_input, + tdec_gen_decision_byte +}; +/* SSE no-window implementation */ +#include "srslte/phy/fec/turbodecoder_sse.h" +srslte_tdec_16bit_impl_t sse_impl = { + tdec_sse_init, + tdec_sse_free, + tdec_sse_dec, + tdec_sse_extract_input, + tdec_sse_decision_byte +}; +/* SSE window implementation */ #ifdef LV_HAVE_SSE -#include "srslte/phy/fec/turbodecoder_simd.h" +#define WINIMP_IS_SSE16 +#include "srslte/phy/fec/turbodecoder_win.h" +#undef WINIMP_IS_SSE16 + +srslte_tdec_16bit_impl_t sse16_win_impl = { + tdec_winsse16_init, + tdec_winsse16_free, + tdec_winsse16_dec, + tdec_winsse16_extract_input, + tdec_winsse16_decision_byte +}; #endif -#include "srslte/phy/utils/vector.h" +/* AVX window implementation */ +#ifdef 
LV_HAVE_AVX2 +#define WINIMP_IS_AVX16 +#include "srslte/phy/fec/turbodecoder_win.h" +#undef WINIMP_IS_AVX16 +srslte_tdec_16bit_impl_t avx16_win_impl = { + tdec_winavx16_init, + tdec_winavx16_free, + tdec_winavx16_dec, + tdec_winavx16_extract_input, + tdec_winavx16_decision_byte +}; +#endif + +/* SSE window implementation */ +#ifdef LV_HAVE_SSE +#define WINIMP_IS_SSE8 +#include "srslte/phy/fec/turbodecoder_win.h" +#undef WINIMP_IS_SSE8 + +srslte_tdec_8bit_impl_t sse8_win_impl = { + tdec_winsse8_init, + tdec_winsse8_free, + tdec_winsse8_dec, + tdec_winsse8_extract_input, + tdec_winsse8_decision_byte +}; +#endif + +/* AVX window implementation */ +#ifdef LV_HAVE_AVX2 +#define WINIMP_IS_AVX8 +#include "srslte/phy/fec/turbodecoder_win.h" +#undef WINIMP_IS_AVX8 +srslte_tdec_8bit_impl_t avx8_win_impl = { + tdec_winavx8_init, + tdec_winavx8_free, + tdec_winavx8_dec, + tdec_winavx8_extract_input, + tdec_winavx8_decision_byte +}; +#endif + +#define AUTO_16_SSE 0 +#define AUTO_16_SSEWIN 1 +#define AUTO_16_AVXWIN 2 +#define AUTO_8_SSEWIN 0 +#define AUTO_8_AVXWIN 1 + + +// Include interfaces for 8 and 16 bit decoder implementations +#define LLR_IS_8BIT +#include "srslte/phy/fec/turbodecoder_iter.h" +#undef LLR_IS_8BIT + +#define LLR_IS_16BIT +#include "srslte/phy/fec/turbodecoder_iter.h" +#undef LLR_IS_16BIT int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) { -#ifdef LV_HAVE_SSE - return srslte_tdec_simd_init(&h->tdec_simd, SRSLTE_TDEC_MAX_NPAR, max_long_cb); -#else - h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12)); + return srslte_tdec_init_manual(h, max_long_cb, SRSLTE_TDEC_AUTO); +} + +uint32_t interleaver_idx(uint32_t nof_subblocks) { + switch (nof_subblocks) { + case 32: + return 3; + case 16: + return 2; + case 8: + return 1; + case 1: + return 0; + default: + return 0; + } +} + +/* Initializes the turbo decoder object */ +int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec_impl_type_t dec_type) +{ + int ret = 
-1; + bzero(h, sizeof(srslte_tdec_t)); + uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL; + + h->dec_type = dec_type; + + // Set manual + switch(dec_type) { + case SRSLTE_TDEC_AUTO: + break; + case SRSLTE_TDEC_SSE: + h->dec16[0] = &sse_impl; + h->current_llr_type = SRSLTE_TDEC_16; + break; + case SRSLTE_TDEC_SSE_WINDOW: + h->dec16[0] = &sse16_win_impl; + h->current_llr_type = SRSLTE_TDEC_16; + break; + case SRSLTE_TDEC_GENERIC: + h->dec16[0] = &gen_impl; + h->current_llr_type = SRSLTE_TDEC_16; + break; + case SRSLTE_TDEC_SSE8_WINDOW: + h->dec8[0] = &sse8_win_impl; + h->current_llr_type = SRSLTE_TDEC_8; + break; +#ifdef LV_HAVE_AVX2 + case SRSLTE_TDEC_AVX_WINDOW: + h->dec16[0] = &avx16_win_impl; + h->current_llr_type = SRSLTE_TDEC_16; + break; + case SRSLTE_TDEC_AVX8_WINDOW: + h->dec8[0] = &avx8_win_impl; + h->current_llr_type = SRSLTE_TDEC_8; + break; +#endif + default: + fprintf(stderr, "Error decoder %d not supported\n", dec_type); + goto clean_and_exit; + } + + h->max_long_cb = max_long_cb; + + h->app1 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->app1) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->app2 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->app2) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->ext1 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->ext1) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->ext2 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->ext2) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->syst0 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->syst0) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->parity0) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len); + if (!h->parity1) { + perror("srslte_vec_malloc"); + goto clean_and_exit; + } + h->input_conv = srslte_vec_malloc(sizeof(int16_t) * (len * 
3+32*3)); if (!h->input_conv) { - perror("malloc"); - return -1; + perror("srslte_vec_malloc"); + goto clean_and_exit; } - return srslte_tdec_gen_init(&h->tdec_gen, max_long_cb); + + if (dec_type == SRSLTE_TDEC_AUTO) { + h->dec16[AUTO_16_SSE] = &sse_impl; + h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl; + h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl; +#ifdef LV_HAVE_AVX2 + h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl; + h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl; #endif + + for (int td=0;tddec16[td]) { + if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) { + goto clean_and_exit; + } + } + } + for (int td=0;tddec8[td]) { + if ((h->nof_blocks8[td] = h->dec8[td]->tdec_init(&h->dec8_hdlr[td], h->max_long_cb))<0) { + goto clean_and_exit; + } + } + } + + // Compute 1 interleaver for each possible nof_subblocks (1, 8, 16 or 32) + for (int s=0;s<4;s++) { + for (int i=0;iinterleaver[s][i], srslte_cbsegm_cbsize(i)) < 0) { + goto clean_and_exit; + } + srslte_tc_interl_LTE_gen_interl(&h->interleaver[s][i], srslte_cbsegm_cbsize(i), s?(8<<(s-1)):1); + } + } + } else { + uint32_t nof_subblocks; + if (dec_type < SRSLTE_TDEC_SSE8_WINDOW) { + if ((h->nof_blocks16[0] = h->dec16[0]->tdec_init(&h->dec16_hdlr[0], h->max_long_cb))<0) { + goto clean_and_exit; + } + nof_subblocks = h->nof_blocks16[0]; + } else { + if ((h->nof_blocks8[0] = h->dec8[0]->tdec_init(&h->dec8_hdlr[0], h->max_long_cb))<0) { + goto clean_and_exit; + } + nof_subblocks = h->nof_blocks8[0]; + } + for (int i=0;iinterleaver[interleaver_idx(nof_subblocks)][i], srslte_cbsegm_cbsize(i)) < 0) { + goto clean_and_exit; + } + srslte_tc_interl_LTE_gen_interl(&h->interleaver[interleaver_idx(nof_subblocks)][i], srslte_cbsegm_cbsize(i), nof_subblocks); + } + } + + h->current_cbidx = -1; + ret = 0; + +clean_and_exit: + if (ret == -1) { + srslte_tdec_free(h); + } + return ret; } -void srslte_tdec_free(srslte_tdec_t * h) { -#ifdef LV_HAVE_SSE - srslte_tdec_simd_free(&h->tdec_simd); -#else +void 
srslte_tdec_free(srslte_tdec_t * h) +{ + if (h->app1) { + free(h->app1); + } + if (h->app2) { + free(h->app2); + } + if (h->ext1) { + free(h->ext1); + } + if (h->ext2) { + free(h->ext2); + } + if (h->syst0) { + free(h->syst0); + } + if (h->parity0) { + free(h->parity0); + } + if (h->parity1) { + free(h->parity1); + } if (h->input_conv) { free(h->input_conv); } - srslte_tdec_gen_free(&h->tdec_gen); -#endif -} + for (int td=0;tddec8[td] && h->dec8_hdlr[td]) { + h->dec8[td]->tdec_free(h->dec8_hdlr[td]); + } + } + for (int td=0;tddec16[td] && h->dec16_hdlr[td]) { + h->dec16[td]->tdec_free(h->dec16_hdlr[td]); + } + } + for (int s=0;s<4;s++) { + for (int i=0;iinterleaver[s][i]); + } + } -int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) { -#ifdef LV_HAVE_SSE - return srslte_tdec_simd_reset(&h->tdec_simd, long_cb); -#else - return srslte_tdec_gen_reset(&h->tdec_gen, long_cb); -#endif + bzero(h, sizeof(srslte_tdec_t)); } -int srslte_tdec_reset_cb(srslte_tdec_t * h, uint32_t cb_idx) { -#ifdef LV_HAVE_SSE - return srslte_tdec_simd_reset_cb(&h->tdec_simd, cb_idx); -#else - return srslte_tdec_gen_reset(&h->tdec_gen, h->tdec_gen.current_cb_len); -#endif +void srslte_tdec_force_not_sb(srslte_tdec_t *h) { + h->force_not_sb = true; } -int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, uint32_t cb_idx) +static void tdec_decision_byte(srslte_tdec_t * h, uint8_t *output) { -#ifdef LV_HAVE_SSE - return srslte_tdec_simd_get_nof_iterations_cb(&h->tdec_simd, cb_idx); -#else - return h->tdec_gen.n_iter; -#endif + if (h->current_llr_type == SRSLTE_TDEC_16) { + h->dec16[h->current_dec]->tdec_decision_byte(!(h->n_iter%2)?h->app1:h->ext1, output, h->current_long_cb); + } else { + h->dec8[h->current_dec]->tdec_decision_byte(!(h->n_iter%2)?(int8_t*)h->app1:(int8_t*)h->ext1, output, h->current_long_cb); + } } -void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) { -#ifdef LV_HAVE_SSE - srslte_tdec_simd_iteration(&h->tdec_simd, 
input, long_cb); -#else - srslte_vec_convert_if(input[0], 0.01, h->input_conv, 3*long_cb+12); - srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb); + +/* Returns number of subblocks in automatic mode for this long_cb */ +uint32_t srslte_tdec_autoimp_get_subblocks(uint32_t long_cb) +{ +#ifdef LV_HAVE_AVX2 + if (!(long_cb%16) && long_cb > 800) { + return 16; + } else #endif + if (!(long_cb%8) && long_cb > 400) { + return 8; + } else { + return 0; + } } -void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) { - int16_t *input_par[SRSLTE_TDEC_MAX_NPAR]; - input_par[0] = input; - return srslte_tdec_iteration_par(h, input_par, long_cb); +static int tdec_sb_idx(uint32_t long_cb) { + uint32_t nof_sb = srslte_tdec_autoimp_get_subblocks(long_cb); + switch(nof_sb) { + case 16: + return AUTO_16_AVXWIN; + case 8: + return AUTO_16_SSEWIN; + case 0: + return AUTO_16_SSE; + } + fprintf(stderr, "Error in tdec_sb_idx() invalid nof_sb=%d\n", nof_sb); + return 0; } -void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) { -#ifdef LV_HAVE_SSE - return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb); -#else - return srslte_tdec_gen_decision(&h->tdec_gen, output[0], long_cb); +uint32_t srslte_tdec_autoimp_get_subblocks_8bit(uint32_t long_cb) +{ +#ifdef LV_HAVE_AVX2 + if (!(long_cb%32) && long_cb > 2048) { + return 32; + } else #endif + if (!(long_cb%16) && long_cb > 800) { + return 16; + } else if (!(long_cb%8) && long_cb > 400) { + return 8; + } else { + return 0; + } } -uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t *h) { -#ifdef LV_HAVE_AVX2 - return 2; -#else - return 1; -#endif +static int tdec_sb_idx_8(uint32_t long_cb) { + uint32_t nof_sb = srslte_tdec_autoimp_get_subblocks_8bit(long_cb); + switch(nof_sb) { + case 32: + return AUTO_8_AVXWIN; + case 16: + return AUTO_8_SSEWIN; + case 8: + return 10+AUTO_16_SSEWIN; + case 0: + return 10+AUTO_16_SSE; + } + fprintf(stderr, "Error 
in tdec_sb_idx_8() invalid nof_sb=%d\n", nof_sb); + return 0; } -void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) { - uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR]; - output_par[0] = output; - srslte_tdec_decision_par(h, output_par, long_cb); +// TODO: Implement SSE version. Don't really a problem since this only called at very low rates +static void convert_8_to_16(int8_t *in, int16_t *out, uint32_t len) +{ + for (int i=0;itdec_simd, output, long_cb); -#else - srslte_tdec_gen_decision_byte(&h->tdec_gen, output[0], long_cb); -#endif +static void convert_16_to_8(int16_t *in, int8_t *out, uint32_t len) +{ + for (int i=0;itdec_simd, output, cb_idx, long_cb); -#else - srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb); -#endif +static void tdec_iteration_8(srslte_tdec_t * h, int8_t * input) +{ + // Select decoder if in auto mode + if (h->dec_type == SRSLTE_TDEC_AUTO) { + h->current_llr_type = SRSLTE_TDEC_8; + h->current_dec = tdec_sb_idx_8(h->current_long_cb); + h->current_inter_idx = interleaver_idx(h->nof_blocks8[h->current_dec]); + + // If long_cb is not multiple of any 8-bit decoder, use a 16-bit decoder and do type conversion + if (h->current_dec >= 10) { + h->current_llr_type = SRSLTE_TDEC_16; + h->current_dec -= 10; + h->current_inter_idx = interleaver_idx(h->nof_blocks16[h->current_dec]); + } + } else { + h->current_dec = 0; + } + + if (h->current_llr_type == SRSLTE_TDEC_16) { + if (!h->n_iter) { + convert_8_to_16(input, h->input_conv, 3*h->current_long_cb+12); + } + run_tdec_iteration_16bit(h, h->input_conv); + } else { + run_tdec_iteration_8bit(h, input); + } } -void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) { - uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR]; - output_par[0] = output; - srslte_tdec_decision_byte_par(h, output_par, long_cb); +static void tdec_iteration_16(srslte_tdec_t * h, int16_t * input) +{ + // Select decoder if in auto mode + if (h->dec_type == SRSLTE_TDEC_AUTO) { + 
h->current_llr_type = SRSLTE_TDEC_16; + h->current_dec = tdec_sb_idx(h->current_long_cb); + } else { + h->current_dec = 0; + } + h->current_inter_idx = interleaver_idx(h->nof_blocks16[h->current_dec]); + + if (h->current_llr_type == SRSLTE_TDEC_8) { + + h->current_inter_idx = interleaver_idx(h->nof_blocks8[h->current_dec]); + + if (!h->n_iter) { + convert_16_to_8(input, h->input_conv, 3*h->current_long_cb+12); + } + run_tdec_iteration_8bit(h, h->input_conv); + } else { + run_tdec_iteration_16bit(h, input); + } } -int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], - uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_iterations, uint32_t long_cb) { -#ifdef LV_HAVE_SSE - return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb); -#else - srslte_vec_convert_if(input[0], 0.01, h->input_conv, 3*long_cb+12); - return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output[0], nof_iterations, long_cb); -#endif +/* Resets the decoder and sets the codeblock length */ +int srslte_tdec_new_cb(srslte_tdec_t * h, uint32_t long_cb) +{ + if (long_cb > h->max_long_cb) { + fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n", + h->max_long_cb); + return -1; + } + + h->n_iter = 0; + h->current_long_cb = long_cb; + h->current_cbidx = srslte_cbsegm_cbindex(long_cb); + if (h->current_cbidx < 0) { + fprintf(stderr, "Invalid CB length %d\n", long_cb); + return -1; + } + return 0; +} + +void srslte_tdec_iteration(srslte_tdec_t * h, int16_t * input, uint8_t *output) +{ + if (h->current_cbidx >= 0) { + tdec_iteration_16(h, input); + tdec_decision_byte(h, output); + } +} + +/* Runs nof_iterations iterations and decides the output bits */ +int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, + uint32_t nof_iterations, uint32_t long_cb) +{ + if (srslte_tdec_new_cb(h, long_cb)) { + return SRSLTE_ERROR; + } + + do { + tdec_iteration_16(h, input); + } while (h->n_iter < nof_iterations); + + 
tdec_decision_byte(h, output); + + return SRSLTE_SUCCESS; +} + + +void srslte_tdec_iteration_8bit(srslte_tdec_t * h, int8_t * input, uint8_t *output) +{ + if (h->current_cbidx >= 0) { + tdec_iteration_8(h, input); + tdec_decision_byte(h, output); + } +} + +/* Runs nof_iterations iterations and decides the output bits */ +int srslte_tdec_run_all_8bit(srslte_tdec_t * h, int8_t * input, uint8_t *output, + uint32_t nof_iterations, uint32_t long_cb) +{ + if (srslte_tdec_new_cb(h, long_cb)) { + return SRSLTE_ERROR; + } + + do { + tdec_iteration_8(h, input); + } while (h->n_iter < nof_iterations); + + tdec_decision_byte(h, output); + + return SRSLTE_SUCCESS; } -int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb) +int srslte_tdec_get_nof_iterations(srslte_tdec_t * h) { - uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR]; - output_par[0] = output; - int16_t *input_par[SRSLTE_TDEC_MAX_NPAR]; - input_par[0] = input; - - return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb); + return h->n_iter; } diff --git a/lib/src/phy/fec/turbodecoder_avx.c b/lib/src/phy/fec/turbodecoder_avx.c deleted file mode 100644 index 2e877cbde..000000000 --- a/lib/src/phy/fec/turbodecoder_avx.c +++ /dev/null @@ -1,475 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -#include -#include -#include -#include -#include -#include - -#include "srslte/phy/fec/turbodecoder_simd.h" -#include "srslte/phy/utils/vector.h" - -#include - -#define NUMSTATES 8 -#define NINPUTS 2 -#define TAIL 3 -#define TOTALTAIL 12 - -#define INF 10000 -#define ZERO 0 - - -#ifdef LV_HAVE_AVX2 - -#include -#include - - -// Number of CB processed in parllel in AVX -#define NCB 2 - -/* -static void print_256i(__m256i x) { - int16_t *s = (int16_t*) &x; - printf("[%d", s[0]); - for (int i=1;i<16;i++) { - printf(",%d", s[i]); - } - printf("]\n"); -} -*/ - -/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */ -static inline int16_t hMax0(__m256i masked_value) -{ - __m128i tmp1 = _mm256_extractf128_si256(masked_value, 0); - __m128i tmp3 = _mm_minpos_epu16(tmp1); - return (int16_t)(_mm_cvtsi128_si32(tmp3)); -} - -static inline int16_t hMax1(__m256i masked_value) -{ - __m128i tmp1 = _mm256_extractf128_si256(masked_value, 1); - __m128i tmp3 = _mm_minpos_epu16(tmp1); - return (int16_t)(_mm_cvtsi128_si32(tmp3)); -} - -/* Computes beta values */ -void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) -{ - int k; - uint32_t end = long_cb + 3; - const __m256i *alphaPtr = (const __m256i*) s->alpha; - - __m256i beta_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); - __m256i g, bp, bn, alpha_k; - - /* Define the shuffle constant for the positive beta */ - __m256i shuf_bp = _mm256_set_epi8( - // 1st CB - 15+16, 14+16, // 7 - 7+16, 6+16, // 3 - 5+16, 4+16, // 2 - 13+16, 12+16, // 6 - 11+16, 10+16, // 5 - 3+16, 2+16, // 1 - 1+16, 0+16, // 0 - 9+16, 8+16, // 4 - - // 2nd CB - 15, 14, // 7 - 7, 6, // 3 - 5, 4, // 2 - 13, 12, // 6 - 11, 10, // 5 - 3, 
2, // 1 - 1, 0, // 0 - 9, 8 // 4 - ); - - /* Define the shuffle constant for the negative beta */ - __m256i shuf_bn = _mm256_set_epi8( - 7+16, 6+16, // 3 - 15+16, 14+16, // 7 - 13+16, 12+16, // 6 - 5+16, 4+16, // 2 - 3+16, 2+16, // 1 - 11+16, 10+16, // 5 - 9+16, 8+16, // 4 - 1+16, 0+16, // 0 - - 7, 6, // 3 - 15, 14, // 7 - 13, 12, // 6 - 5, 4, // 2 - 3, 2, // 1 - 11, 10, // 5 - 9, 8, // 4 - 1, 0 // 0 - ); - - alphaPtr += long_cb-1; - - /* Define shuffle for branch costs */ - __m256i shuf_g[4]; - shuf_g[3] = _mm256_set_epi8(3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16, - 3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2); - shuf_g[2] = _mm256_set_epi8(7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16, - 7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6); - shuf_g[1] = _mm256_set_epi8(11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16, - 11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10); - shuf_g[0] = _mm256_set_epi8(15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16, - 15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14); - - /* Define shuffle for beta normalization */ - __m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); - - __m256i gv; - int16_t *b = &s->branch[2*NCB*long_cb-16]; - __m256i *gPtr = (__m256i*) b; - - /* This defines a beta computation step: - * Adds and substracts the branch metrics to the previous beta step, - * shuffles the states according to the trellis path and selects maximum state - */ -#define BETA_STEP(g) bp = _mm256_add_epi16(beta_k, g);\ - bn = _mm256_sub_epi16(beta_k, g);\ - bp = _mm256_shuffle_epi8(bp, shuf_bp);\ - bn = _mm256_shuffle_epi8(bn, shuf_bn);\ - beta_k = _mm256_max_epi16(bp, bn); - - /* Loads the alpha metrics from memory and adds them to the temporal bn and bp - * metrics. 
Then computes horizontal maximum of both metrics and computes difference - */ -#define BETA_STEP_CNT(c,d) g = _mm256_shuffle_epi8(gv, shuf_g[c]);\ - BETA_STEP(g)\ - alpha_k = _mm256_load_si256(alphaPtr);\ - alphaPtr--;\ - bp = _mm256_add_epi16(bp, alpha_k);\ - bn = _mm256_add_epi16(bn, alpha_k);\ - bn = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bn);\ - bp = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bp);\ - output[0][k-d] = hMax0(bn) - hMax0(bp);\ - output[1][k-d] = hMax1(bn) - hMax1(bp); - - /* The tail does not require to load alpha or produce outputs. Only update - * beta metrics accordingly */ - for (k=end-1; k>=long_cb; k--) { - int16_t g0_1 = s->branch[2*NCB*k]; - int16_t g1_1 = s->branch[2*NCB*k+1]; - int16_t g0_2 = s->branch[2*NCB*k+6]; - int16_t g1_2 = s->branch[2*NCB*k+6+1]; - g = _mm256_set_epi16(g1_2, g0_2, g0_2, g1_2, g1_2, g0_2, g0_2, g1_2, g1_1, g0_1, g0_1, g1_1, g1_1, g0_1, g0_1, g1_1); - BETA_STEP(g); - } - - /* We inline 2 trelis steps for each normalization */ - __m256i norm; - for (; k >= 0; k-=8) { - gv = _mm256_load_si256(gPtr); - gPtr--; - BETA_STEP_CNT(0,0); - BETA_STEP_CNT(1,1); - BETA_STEP_CNT(2,2); - BETA_STEP_CNT(3,3); - norm = _mm256_shuffle_epi8(beta_k, shuf_norm); - beta_k = _mm256_sub_epi16(beta_k, norm); - gv = _mm256_load_si256(gPtr); - gPtr--; - BETA_STEP_CNT(0,4); - BETA_STEP_CNT(1,5); - BETA_STEP_CNT(2,6); - BETA_STEP_CNT(3,7); - norm = _mm256_shuffle_epi8(beta_k, shuf_norm); - beta_k = _mm256_sub_epi16(beta_k, norm); - } -} - -/* Computes alpha metrics */ -void map_avx_alpha(map_gen_t * s, uint32_t long_cb) -{ - uint32_t k; - int16_t *alpha1 = s->alpha; - int16_t *alpha2 = &s->alpha[8]; - uint32_t i; - - alpha1[0] = 0; - alpha2[0] = 0; - for (i = 1; i < 8; i++) { - alpha1[i] = -INF; - alpha2[i] = -INF; - } - - /* Define the shuffle constant for the positive alpha */ - __m256i shuf_ap = _mm256_set_epi8( - - // 1st CB - 31, 30, // 7 - 25, 24, // 4 - 23, 22, // 3 - 17, 16, // 0 - 29, 28, // 6 - 27, 26, // 5 - 21, 20, // 2 - 19, 
18, // 1 - - // 2nd CB - 15, 14, // 7 - 9, 8, // 4 - 7, 6, // 3 - 1, 0, // 0 - 13, 12, // 6 - 11, 10, // 5 - 5, 4, // 2 - 3, 2 // 1 - ); - - /* Define the shuffle constant for the negative alpha */ - __m256i shuf_an = _mm256_set_epi8( - - // 1nd CB - 29, 28, // 6 - 27, 26, // 5 - 21, 20, // 2 - 19, 18, // 1 - 31, 30, // 7 - 25, 24, // 4 - 23, 22, // 3 - 17, 16, // 0 - - // 2nd CB - 13, 12, // 6 - 11, 10, // 5 - 5, 4, // 2 - 3, 2, // 1 - 15, 14, // 7 - 9, 8, // 4 - 7, 6, // 3 - 1, 0 // 0 - ); - - /* Define shuffle for branch costs */ - __m256i shuf_g[4]; - shuf_g[0] = _mm256_set_epi8(3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16, - 3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2); - shuf_g[1] = _mm256_set_epi8(7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16, - 7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6); - shuf_g[2] = _mm256_set_epi8(11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16, - 11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10); - shuf_g[3] = _mm256_set_epi8(15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16, - 15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14); - - __m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); - - __m256i* alphaPtr = (__m256i*) s->alpha; - alphaPtr++; - - __m256i gv; - __m256i *gPtr = (__m256i*) s->branch; - __m256i g, ap, an; - - __m256i alpha_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); - - /* This defines a alpha computation step: - * Adds and substracts the branch metrics to the previous alpha step, - * shuffles the states according to the trellis path and selects maximum state - */ -#define ALPHA_STEP(c) g = _mm256_shuffle_epi8(gv, shuf_g[c]); \ - ap = _mm256_add_epi16(alpha_k, g);\ - an = _mm256_sub_epi16(alpha_k, g);\ - ap = _mm256_shuffle_epi8(ap, shuf_ap);\ - an = _mm256_shuffle_epi8(an, 
shuf_an);\ - alpha_k = _mm256_max_epi16(ap, an);\ - _mm256_store_si256(alphaPtr, alpha_k);\ - alphaPtr++;\ - - - /* In this loop, we compute 8 steps and normalize twice for each branch metrics memory load */ - __m256i norm; - for (k = 0; k < long_cb/8; k++) { - gv = _mm256_load_si256(gPtr); - - gPtr++; - ALPHA_STEP(0); - ALPHA_STEP(1); - ALPHA_STEP(2); - ALPHA_STEP(3); - norm = _mm256_shuffle_epi8(alpha_k, shuf_norm); - alpha_k = _mm256_sub_epi16(alpha_k, norm); - gv = _mm256_load_si256(gPtr); - gPtr++; - ALPHA_STEP(0); - ALPHA_STEP(1); - ALPHA_STEP(2); - ALPHA_STEP(3); - norm = _mm256_shuffle_epi8(alpha_k, shuf_norm); - alpha_k = _mm256_sub_epi16(alpha_k, norm); - } -} - -void map_sse_gamma_single(int16_t *output, int16_t *input, int16_t *app, int16_t *parity) -{ - __m128i res00, res10, res01, res11, res0, res1; - __m128i in, ap, pa, g1, g0; - - __m128i *inPtr = (__m128i*) input; - __m128i *appPtr = (__m128i*) app; - __m128i *paPtr = (__m128i*) parity; - __m128i *resPtr = (__m128i*) output; - - __m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0); - __m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8); - __m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff); - __m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff); - - in = _mm_load_si128(inPtr); - inPtr++; - pa = _mm_load_si128(paPtr); - paPtr++; - - if (appPtr) { - ap = _mm_load_si128(appPtr); - appPtr++; - in = _mm_add_epi16(ap, in); - } - - g1 = _mm_add_epi16(in, pa); - g0 = _mm_sub_epi16(in, pa); - - g1 = _mm_srai_epi16(g1, 1); - g0 = _mm_srai_epi16(g0, 1); - - res00 = _mm_shuffle_epi8(g0, res00_mask); - res10 = _mm_shuffle_epi8(g0, res10_mask); - res01 = _mm_shuffle_epi8(g1, res01_mask); - res11 = _mm_shuffle_epi8(g1, res11_mask); - - res0 = _mm_or_si128(res00, res01); - res1 = _mm_or_si128(res10, res11); - - _mm_store_si128(resPtr, res0); - 
resPtr++; - _mm_store_si128(resPtr, res1); - resPtr++; -} - - -/* Compute branch metrics (gamma) */ -void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb) -{ - __m128i res10, res20, res11, res21, res1, res2; - __m256i in, ap, pa, g1, g0; - - __m256i *inPtr = (__m256i*) input; - __m256i *appPtr = (__m256i*) app; - __m256i *paPtr = (__m256i*) parity; - __m128i *resPtr = (__m128i*) h->branch; - - if (cbidx) { - resPtr++; - } - - __m128i res10_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0); - __m128i res11_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff); - - __m128i res20_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8); - __m128i res21_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff); - - for (int i=0;ibranch[2*i*NCB+cbidx*6] = (input[i] - parity[i])/2; - h->branch[2*i*NCB+cbidx*6+1] = (input[i] + parity[i])/2; - } -} - - -#endif - - diff --git a/lib/src/phy/fec/turbodecoder_gen.c b/lib/src/phy/fec/turbodecoder_gen.c index 649c3363a..b7d67dae0 100644 --- a/lib/src/phy/fec/turbodecoder_gen.c +++ b/lib/src/phy/fec/turbodecoder_gen.c @@ -1,400 +1,275 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -#include -#include -#include -#include -#include -#include - -#include "srslte/phy/fec/turbodecoder_gen.h" -#include "srslte/phy/utils/vector.h" - -#define NUMSTATES 8 -#define NINPUTS 2 -#define TAIL 3 -#define TOTALTAIL 12 - -#define INF 9e4 -#define ZERO 9e-4 - -/************************************************ - * - * MAP_GEN is the MAX-LOG-MAP generic implementation of the - * Decoder - * - ************************************************/ -static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity, - uint32_t long_cb) -{ - float m_b[8], new[8], old[8]; - float x, y, xy; - int k; - uint32_t end = long_cb + SRSLTE_TCOD_RATE; - float *beta = s->beta; - uint32_t i; - - for (i = 0; i < 8; i++) { - old[i] = beta[8 * (end) + i]; - } - - for (k = end - 1; k >= 0; k--) { - x = input[k]; - y = parity[k]; - - xy = x + y; - - m_b[0] = old[4] + xy; - m_b[1] = old[4]; - m_b[2] = old[5] + y; - m_b[3] = old[5] + x; - m_b[4] = old[6] + x; - m_b[5] = old[6] + y; - m_b[6] = old[7]; - m_b[7] = old[7] + xy; - - new[0] = old[0]; - new[1] = old[0] + xy; - new[2] = old[1] + x; - new[3] = old[1] + y; - new[4] = old[2] + y; - new[5] = old[2] + x; - new[6] = old[3] + xy; - new[7] = old[3]; - - for (i = 0; i < 8; i++) { - if (m_b[i] > new[i]) - new[i] = m_b[i]; - old[i] = new[i]; - beta[8 * k + i] = old[i]; - } - } -} - -static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity, float * output, - uint32_t long_cb) -{ - float m_b[8], new[8], old[8], max1[8], max0[8]; - float m1, m0; - float x, y, xy; - float out; - uint32_t k; - uint32_t end = long_cb; - float *beta = s->beta; - uint32_t i; - - old[0] = 0; - for (i = 1; i < 8; i++) { - old[i] = -INF; - } - - for (k = 1; k < end + 1; k++) { - x = input[k - 1]; - y = parity[k - 1]; - - xy = x + y; - - 
m_b[0] = old[0]; - m_b[1] = old[3] + y; - m_b[2] = old[4] + y; - m_b[3] = old[7]; - m_b[4] = old[1]; - m_b[5] = old[2] + y; - m_b[6] = old[5] + y; - m_b[7] = old[6]; - - new[0] = old[1] + xy; - new[1] = old[2] + x; - new[2] = old[5] + x; - new[3] = old[6] + xy; - new[4] = old[0] + xy; - new[5] = old[3] + x; - new[6] = old[4] + x; - new[7] = old[7] + xy; - - for (i = 0; i < 8; i++) { - max0[i] = m_b[i] + beta[8 * k + i]; - max1[i] = new[i] + beta[8 * k + i]; - } - - m1 = max1[0]; - m0 = max0[0]; - - for (i = 1; i < 8; i++) { - if (max1[i] > m1) - m1 = max1[i]; - if (max0[i] > m0) - m0 = max0[i]; - } - - for (i = 0; i < 8; i++) { - if (m_b[i] > new[i]) - new[i] = m_b[i]; - old[i] = new[i]; - } - - out = m1 - m0; - output[k - 1] = out; - } -} - -static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb) -{ - bzero(h, sizeof(srslte_map_gen_vl_t)); - h->beta = srslte_vec_malloc(sizeof(float) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES); - if (!h->beta) { - perror("srslte_vec_malloc"); - return -1; - } - h->max_long_cb = max_long_cb; - return 0; -} - -static void map_gen_free(srslte_map_gen_vl_t * h) -{ - if (h->beta) { - free(h->beta); - } - bzero(h, sizeof(srslte_map_gen_vl_t)); -} - -static void map_gen_dec(srslte_map_gen_vl_t * h, float * input, float * parity, float * output, - uint32_t long_cb) -{ - uint32_t k; - - h->beta[(long_cb + TAIL) * NUMSTATES] = 0; - for (k = 1; k < NUMSTATES; k++) - h->beta[(long_cb + TAIL) * NUMSTATES + k] = -INF; - - map_gen_beta(h, input, parity, long_cb); - map_gen_alpha(h, input, parity, output, long_cb); -} - -/************************************************ - * - * TURBO DECODER INTERFACE - * - ************************************************/ -int srslte_tdec_gen_init(srslte_tdec_gen_t * h, uint32_t max_long_cb) -{ - int ret = -1; - bzero(h, sizeof(srslte_tdec_gen_t)); - uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL; - - h->max_long_cb = max_long_cb; - - h->llr1 = srslte_vec_malloc(sizeof(float) * len); - 
if (!h->llr1) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->llr2 = srslte_vec_malloc(sizeof(float) * len); - if (!h->llr2) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->w = srslte_vec_malloc(sizeof(float) * len); - if (!h->w) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->syst = srslte_vec_malloc(sizeof(float) * len); - if (!h->syst) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->parity = srslte_vec_malloc(sizeof(float) * len); - if (!h->parity) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - - if (map_gen_init(&h->dec, h->max_long_cb)) { - goto clean_and_exit; - } - - for (int i=0;iinterleaver[i], srslte_cbsegm_cbsize(i)) < 0) { - goto clean_and_exit; - } - srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i)); - } - h->current_cbidx = -1; - ret = 0; -clean_and_exit:if (ret == -1) { - srslte_tdec_gen_free(h); - } - return ret; -} - -void srslte_tdec_gen_free(srslte_tdec_gen_t * h) -{ - if (h->llr1) { - free(h->llr1); - } - if (h->llr2) { - free(h->llr2); - } - if (h->w) { - free(h->w); - } - if (h->syst) { - free(h->syst); - } - if (h->parity) { - free(h->parity); - } - - map_gen_free(&h->dec); - - for (int i=0;iinterleaver[i]); - } - - bzero(h, sizeof(srslte_tdec_gen_t)); -} - -void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h, float * input, uint32_t long_cb) -{ - uint32_t i; - - if (h->current_cbidx >= 0) { - - uint16_t *inter = h->interleaver[h->current_cbidx].forward; - uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - - // Prepare systematic and parity bits for MAP DEC #1 - for (i = 0; i < long_cb; i++) { - h->syst[i] = input[SRSLTE_TCOD_RATE * i] + h->w[i]; - h->parity[i] = input[SRSLTE_TCOD_RATE * i + 1]; - } - for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) { - h->syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)]; - h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1]; - } - - // Run 
MAP DEC #1 - map_gen_dec(&h->dec, h->syst, h->parity, h->llr1, long_cb); - - // Prepare systematic and parity bits for MAP DEC #1 - for (i = 0; i < long_cb; i++) { - h->syst[i] = h->llr1[inter[i]] - - h->w[inter[i]]; - h->parity[i] = input[SRSLTE_TCOD_RATE * i + 2]; - } - for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) { - h->syst[i] = - input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)]; - h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE - + NINPUTS * (i - long_cb) + 1]; - } - - // Run MAP DEC #2 - map_gen_dec(&h->dec, h->syst, h->parity, h->llr2, long_cb); - - //printf("llr2="); - //srslte_vec_fprint_f(stdout, h->llr2, long_cb); - - - // Update a-priori LLR from the last iteration - for (i = 0; i < long_cb; i++) { - h->w[i] += h->llr2[deinter[i]] - h->llr1[i]; - } - } else { - fprintf(stderr, "Error CB index not set (call srslte_tdec_gen_reset() first\n"); - } - - // Increase number of iterations - h->n_iter++; -} - -int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb) -{ - if (long_cb > h->max_long_cb) { - fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n", - h->max_long_cb); - return -1; - } - memset(h->w, 0, sizeof(float) * long_cb); - h->current_cbidx = srslte_cbsegm_cbindex(long_cb); - h->current_cb_len = long_cb; - if (h->current_cbidx < 0) { - fprintf(stderr, "Invalid CB length %d\n", long_cb); - return -1; - } - return 0; -} - -void srslte_tdec_gen_decision(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb) -{ - uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - uint32_t i; - for (i = 0; i < long_cb; i++) { - output[i] = (h->llr2[deinter[i]] > 0) ? 
1 : 0; - } -} - -void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb) -{ - uint32_t i; - uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; - uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - - // long_cb is always byte aligned - for (i = 0; i < long_cb/8; i++) { - uint8_t out0 = h->llr2[deinter[8*i+0]]>0?mask[0]:0; - uint8_t out1 = h->llr2[deinter[8*i+1]]>0?mask[1]:0; - uint8_t out2 = h->llr2[deinter[8*i+2]]>0?mask[2]:0; - uint8_t out3 = h->llr2[deinter[8*i+3]]>0?mask[3]:0; - uint8_t out4 = h->llr2[deinter[8*i+4]]>0?mask[4]:0; - uint8_t out5 = h->llr2[deinter[8*i+5]]>0?mask[5]:0; - uint8_t out6 = h->llr2[deinter[8*i+6]]>0?mask[6]:0; - uint8_t out7 = h->llr2[deinter[8*i+7]]>0?mask[7]:0; - - output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; - } -} - -int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, float * input, uint8_t *output, - uint32_t nof_iterations, uint32_t long_cb) -{ - uint32_t iter = 0; - - if (srslte_tdec_gen_reset(h, long_cb)) { - return SRSLTE_ERROR; - } - - do { - srslte_tdec_gen_iteration(h, input, long_cb); - iter++; - } while (iter < nof_iterations); - - srslte_tdec_gen_decision_byte(h, output, long_cb); - - return SRSLTE_SUCCESS; -} +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "srslte/phy/fec/turbodecoder_gen.h" +#include "srslte/phy/utils/vector.h" + +#define NUMSTATES 8 +#define NINPUTS 2 +#define TAIL 3 +#define TOTALTAIL 12 + +#define INF 10000 + +#define debug_enabled 0 + +#if debug_enabled +#define debug_state printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=", k, x, parity[k-1], out); srslte_vec_fprint_s(stdout, alpha, 8); \ + printf(", beta="); srslte_vec_fprint_s(stdout, &beta[8*(k)], 8); printf("\n"); +#else +#define debug_state +#endif + +/************************************************ + * + * MAP_GEN is the MAX-LOG-MAP generic implementation of the + * Decoder + * + ************************************************/ +static void map_gen_beta(tdec_gen_t *s, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb) +{ + int16_t m_b[8], new[8], old[8]; + int16_t x, y, xy; + int k; + uint32_t end = long_cb + SRSLTE_TCOD_RATE; + int16_t *beta = s->beta; + uint32_t i; + + for (i = 0; i < 8; i++) { + old[i] = beta[8 * (end) + i]; + } + + for (k = end - 1; k >= 0; k--) { + x = input[k]; + if (app && k new[i]) + new[i] = m_b[i]; + old[i] = new[i]; + beta[8 * k + i] = old[i]; + } + + if ((k%4)==0 && k < long_cb) { + for (i = 1; i < 8; i++) { + old[i] -= old[0]; + } + old[0] = 0; + } + } +} + +static void map_gen_alpha(tdec_gen_t *s, int16_t *input, int16_t *app, int16_t *parity, int16_t *output, uint32_t long_cb) +{ + int16_t m_b[8], new[8], old[8], max1[8], max0[8]; + int16_t m1, m0; + int16_t x, y, xy; + int16_t out; + uint32_t k; + uint32_t end = long_cb; + int16_t *beta = s->beta; + uint32_t i; + + old[0] = 0; + for (i = 1; i < 8; i++) { + old[i] = -INF; + } + +#if debug_enabled + int16_t alpha[8]; +#endif + + for (k = 1; k < end + 1; k++) { + x = input[k - 1]; + if 
(app) { + x += app[k - 1]; + } + y = parity[k - 1]; + + xy = x + y; + +#if debug_enabled + memcpy(alpha, old, sizeof(int16_t)*8); +#endif + + m_b[0] = old[0]; + m_b[1] = old[3] + y; + m_b[2] = old[4] + y; + m_b[3] = old[7]; + m_b[4] = old[1]; + m_b[5] = old[2] + y; + m_b[6] = old[5] + y; + m_b[7] = old[6]; + + new[0] = old[1] + xy; + new[1] = old[2] + x; + new[2] = old[5] + x; + new[3] = old[6] + xy; + new[4] = old[0] + xy; + new[5] = old[3] + x; + new[6] = old[4] + x; + new[7] = old[7] + xy; + + for (i = 0; i < 8; i++) { + max0[i] = m_b[i] + beta[8 * k + i]; + max1[i] = new[i] + beta[8 * k + i]; + } + + m1 = max1[0]; + m0 = max0[0]; + + for (i = 1; i < 8; i++) { + if (max1[i] > m1) + m1 = max1[i]; + if (max0[i] > m0) + m0 = max0[i]; + } + for (i = 0; i < 8; i++) { + if (m_b[i] > new[i]) + new[i] = m_b[i]; + old[i] = new[i]; } + + if ((k%4)==0) { + for (i = 1; i < 8; i++) { + old[i] -= old[0]; + } + old[0] = 0; + } + + out = m1 - m0; + output[k - 1] = out; + + debug_state; + + } +} + +int tdec_gen_init(void **hh, uint32_t max_long_cb) +{ + *hh = calloc(1, sizeof(tdec_gen_t)); + + tdec_gen_t *h = (tdec_gen_t*) *hh; + + h->beta = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES); + if (!h->beta) { + perror("srslte_vec_malloc"); + return -1; + } + h->max_long_cb = max_long_cb; + return 1; +} + +void tdec_gen_free(void *hh) +{ + tdec_gen_t *h = (tdec_gen_t*) hh; + if (h) { + if (h->beta) { + free(h->beta); + } + free(h); + } +} + +void tdec_gen_dec(void *hh, int16_t *input, int16_t *app, int16_t *parity, int16_t *output, uint32_t long_cb) +{ + tdec_gen_t *h = (tdec_gen_t*) hh; + + h->beta[(long_cb + TAIL) * NUMSTATES] = 0; + for (uint32_t k = 1; k < NUMSTATES; k++) + h->beta[(long_cb + TAIL) * NUMSTATES + k] = -INF; + + map_gen_beta(h, input, app, parity, long_cb); + map_gen_alpha(h, input, app, parity, output, long_cb); +} + +void tdec_gen_extract_input(int16_t *input, int16_t *syst, int16_t *app2, int16_t *parity0, int16_t 
*parity1, uint32_t long_cb) +{ + // Prepare systematic and parity bits for MAP DEC #1 + for (uint32_t i = 0; i < long_cb; i++) { + syst[i] = input[SRSLTE_TCOD_RATE * i]; + parity0[i] = input[SRSLTE_TCOD_RATE * i + 1]; + parity1[i] = input[SRSLTE_TCOD_RATE * i + 2]; + } + for (uint32_t i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) { + syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)]; + parity0[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1]; + + app2[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)]; + parity1[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + + NINPUTS * (i - long_cb) + 1]; + } + +} + +void tdec_gen_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb) +{ + uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; + + // long_cb is always byte aligned + for (uint32_t i = 0; i < long_cb/8; i++) { + uint8_t out0 = app1[8*i+0]>0?mask[0]:0; + uint8_t out1 = app1[8*i+1]>0?mask[1]:0; + uint8_t out2 = app1[8*i+2]>0?mask[2]:0; + uint8_t out3 = app1[8*i+3]>0?mask[3]:0; + uint8_t out4 = app1[8*i+4]>0?mask[4]:0; + uint8_t out5 = app1[8*i+5]>0?mask[5]:0; + uint8_t out6 = app1[8*i+6]>0?mask[6]:0; + uint8_t out7 = app1[8*i+7]>0?mask[7]:0; + + output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; + } +} + diff --git a/lib/src/phy/fec/turbodecoder_simd.c b/lib/src/phy/fec/turbodecoder_simd.c deleted file mode 100644 index f9bfbbd44..000000000 --- a/lib/src/phy/fec/turbodecoder_simd.c +++ /dev/null @@ -1,542 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. 
- * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -#include -#include -#include -#include -#include -#include - -#include "srslte/phy/fec/turbodecoder_simd.h" -#include "srslte/phy/utils/vector.h" - -#include - -#define NUMSTATES 8 -#define NINPUTS 2 -#define TAIL 3 -#define TOTALTAIL 12 - -#define INF 10000 -#define ZERO 0 - - -#ifdef LV_HAVE_SSE -#include - -// Define SSE/AVX implementations -void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb); -void map_sse_alpha(map_gen_t * s, uint32_t long_cb); -void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb); - -#ifdef LV_HAVE_AVX2 -void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb); -void map_avx_alpha(map_gen_t * s, uint32_t long_cb); -void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb); -#endif - - -void map_simd_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb) -{ - if (nof_cb == 1) { - map_sse_beta(s, output[0], long_cb); - } -#ifdef LV_HAVE_AVX2 - else if (nof_cb == 2) { - map_avx_beta(s, output, long_cb); - } -#endif -} - -void map_simd_alpha(map_gen_t * s, uint32_t nof_cb, uint32_t long_cb) -{ - if (nof_cb == 1) { - map_sse_alpha(s, long_cb); - } -#ifdef 
LV_HAVE_AVX2 - else if (nof_cb == 2) { - map_avx_alpha(s, long_cb); - } -#endif -} -void map_simd_gamma(map_gen_t * s, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t nof_cb, uint32_t long_cb) -{ - if (nof_cb == 1) { - map_sse_gamma(s, input, app, parity, long_cb); - } -#ifdef LV_HAVE_AVX2 - else if (nof_cb == 2) { - map_avx_gamma(s, input, app, parity, cbidx, long_cb); - } -#endif -} - -/* Inititalizes constituent decoder object */ -int map_simd_init(map_gen_t * h, uint32_t max_par_cb, uint32_t max_long_cb) -{ - bzero(h, sizeof(map_gen_t)); - - h->max_par_cb = max_par_cb; - h->max_long_cb = max_long_cb; - - h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb); - if (!h->alpha) { - perror("srslte_vec_malloc"); - return -1; - } - h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb); - if (!h->branch) { - perror("srslte_vec_malloc"); - return -1; - } - return 0; -} - -void map_simd_free(map_gen_t * h) -{ - if (h->alpha) { - free(h->alpha); - } - if (h->branch) { - free(h->branch); - } - bzero(h, sizeof(map_gen_t)); -} - -/* Runs one instance of a decoder */ -void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], int16_t *app[SRSLTE_TDEC_MAX_NPAR], int16_t * parity[SRSLTE_TDEC_MAX_NPAR], - int16_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t cb_mask, uint32_t long_cb) -{ - - uint32_t nof_cb = 1; - int16_t *outptr[SRSLTE_TDEC_MAX_NPAR] = { NULL, NULL }; - - // Compute branch metrics - switch(cb_mask) { - case 1: - nof_cb = 1; - outptr[0] = output[0]; - map_simd_gamma(h, input[0], app?app[0]:NULL, parity[0], 0, 1, long_cb); - break; - case 2: - nof_cb = 1; - outptr[0] = output[1]; - map_simd_gamma(h, input[1], app?app[1]:NULL, parity[1], 0, 1, long_cb); - break; - case 3: - nof_cb = 2; - for (int i=0;i<2;i++) { - outptr[i] = output[i]; - map_simd_gamma(h, input[i], app?app[i]:NULL, parity[i], i, 2, 
long_cb); - } - break; - } - - // Forward recursion - map_simd_alpha(h, nof_cb, long_cb); - - // Backwards recursion + LLR computation - map_simd_beta(h, outptr, nof_cb, long_cb); -} - -/* Initializes the turbo decoder object */ -int srslte_tdec_simd_init(srslte_tdec_simd_t * h, uint32_t max_par_cb, uint32_t max_long_cb) -{ - int ret = -1; - bzero(h, sizeof(srslte_tdec_simd_t)); - uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL; - - h->max_long_cb = max_long_cb; - h->max_par_cb = max_par_cb; - - for (int i=0;imax_par_cb;i++) { - h->app1[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->app1[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->app2[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->app2[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->ext1[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->ext1[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->ext2[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->ext2[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->syst[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->syst[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->parity0[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->parity0[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->parity1[i] = srslte_vec_malloc(sizeof(int16_t) * len); - if (!h->parity1[i]) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - - } - - if (map_simd_init(&h->dec, h->max_par_cb, h->max_long_cb)) { - goto clean_and_exit; - } - - for (int i=0;iinterleaver[i], srslte_cbsegm_cbsize(i)) < 0) { - goto clean_and_exit; - } - srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i)); - } - h->current_cbidx = -1; - h->cb_mask = 0; - ret = 0; -clean_and_exit:if (ret == -1) { - srslte_tdec_simd_free(h); - } - return ret; -} - -void srslte_tdec_simd_free(srslte_tdec_simd_t * h) -{ - for (int i=0;imax_par_cb;i++) { - if 
(h->app1[i]) { - free(h->app1[i]); - } - if (h->app2[i]) { - free(h->app2[i]); - } - if (h->ext1[i]) { - free(h->ext1[i]); - } - if (h->ext2[i]) { - free(h->ext2[i]); - } - if (h->syst[i]) { - free(h->syst[i]); - } - if (h->parity0[i]) { - free(h->parity0[i]); - } - if (h->parity1[i]) { - free(h->parity1[i]); - } - } - - map_simd_free(&h->dec); - - for (int i=0;iinterleaver[i]); - } - - bzero(h, sizeof(srslte_tdec_simd_t)); -} - -/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into - * 3 buffers ready to be used by compute_gamma() - */ -void deinterleave_input_simd(srslte_tdec_simd_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb) { - uint32_t i; - - __m128i *inputPtr = (__m128i*) input; - __m128i in0, in1, in2; - __m128i s0, s1, s2, s; - __m128i p00, p01, p02, p0; - __m128i p10, p11, p12, p1; - - __m128i *sysPtr = (__m128i*) h->syst[cbidx]; - __m128i *pa0Ptr = (__m128i*) h->parity0[cbidx]; - __m128i *pa1Ptr = (__m128i*) h->parity1[cbidx]; - - // pick bits 0, 3, 6 from 1st word - __m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0); - // pick bits 1, 4, 7 from 2st word - __m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff); - // pick bits 2, 5 from 3rd word - __m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - // pick bits 1, 4, 7 from 1st word - __m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2); - // pick bits 2, 5, from 2st word - __m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff); - // pick bits 0, 3, 6 from 3rd word - __m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - // pick bits 2, 5 from 1st word - __m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4); - // pick bits 0, 3, 6, from 2st word 
- __m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff); - // pick bits 1, 4, 7 from 3rd word - __m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - // Split systematic and parity bits - for (i = 0; i < long_cb/8; i++) { - - in0 = _mm_load_si128(inputPtr); inputPtr++; - in1 = _mm_load_si128(inputPtr); inputPtr++; - in2 = _mm_load_si128(inputPtr); inputPtr++; - - /* Deinterleave Systematic bits */ - s0 = _mm_shuffle_epi8(in0, s0_mask); - s1 = _mm_shuffle_epi8(in1, s1_mask); - s2 = _mm_shuffle_epi8(in2, s2_mask); - s = _mm_or_si128(s0, s1); - s = _mm_or_si128(s, s2); - - _mm_store_si128(sysPtr, s); - sysPtr++; - - /* Deinterleave parity 0 bits */ - p00 = _mm_shuffle_epi8(in0, p00_mask); - p01 = _mm_shuffle_epi8(in1, p01_mask); - p02 = _mm_shuffle_epi8(in2, p02_mask); - p0 = _mm_or_si128(p00, p01); - p0 = _mm_or_si128(p0, p02); - - _mm_store_si128(pa0Ptr, p0); - pa0Ptr++; - - /* Deinterleave parity 1 bits */ - p10 = _mm_shuffle_epi8(in0, p10_mask); - p11 = _mm_shuffle_epi8(in1, p11_mask); - p12 = _mm_shuffle_epi8(in2, p12_mask); - p1 = _mm_or_si128(p10, p11); - p1 = _mm_or_si128(p1, p12); - - _mm_store_si128(pa1Ptr, p1); - pa1Ptr++; - - } - - for (i = 0; i < 3; i++) { - h->syst[cbidx][i+long_cb] = input[3*long_cb + 2*i]; - h->parity0[cbidx][i+long_cb] = input[3*long_cb + 2*i + 1]; - } - for (i = 0; i < 3; i++) { - h->app2[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i]; - h->parity1[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i + 1]; - } - -} - -/* Runs 1 turbo decoder iteration */ -void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) -{ - - int16_t *tmp_app[SRSLTE_TDEC_MAX_NPAR]; - - if (h->current_cbidx >= 0) { - uint16_t *inter = h->interleaver[h->current_cbidx].forward; - uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - -#ifndef LV_HAVE_AVX2 - input[1] = NULL; -#endif - - h->cb_mask = (input[0]?1:0) | 
(input[1]?2:0); - - for (int i=0;imax_par_cb;i++) { - if (h->n_iter[i] == 0 && input[i]) { - //printf("deinterleaveing %d\n",i); - deinterleave_input_simd(h, input[i], i, long_cb); - } - } - - // Add apriori information to decoder 1 - for (int i=0;imax_par_cb;i++) { - if (h->n_iter[i] > 0 && input[i]) { - srslte_vec_sub_sss(h->app1[i], h->ext1[i], h->app1[i], long_cb); - } - } - - // Run MAP DEC #1 - for (int i=0;imax_par_cb;i++) { - if (input[i]) { - tmp_app[i] = h->n_iter[i]?h->app1[i]:NULL; - } else { - tmp_app[i] = NULL; - } - } - map_simd_dec(&h->dec, h->syst, tmp_app, h->parity0, h->ext1, h->cb_mask, long_cb); - - // Convert aposteriori information into extrinsic information - for (int i=0;imax_par_cb;i++) { - if (h->n_iter[i] > 0 && input[i]) { - srslte_vec_sub_sss(h->ext1[i], h->app1[i], h->ext1[i], long_cb); - } - } - - // Interleave extrinsic output of DEC1 to form apriori info for decoder 2 - for (int i=0;imax_par_cb;i++) { - if (input[i]) { - srslte_vec_lut_sss(h->ext1[i], deinter, h->app2[i], long_cb); - } - } - - // Run MAP DEC #2. 
2nd decoder uses apriori information as systematic bits - map_simd_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, h->cb_mask, long_cb); - - // Deinterleaved extrinsic bits become apriori info for decoder 1 - for (int i=0;imax_par_cb;i++) { - if (input[i]) { - srslte_vec_lut_sss(h->ext2[i], inter, h->app1[i], long_cb); - } - } - - for (int i=0;imax_par_cb;i++) { - if (input[i]) { - h->n_iter[i]++; - } - } - } else { - fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_reset() first\n"); - } -} - -/* Resets the decoder and sets the codeblock length */ -int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, uint32_t long_cb) -{ - if (long_cb > h->max_long_cb) { - fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n", - h->max_long_cb); - return -1; - } - for (int i=0;imax_par_cb;i++) { - h->n_iter[i] = 0; - } - h->cb_mask = 0; - h->current_cbidx = srslte_cbsegm_cbindex(long_cb); - if (h->current_cbidx < 0) { - fprintf(stderr, "Invalid CB length %d\n", long_cb); - return -1; - } - return 0; -} - -int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h, uint32_t cb_idx) -{ - h->n_iter[cb_idx] = 0; - return 0; -} - -int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h, uint32_t cb_idx) -{ - return h->n_iter[cb_idx]; -} - -void tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb) -{ - __m128i zero = _mm_set1_epi16(0); - __m128i lsb_mask = _mm_set1_epi16(1); - - __m128i *appPtr = (__m128i*) h->app1[cbidx]; - __m128i *outPtr = (__m128i*) output; - __m128i ap, out, out0, out1; - - for (uint32_t i = 0; i < long_cb/16; i++) { - ap = _mm_load_si128(appPtr); appPtr++; - out0 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask); - ap = _mm_load_si128(appPtr); appPtr++; - out1 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask); - - out = _mm_packs_epi16(out0, out1); - _mm_store_si128(outPtr, out); - outPtr++; - } - if (long_cb%16) { - for (int i=0;i<8;i++) { - output[long_cb-8+i] = 
h->app1[cbidx][long_cb-8+i]>0?1:0; - } - } -} - -void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) -{ - for (int i=0;imax_par_cb;i++) { - tdec_simd_decision(h, output[i], i, long_cb); - } -} - -void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb) -{ - uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; - - // long_cb is always byte aligned - for (uint32_t i = 0; i < long_cb/8; i++) { - uint8_t out0 = h->app1[cbidx][8*i+0]>0?mask[0]:0; - uint8_t out1 = h->app1[cbidx][8*i+1]>0?mask[1]:0; - uint8_t out2 = h->app1[cbidx][8*i+2]>0?mask[2]:0; - uint8_t out3 = h->app1[cbidx][8*i+3]>0?mask[3]:0; - uint8_t out4 = h->app1[cbidx][8*i+4]>0?mask[4]:0; - uint8_t out5 = h->app1[cbidx][8*i+5]>0?mask[5]:0; - uint8_t out6 = h->app1[cbidx][8*i+6]>0?mask[6]:0; - uint8_t out7 = h->app1[cbidx][8*i+7]>0?mask[7]:0; - - output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; - } -} - -void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) -{ - for (int i=0;imax_par_cb;i++) { - if (output[i]) { - srslte_tdec_simd_decision_byte_cb(h, output[i], i, long_cb); - } - } -} - - -/* Runs nof_iterations iterations and decides the output bits */ -int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR], - uint32_t nof_iterations, uint32_t long_cb) -{ - if (srslte_tdec_simd_reset(h, long_cb)) { - return SRSLTE_ERROR; - } - - do { - srslte_tdec_simd_iteration(h, input, long_cb); - } while (h->n_iter[0] < nof_iterations); - - srslte_tdec_simd_decision_byte(h, output, long_cb); - - return SRSLTE_SUCCESS; -} - -#endif - - diff --git a/lib/src/phy/fec/turbodecoder_simd_inter.c b/lib/src/phy/fec/turbodecoder_simd_inter.c deleted file mode 100644 index 3c04e2136..000000000 --- a/lib/src/phy/fec/turbodecoder_simd_inter.c +++ 
/dev/null @@ -1,299 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -#include -#include -#include -#include -#include -#include - -#include "srslte/phy/fec/turbodecoder_simd_inter.h" -#include "srslte/phy/utils/vector.h" - -#define TOTALTAIL 12 - -#ifdef LV_HAVE_SSE -#include - -void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb); -void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb); -void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb); -void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb); - - -static void map_sse_inter_dec(srslte_tdec_simd_inter_t * h, int16_t * input, int16_t * parity, int16_t * output, - uint32_t long_cb) -{ - map_see_inter_alpha(h, input, parity, long_cb); - map_sse_inter_beta(h, input, parity, output, long_cb); -} - -/************************************************ - * - * TURBO DECODER INTERFACE - * - ************************************************/ -int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h, uint32_t 
max_par_cb, uint32_t max_long_cb) -{ - int ret = -1; - bzero(h, sizeof(srslte_tdec_simd_inter_t)); - uint32_t len = max_long_cb + 12; - - h->max_long_cb = max_long_cb; - h->max_par_cb = max_par_cb; - - h->llr1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->llr1) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->llr2 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->llr2) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->w = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->w) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->syst0 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->syst0) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->syst1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->syst1) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->parity0) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb); - if (!h->parity1) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - h->alpha = srslte_vec_malloc(sizeof(int16_t) * 8*(len+12) * h->max_par_cb); - if (!h->alpha) { - perror("srslte_vec_malloc"); - goto clean_and_exit; - } - - for (int i=0;iinterleaver[i], srslte_cbsegm_cbsize(i)) < 0) { - goto clean_and_exit; - } - srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i)); - } - h->current_cbidx = -1; - ret = 0; -clean_and_exit:if (ret == -1) { - srslte_tdec_simd_inter_free(h); - } - return ret; -} - -void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h) -{ - if (h->llr1) { - free(h->llr1); - } - if (h->llr2) { - free(h->llr2); - } - if (h->w) { - free(h->w); - } - if (h->syst0) { - free(h->syst0); - } - if (h->syst1) { - free(h->syst1); - } - if (h->parity0) { - free(h->parity0); - } - if (h->parity1) 
{ - free(h->parity1); - } - if (h->alpha) { - free(h->alpha); - } - - for (int i=0;iinterleaver[i]); - } - - bzero(h, sizeof(srslte_tdec_simd_inter_t)); -} - - -/* Deinterleave for inter-frame parallelization */ -void extract_input(srslte_tdec_simd_inter_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb) -{ - for (int i=0;isyst0[h->max_par_cb*i+cbidx] = input[3*i+0]; - h->parity0[h->max_par_cb*i+cbidx] = input[3*i+1]; - h->parity1[h->max_par_cb*i+cbidx] = input[3*i+2]; - } - for (int i = long_cb; i < long_cb + 3; i++) { - h->syst0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb)]; - h->syst1[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb)]; - h->parity0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb) + 1]; - h->parity0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb) + 2]; - } -} - -void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb) -{ - - if (h->current_cbidx >= 0) { - - uint16_t *inter = h->interleaver[h->current_cbidx].forward; - uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - - // Prepare systematic and parity bits for MAP DEC #1 - for (int i=0;in_iter[i] == 0) { - extract_input(h, input[i], i, long_cb); - } - srslte_vec_sum_sss(h->syst0, h->w, h->syst0, long_cb*h->max_par_cb); - } - - // Run MAP DEC #1 - map_sse_inter_dec(h, h->syst0, h->parity0, h->llr1, long_cb); - - // Prepare systematic and parity bits for MAP DEC #1 - sse_inter_extract_syst1(h, inter, long_cb); - - // Run MAP DEC #2 - map_sse_inter_dec(h, h->syst1, h->parity1, h->llr2, long_cb); - - // Update a-priori LLR from the last iteration - sse_inter_update_w(h, deinter, long_cb); - - } else { - fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_inter_reset() first\n"); - } -} - -int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h, uint32_t cb_idx) -{ - for (int i=0;icurrent_long_cb;i++) { - h->w[h->max_par_cb*i+cb_idx] = 
0; - } - return 0; -} - -int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h, uint32_t long_cb) -{ - if (long_cb > h->max_long_cb) { - fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n", - h->max_long_cb); - return -1; - } - h->current_long_cb = long_cb; - h->current_cbidx = srslte_cbsegm_cbindex(long_cb); - if (h->current_cbidx < 0) { - fprintf(stderr, "Invalid CB length %d\n", long_cb); - return -1; - } - memset(h->w, 0, sizeof(int16_t) * long_cb * h->max_par_cb); - return 0; -} - -void srslte_tdec_simd_inter_decision_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb) -{ - uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - uint32_t i; - for (i = 0; i < long_cb; i++) { - output[i] = (h->llr2[h->max_par_cb*deinter[i]+cb_idx] > 0) ? 1 : 0; - } -} - -void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb) -{ - for (int i=0;iinterleaver[h->current_cbidx].reverse; - -#define indexOf_cb(idx, cb) (h->max_par_cb*(deinter[8*i+idx])+cb) - - // long_cb is always byte aligned - for (i = 0; i < long_cb/8; i++) { - uint8_t out0 = h->llr2[indexOf_cb(0, cb_idx)]>0?mask[0]:0; - uint8_t out1 = h->llr2[indexOf_cb(1, cb_idx)]>0?mask[1]:0; - uint8_t out2 = h->llr2[indexOf_cb(2, cb_idx)]>0?mask[2]:0; - uint8_t out3 = h->llr2[indexOf_cb(3, cb_idx)]>0?mask[3]:0; - uint8_t out4 = h->llr2[indexOf_cb(4, cb_idx)]>0?mask[4]:0; - uint8_t out5 = h->llr2[indexOf_cb(5, cb_idx)]>0?mask[5]:0; - uint8_t out6 = h->llr2[indexOf_cb(6, cb_idx)]>0?mask[6]:0; - uint8_t out7 = h->llr2[indexOf_cb(7, cb_idx)]>0?mask[7]:0; - - output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; - } -} - -void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb) -{ - for (int i=0;i -#include -#include -#include -#include -#include - -#include 
"srslte/phy/fec/turbodecoder_simd.h" -#include "srslte/phy/utils/vector.h" - -#include - -#ifdef LV_HAVE_SSE -#include -#endif - - -#define NUMSTATES 8 -#define NINPUTS 2 -#define TAIL 3 -#define TOTALTAIL 12 - -#define INF 10000 -#define ZERO 0 - - -#ifdef LV_HAVE_SSE - -/* -static void print_128i(__m128i x) { - int16_t *s = (int16_t*) &x; - printf("[%d", s[0]); - for (int i=1;i<8;i++) { - printf(",%d", s[i]); - } - printf("]\n"); -} -*/ -//#define use_beta_transposed_max - -#ifndef use_beta_transposed_max - -/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */ -static inline int16_t hMax(__m128i buffer) -{ - __m128i tmp1 = _mm_sub_epi16(_mm_set1_epi16(0x7FFF), buffer); - __m128i tmp3 = _mm_minpos_epu16(tmp1); - return (int16_t)(_mm_cvtsi128_si32(tmp3)); -} - -/* Computes beta values */ -void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb) -{ - int k; - uint32_t end = long_cb + 3; - const __m128i *alphaPtr = (const __m128i*) s->alpha; - - __m128i beta_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); - __m128i g, bp, bn, alpha_k; - - /* Define the shuffle constant for the positive beta */ - __m128i shuf_bp = _mm_set_epi8( - 15, 14, // 7 - 7, 6, // 3 - 5, 4, // 2 - 13, 12, // 6 - 11, 10, // 5 - 3, 2, // 1 - 1, 0, // 0 - 9, 8 // 4 - ); - - /* Define the shuffle constant for the negative beta */ - __m128i shuf_bn = _mm_set_epi8( - 7, 6, // 3 - 15, 14, // 7 - 13, 12, // 6 - 5, 4, // 2 - 3, 2, // 1 - 11, 10, // 5 - 9, 8, // 4 - 1, 0 // 0 - ); - - alphaPtr += long_cb-1; - - /* Define shuffle for branch costs */ - __m128i shuf_g[4]; - shuf_g[3] = _mm_set_epi8(3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2); - shuf_g[2] = _mm_set_epi8(7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6); - shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10); - shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14); - __m128i gv; - int16_t *b = &s->branch[2*long_cb-8]; - __m128i *gPtr = (__m128i*) b; - /* Define 
shuffle for beta normalization */ - __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); - - /* This defines a beta computation step: - * Adds and substracts the branch metrics to the previous beta step, - * shuffles the states according to the trellis path and selects maximum state - */ -#define BETA_STEP(g) bp = _mm_add_epi16(beta_k, g);\ - bn = _mm_sub_epi16(beta_k, g);\ - bp = _mm_shuffle_epi8(bp, shuf_bp);\ - bn = _mm_shuffle_epi8(bn, shuf_bn);\ - beta_k = _mm_max_epi16(bp, bn); - - /* Loads the alpha metrics from memory and adds them to the temporal bn and bp - * metrics. Then computes horizontal maximum of both metrics and computes difference - */ -#define BETA_STEP_CNT(c,d) g = _mm_shuffle_epi8(gv, shuf_g[c]);\ - BETA_STEP(g)\ - alpha_k = _mm_load_si128(alphaPtr);\ - alphaPtr--;\ - bp = _mm_add_epi16(bp, alpha_k);\ - bn = _mm_add_epi16(bn, alpha_k);\ - output[k-d] = hMax(bn)-hMax(bp); - - /* The tail does not require to load alpha or produce outputs. Only update - * beta metrics accordingly */ - for (k=end-1; k>=long_cb; k--) { - int16_t g0 = s->branch[2*k]; - int16_t g1 = s->branch[2*k+1]; - g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1); - BETA_STEP(g); - } - - /* We inline 2 trelis steps for each normalization */ - __m128i norm; - for (; k >= 0; k-=8) { - gv = _mm_load_si128(gPtr); - gPtr--; - - BETA_STEP_CNT(0,0); - BETA_STEP_CNT(1,1); - BETA_STEP_CNT(2,2); - BETA_STEP_CNT(3,3); - norm = _mm_shuffle_epi8(beta_k, shuf_norm); - beta_k = _mm_sub_epi16(beta_k, norm); - gv = _mm_load_si128(gPtr); - gPtr--; - BETA_STEP_CNT(0,4); - BETA_STEP_CNT(1,5); - BETA_STEP_CNT(2,6); - BETA_STEP_CNT(3,7); - - norm = _mm_shuffle_epi8(beta_k, shuf_norm); - beta_k = _mm_sub_epi16(beta_k, norm); - } -} - -#endif - -/* Computes alpha metrics */ -void map_sse_alpha(map_gen_t * s, uint32_t long_cb) -{ - uint32_t k; - int16_t *alpha = s->alpha; - uint32_t i; - - alpha[0] = 0; - for (i = 1; i < 8; i++) { - alpha[i] = -INF; - } - - /* Define the shuffle constant 
for the positive alpha */ - __m128i shuf_ap = _mm_set_epi8( - 15, 14, // 7 - 9, 8, // 4 - 7, 6, // 3 - 1, 0, // 0 - 13, 12, // 6 - 11, 10, // 5 - 5, 4, // 2 - 3, 2 // 1 - ); - - /* Define the shuffle constant for the negative alpha */ - __m128i shuf_an = _mm_set_epi8( - 13, 12, // 6 - 11, 10, // 5 - 5, 4, // 2 - 3, 2, // 1 - 15, 14, // 7 - 9, 8, // 4 - 7, 6, // 3 - 1, 0 // 0 - ); - - /* Define shuffle for branch costs */ - __m128i shuf_g[4]; - shuf_g[0] = _mm_set_epi8(3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2); - shuf_g[1] = _mm_set_epi8(7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6); - shuf_g[2] = _mm_set_epi8(11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10); - shuf_g[3] = _mm_set_epi8(15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14); - - __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); - - __m128i* alphaPtr = (__m128i*) alpha; - alphaPtr++; - - __m128i gv; - __m128i *gPtr = (__m128i*) s->branch; - __m128i g, ap, an; - - __m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); - - /* This defines a alpha computation step: - * Adds and substracts the branch metrics to the previous alpha step, - * shuffles the states according to the trellis path and selects maximum state - */ -#define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \ - ap = _mm_add_epi16(alpha_k, g);\ - an = _mm_sub_epi16(alpha_k, g);\ - ap = _mm_shuffle_epi8(ap, shuf_ap);\ - an = _mm_shuffle_epi8(an, shuf_an);\ - alpha_k = _mm_max_epi16(ap, an);\ - _mm_store_si128(alphaPtr, alpha_k);\ - alphaPtr++; \ - - /* In this loop, we compute 8 steps and normalize twice for each branch metrics memory load */ - __m128i norm; - for (k = 0; k < long_cb/8; k++) { - gv = _mm_load_si128(gPtr); - gPtr++; - ALPHA_STEP(0); - ALPHA_STEP(1); - ALPHA_STEP(2); - ALPHA_STEP(3); - norm = _mm_shuffle_epi8(alpha_k, shuf_norm); - alpha_k = _mm_sub_epi16(alpha_k, norm); - gv = _mm_load_si128(gPtr); - gPtr++; - ALPHA_STEP(0); - ALPHA_STEP(1); - ALPHA_STEP(2); - ALPHA_STEP(3); - norm = _mm_shuffle_epi8(alpha_k, 
shuf_norm); - alpha_k = _mm_sub_epi16(alpha_k, norm); - } -} - -/* Compute branch metrics (gamma) */ -void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb) -{ - __m128i res00, res10, res01, res11, res0, res1; - __m128i in, ap, pa, g1, g0; - - __m128i *inPtr = (__m128i*) input; - __m128i *appPtr = (__m128i*) app; - __m128i *paPtr = (__m128i*) parity; - __m128i *resPtr = (__m128i*) h->branch; - - __m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0); - __m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8); - __m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff); - __m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff); - - for (int i=0;ibranch[2*i] = (input[i] - parity[i])/2; - h->branch[2*i+1] = (input[i] + parity[i])/2; - } -} - - - - - - -/*********************** - * - * This is an attempt to parallelize the horizontal max - * by doing a 8x8 tranpose of the vectors and computing max - * in cascade. 
However since we need to store 16 registers - * for the positive and negative values the performance is not very good - */ - - -#ifdef use_beta_transposed_max - -static inline __m128i transposed_max(__m128i a, __m128i b, __m128i c, __m128i d, - __m128i e, __m128i f, __m128i g, __m128i h) -{ - // Transpose 8 vectors - __m128i t0 = _mm_unpacklo_epi16(a, b); - __m128i t1 = _mm_unpacklo_epi16(c, d); - __m128i t2 = _mm_unpacklo_epi16(e, f); - __m128i t3 = _mm_unpacklo_epi16(g, h); - __m128i t4 = _mm_unpackhi_epi16(a, b); - __m128i t5 = _mm_unpackhi_epi16(c, d); - __m128i t6 = _mm_unpackhi_epi16(e, f); - __m128i t7 = _mm_unpackhi_epi16(g, h); - - __m128i s0 = _mm_unpacklo_epi32(t0, t1); - __m128i s1 = _mm_unpackhi_epi32(t0, t1); - __m128i s2 = _mm_unpacklo_epi32(t2, t3); - __m128i s3 = _mm_unpackhi_epi32(t2, t3); - __m128i s4 = _mm_unpacklo_epi32(t4, t5); - __m128i s5 = _mm_unpackhi_epi32(t4, t5); - __m128i s6 = _mm_unpacklo_epi32(t6, t7); - __m128i s7 = _mm_unpackhi_epi32(t6, t7); - - __m128i x0 = _mm_unpacklo_epi64(s0, s2); - __m128i x1 = _mm_unpackhi_epi64(s0, s2); - __m128i x2 = _mm_unpacklo_epi64(s1, s3); - __m128i x3 = _mm_unpackhi_epi64(s1, s3); - __m128i x4 = _mm_unpacklo_epi64(s4, s6); - __m128i x5 = _mm_unpackhi_epi64(s4, s6); - __m128i x6 = _mm_unpacklo_epi64(s5, s7); - __m128i x7 = _mm_unpackhi_epi64(s5, s7); - - // Cascade max on the transposed vector - __m128i res = _mm_max_epi16(x0, - _mm_max_epi16(x1, - _mm_max_epi16(x2, - _mm_max_epi16(x3, - _mm_max_epi16(x4, - _mm_max_epi16(x5, - _mm_max_epi16(x6, - x7))))))); - - return res; -} - -void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb) -{ - int k; - uint32_t end = long_cb + 3; - const __m128i *alphaPtr = (const __m128i*) s->alpha; - - __m128i beta_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); - __m128i g, alpha_k; - __m128i bn, bn_0, bn_1, bn_2, bn_3, bn_4, bn_5, bn_6, bn_7; - __m128i bp, bp_0, bp_1, bp_2, bp_3, bp_4, bp_5, bp_6, bp_7; - - /* Define the shuffle constant 
for the positive beta */ - __m128i shuf_bp = _mm_set_epi8( - 15, 14, // 7 - 7, 6, // 3 - 5, 4, // 2 - 13, 12, // 6 - 11, 10, // 5 - 3, 2, // 1 - 1, 0, // 0 - 9, 8 // 4 - ); - - /* Define the shuffle constant for the negative beta */ - __m128i shuf_bn = _mm_set_epi8( - 7, 6, // 3 - 15, 14, // 7 - 13, 12, // 6 - 5, 4, // 2 - 3, 2, // 1 - 11, 10, // 5 - 9, 8, // 4 - 1, 0 // 0 - ); - - alphaPtr += long_cb-1; - - /* Define shuffle for branch costs */ - __m128i shuf_g[4]; - shuf_g[3] = _mm_set_epi8(3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2); - shuf_g[2] = _mm_set_epi8(7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6); - shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10); - shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14); - __m128i gv; - int16_t *b = &s->branch[2*long_cb-8]; - __m128i *gPtr = (__m128i*) b; - /* Define shuffle for beta normalization */ - __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); - - /* This defines a beta computation step: - * Adds and substracts the branch metrics to the previous beta step, - * shuffles the states according to the trellis path and selects maximum state - */ -#define BETA_STEP(g) bp = _mm_add_epi16(beta_k, g);\ - bn = _mm_sub_epi16(beta_k, g);\ - bp = _mm_shuffle_epi8(bp, shuf_bp);\ - bn = _mm_shuffle_epi8(bn, shuf_bn);\ - beta_k = _mm_max_epi16(bp, bn); - - /* Loads the alpha metrics from memory and adds them to the temporal bn and bp - * metrics. - */ -#define BETA_STEP_CNT(c,d) g = _mm_shuffle_epi8(gv, shuf_g[c]);\ - BETA_STEP(g)\ - alpha_k = _mm_load_si128(alphaPtr);\ - alphaPtr--;\ - bp_##d = _mm_add_epi16(bp, alpha_k);\ - bn_##d = _mm_add_epi16(bn, alpha_k);\ - - /* The tail does not require to load alpha or produce outputs. 
Only update - * beta metrics accordingly */ - for (k=end-1; k>=long_cb; k--) { - int16_t g0 = s->branch[2*k]; - int16_t g1 = s->branch[2*k+1]; - g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1); - BETA_STEP(g); - } - - /* We inline 2 trelis steps for each normalization */ - __m128i norm; - __m128i *outPtr = (__m128i*) &output[long_cb-8]; - for (; k >= 0; k-=8) { - gv = _mm_load_si128(gPtr); - gPtr--; - - BETA_STEP_CNT(0,0); - BETA_STEP_CNT(1,1); - BETA_STEP_CNT(2,2); - BETA_STEP_CNT(3,3); - norm = _mm_shuffle_epi8(beta_k, shuf_norm); - beta_k = _mm_sub_epi16(beta_k, norm); - gv = _mm_load_si128(gPtr); - gPtr--; - BETA_STEP_CNT(0,4); - BETA_STEP_CNT(1,5); - BETA_STEP_CNT(2,6); - BETA_STEP_CNT(3,7); - norm = _mm_shuffle_epi8(beta_k, shuf_norm); - beta_k = _mm_sub_epi16(beta_k, norm); - - __m128i bn_transp = transposed_max(bn_7, bn_6, bn_5, bn_4, bn_3, bn_2, bn_1, bn_0); - __m128i bp_transp = transposed_max(bp_7, bp_6, bp_5, bp_4, bp_3, bp_2, bp_1, bp_0); - __m128i outval = _mm_sub_epi16(bp_transp,bn_transp); - _mm_store_si128(outPtr, outval); - outPtr--; - } -} -#endif - - - - -#endif - - +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "srslte/phy/fec/turbodecoder_sse.h" +#include "srslte/phy/utils/vector.h" + +#include + +#ifdef LV_HAVE_SSE +#include +#include + +#endif + + +#define NUMSTATES 8 +#define NINPUTS 2 +#define TAIL 3 +#define TOTALTAIL 12 + +#define INF 10000 + + +#ifdef LV_HAVE_SSE + + +#define debug_enabled 0 + +#if debug_enabled +#define debug_state(c,d) printf("k=%5d, in=%5d, pa=%5d, out=%5d, alpha=", k-d,\ + s->branch[2*(k-d)] + s->branch[2*(k-d)+1], \ + -s->branch[2*(k-d)] + s->branch[2*(k-d)+1], output[k-d]);print_128i(alpha_k);\ + printf(", beta=");print_128i(beta_k);printf("\n"); + +static void print_128i(__m128i x) { + int16_t *s = (int16_t*) &x; + printf("[%5d", s[0]); + for (int i=1;i<8;i++) { + printf(",%5d", s[i]); + } + printf("]"); +} + +static uint32_t max_128i(__m128i x) { + int16_t *s = (int16_t*) &x; + int16_t m = -INF; + uint32_t max = 0; + for (int i=1;i<8;i++) { + if (s[i] > m) { + max = i; + m = s[i]; + } + } + return max; +} + +#else +#define debug_state(c,d) +#endif + + +//#define use_beta_transposed_max + +#ifndef use_beta_transposed_max + +/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */ +static inline int16_t hMax(__m128i buffer) +{ + __m128i tmp1 = _mm_sub_epi16(_mm_set1_epi16(0x7FFF), buffer); + __m128i tmp3 = _mm_minpos_epu16(tmp1); + return (int16_t)(_mm_cvtsi128_si32(tmp3)); +} + +/* Computes beta values */ +void tdec_sse_beta(tdec_sse_t * s, int16_t * output, uint32_t long_cb) +{ + int k; + uint32_t end = long_cb + 3; + const __m128i *alphaPtr = (const __m128i*) s->alpha; + + __m128i beta_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); + __m128i g, bp, bn, alpha_k; + + /* Define the shuffle constant for the positive beta */ + __m128i shuf_bp = _mm_set_epi8( + 15, 14, // 7 + 7, 6, // 3 + 5, 4, // 2 + 13, 12, // 6 + 11, 10, // 5 + 3, 2, // 1 + 1, 0, // 0 + 9, 8 // 4 + ); + + /* Define the shuffle constant 
for the negative beta */ + __m128i shuf_bn = _mm_set_epi8( + 7, 6, // 3 + 15, 14, // 7 + 13, 12, // 6 + 5, 4, // 2 + 3, 2, // 1 + 11, 10, // 5 + 9, 8, // 4 + 1, 0 // 0 + ); + + alphaPtr += long_cb-1; + + /* Define shuffle for branch costs */ + __m128i shuf_g[4]; + shuf_g[3] = _mm_set_epi8(3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2); + shuf_g[2] = _mm_set_epi8(7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6); + shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10); + shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14); + __m128i gv; + int16_t *b = &s->branch[2*long_cb-8]; + __m128i *gPtr = (__m128i*) b; + /* Define shuffle for beta normalization */ + __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); + + /* This defines a beta computation step: + * Adds and substracts the branch metrics to the previous beta step, + * shuffles the states according to the trellis path and selects maximum state + */ +#define BETA_STEP(g) bp = _mm_add_epi16(beta_k, g);\ + bn = _mm_sub_epi16(beta_k, g);\ + bp = _mm_shuffle_epi8(bp, shuf_bp);\ + bn = _mm_shuffle_epi8(bn, shuf_bn);\ + beta_k = _mm_max_epi16(bp, bn); + + /* Loads the alpha metrics from memory and adds them to the temporal bn and bp + * metrics. Then computes horizontal maximum of both metrics and computes difference + */ +#define BETA_STEP_CNT(c,d) g = _mm_shuffle_epi8(gv, shuf_g[c]);\ + BETA_STEP(g)\ + alpha_k = _mm_load_si128(alphaPtr);\ + alphaPtr--;\ + bp = _mm_add_epi16(bp, alpha_k);\ + bn = _mm_add_epi16(bn, alpha_k);\ + output[k-d] = hMax(bn)-hMax(bp);\ + debug_state(c,d); + + + /* The tail does not require to load alpha or produce outputs. 
Only update + * beta metrics accordingly */ + for (k=end-1; k>=long_cb; k--) { + int16_t g0 = s->branch[2*k]; + int16_t g1 = s->branch[2*k+1]; + g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1); + BETA_STEP(g); + } + + /* We inline 2 trelis steps for each normalization */ + __m128i norm; + for (; k >= 0; k-=8) { + gv = _mm_load_si128(gPtr); + gPtr--; + + BETA_STEP_CNT(0,0); + BETA_STEP_CNT(1,1); + BETA_STEP_CNT(2,2); + BETA_STEP_CNT(3,3); + norm = _mm_shuffle_epi8(beta_k, shuf_norm); + beta_k = _mm_sub_epi16(beta_k, norm); + gv = _mm_load_si128(gPtr); + gPtr--; + BETA_STEP_CNT(0,4); + BETA_STEP_CNT(1,5); + BETA_STEP_CNT(2,6); + BETA_STEP_CNT(3,7); + + norm = _mm_shuffle_epi8(beta_k, shuf_norm); + beta_k = _mm_sub_epi16(beta_k, norm); + } +} + +#endif + +/* Computes alpha metrics */ +void tdec_sse_alpha(tdec_sse_t * s, uint32_t long_cb) +{ + uint32_t k; + int16_t *alpha = s->alpha; + uint32_t i; + + alpha[0] = 0; + for (i = 1; i < 8; i++) { + alpha[i] = -INF; + } + + /* Define the shuffle constant for the positive alpha */ + __m128i shuf_ap = _mm_set_epi8( + 15, 14, // 7 + 9, 8, // 4 + 7, 6, // 3 + 1, 0, // 0 + 13, 12, // 6 + 11, 10, // 5 + 5, 4, // 2 + 3, 2 // 1 + ); + + /* Define the shuffle constant for the negative alpha */ + __m128i shuf_an = _mm_set_epi8( + 13, 12, // 6 + 11, 10, // 5 + 5, 4, // 2 + 3, 2, // 1 + 15, 14, // 7 + 9, 8, // 4 + 7, 6, // 3 + 1, 0 // 0 + ); + + /* Define shuffle for branch costs */ + __m128i shuf_g[4]; + shuf_g[0] = _mm_set_epi8(3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2); + shuf_g[1] = _mm_set_epi8(7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6); + shuf_g[2] = _mm_set_epi8(11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10); + shuf_g[3] = _mm_set_epi8(15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14); + + __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); + + __m128i* alphaPtr = (__m128i*) alpha; + alphaPtr++; + + __m128i gv; + __m128i *gPtr = (__m128i*) s->branch; + __m128i g, ap, an; + + __m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, 
-INF, -INF, 0); + + /* This defines a alpha computation step: + * Adds and substracts the branch metrics to the previous alpha step, + * shuffles the states according to the trellis path and selects maximum state + */ +#define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \ + ap = _mm_add_epi16(alpha_k, g);\ + an = _mm_sub_epi16(alpha_k, g);\ + ap = _mm_shuffle_epi8(ap, shuf_ap);\ + an = _mm_shuffle_epi8(an, shuf_an);\ + alpha_k = _mm_max_epi16(ap, an);\ + _mm_store_si128(alphaPtr, alpha_k);\ + alphaPtr++; \ + + /* In this loop, we compute 8 steps and normalize twice for each branch metrics memory load */ + __m128i norm; + for (k = 0; k < long_cb/8; k++) { + gv = _mm_load_si128(gPtr); + gPtr++; + ALPHA_STEP(0); + ALPHA_STEP(1); + ALPHA_STEP(2); + ALPHA_STEP(3); + norm = _mm_shuffle_epi8(alpha_k, shuf_norm); + alpha_k = _mm_sub_epi16(alpha_k, norm); + gv = _mm_load_si128(gPtr); + gPtr++; + ALPHA_STEP(0); + ALPHA_STEP(1); + ALPHA_STEP(2); + ALPHA_STEP(3); + norm = _mm_shuffle_epi8(alpha_k, shuf_norm); + alpha_k = _mm_sub_epi16(alpha_k, norm); + } +} + +/* Compute branch metrics (gamma) */ +void tdec_sse_gamma(tdec_sse_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb) +{ + __m128i res00, res10, res01, res11, res0, res1; + __m128i in, ap, pa, g1, g0; + + __m128i *inPtr = (__m128i*) input; + __m128i *appPtr = (__m128i*) app; + __m128i *paPtr = (__m128i*) parity; + __m128i *resPtr = (__m128i*) h->branch; + + __m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0); + __m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8); + __m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff); + __m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff); + + for (int i=0;ibranch[2*i], h->branch[2*i+1]); + } + + for (int i=long_cb;ibranch[2*i] = (input[i] - parity[i])/2; + h->branch[2*i+1] = (input[i] + 
parity[i])/2; + } +} + + +/* Inititalizes constituent decoder object */ +int tdec_sse_init(void **hh, uint32_t max_long_cb) +{ + *hh = calloc(1, sizeof(tdec_sse_t)); + + tdec_sse_t *h = (tdec_sse_t*) *hh; + + h->max_long_cb = max_long_cb; + + h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + TOTALTAIL + 1) * NUMSTATES); + if (!h->alpha) { + perror("srslte_vec_malloc"); + return -1; + } + h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + TOTALTAIL + 1) * NUMSTATES); + if (!h->branch) { + perror("srslte_vec_malloc"); + return -1; + } + return 1; +} + +void tdec_sse_free(void *hh) +{ + tdec_sse_t *h = (tdec_sse_t*) hh; + + if (h) { + if (h->alpha) { + free(h->alpha); + } + if (h->branch) { + free(h->branch); + } + free(h); + } +} + +/* Runs one instance of a decoder */ +void tdec_sse_dec(void *hh, int16_t * input, int16_t *app, int16_t * parity, + int16_t *output, uint32_t long_cb) +{ + tdec_sse_t *h = (tdec_sse_t*) hh; + + // Compute branch metrics + tdec_sse_gamma(h, input, app, parity, long_cb); + + // Forward recursion + tdec_sse_alpha(h, long_cb); + + // Backwards recursion + LLR computation + tdec_sse_beta(h, output, long_cb); +} + +/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into + * 3 buffers ready to be used by compute_gamma() + */ +void tdec_sse_extract_input(int16_t *input, int16_t *syst0, int16_t *app2, int16_t *parity0, int16_t *parity1, uint32_t long_cb) { + uint32_t i; + + __m128i *inputPtr = (__m128i*) input; + __m128i in0, in1, in2; + __m128i s0, s1, s2, s; + __m128i p00, p01, p02, p0; + __m128i p10, p11, p12, p1; + + __m128i *sysPtr = (__m128i*) syst0; + __m128i *pa0Ptr = (__m128i*) parity0; + __m128i *pa1Ptr = (__m128i*) parity1; + + // pick bits 0, 3, 6 from 1st word + __m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0); + // pick bits 1, 4, 7 from 2st word + __m128i s1_mask = 
_mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff); + // pick bits 2, 5 from 3rd word + __m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + + // pick bits 1, 4, 7 from 1st word + __m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2); + // pick bits 2, 5, from 2st word + __m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff); + // pick bits 0, 3, 6 from 3rd word + __m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + + // pick bits 2, 5 from 1st word + __m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4); + // pick bits 0, 3, 6, from 2st word + __m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff); + // pick bits 1, 4, 7 from 3rd word + __m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + + // Split systematic and parity bits + for (i = 0; i < long_cb/8; i++) { + + in0 = _mm_load_si128(inputPtr); inputPtr++; + in1 = _mm_load_si128(inputPtr); inputPtr++; + in2 = _mm_load_si128(inputPtr); inputPtr++; + + /* Deinterleave Systematic bits */ + s0 = _mm_shuffle_epi8(in0, s0_mask); + s1 = _mm_shuffle_epi8(in1, s1_mask); + s2 = _mm_shuffle_epi8(in2, s2_mask); + s = _mm_or_si128(s0, s1); + s = _mm_or_si128(s, s2); + + _mm_store_si128(sysPtr, s); + sysPtr++; + + /* Deinterleave parity 0 bits */ + p00 = _mm_shuffle_epi8(in0, p00_mask); + p01 = _mm_shuffle_epi8(in1, p01_mask); + p02 = _mm_shuffle_epi8(in2, p02_mask); + p0 = _mm_or_si128(p00, p01); + p0 = _mm_or_si128(p0, p02); + + _mm_store_si128(pa0Ptr, p0); + pa0Ptr++; + + /* Deinterleave parity 1 bits */ + p10 = _mm_shuffle_epi8(in0, p10_mask); + p11 = _mm_shuffle_epi8(in1, p11_mask); + p12 = _mm_shuffle_epi8(in2, p12_mask); + p1 = _mm_or_si128(p10, p11); + p1 = 
_mm_or_si128(p1, p12); + + _mm_store_si128(pa1Ptr, p1); + pa1Ptr++; + + } + + for (i = 0; i < 3; i++) { + syst0[i+long_cb] = input[3*long_cb + 2*i]; + parity0[i+long_cb] = input[3*long_cb + 2*i + 1]; + } + for (i = 0; i < 3; i++) { + app2[i+long_cb] = input[3*long_cb + 6 + 2*i]; + parity1[i+long_cb] = input[3*long_cb + 6 + 2*i + 1]; + } +} + +void tdec_sse_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb) +{ + uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; + + // long_cb is always byte aligned + for (uint32_t i = 0; i < long_cb/8; i++) { + uint8_t out0 = app1[8*i+0]>0?mask[0]:0; + uint8_t out1 = app1[8*i+1]>0?mask[1]:0; + uint8_t out2 = app1[8*i+2]>0?mask[2]:0; + uint8_t out3 = app1[8*i+3]>0?mask[3]:0; + uint8_t out4 = app1[8*i+4]>0?mask[4]:0; + uint8_t out5 = app1[8*i+5]>0?mask[5]:0; + uint8_t out6 = app1[8*i+6]>0?mask[6]:0; + uint8_t out7 = app1[8*i+7]>0?mask[7]:0; + + output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; + } +} + + + + +/*********************** + * + * This is an attempt to parallelize the horizontal max + * by doing a 8x8 tranpose of the vectors and computing max + * in cascade. 
However since we need to store 16 registers + * for the positive and negative values the performance is not very good + */ + + +#ifdef use_beta_transposed_max + +static inline __m128i transposed_max(__m128i a, __m128i b, __m128i c, __m128i d, + __m128i e, __m128i f, __m128i g, __m128i h) +{ + // Transpose 8 vectors + __m128i t0 = _mm_unpacklo_epi16(a, b); + __m128i t1 = _mm_unpacklo_epi16(c, d); + __m128i t2 = _mm_unpacklo_epi16(e, f); + __m128i t3 = _mm_unpacklo_epi16(g, h); + __m128i t4 = _mm_unpackhi_epi16(a, b); + __m128i t5 = _mm_unpackhi_epi16(c, d); + __m128i t6 = _mm_unpackhi_epi16(e, f); + __m128i t7 = _mm_unpackhi_epi16(g, h); + + __m128i s0 = _mm_unpacklo_epi32(t0, t1); + __m128i s1 = _mm_unpackhi_epi32(t0, t1); + __m128i s2 = _mm_unpacklo_epi32(t2, t3); + __m128i s3 = _mm_unpackhi_epi32(t2, t3); + __m128i s4 = _mm_unpacklo_epi32(t4, t5); + __m128i s5 = _mm_unpackhi_epi32(t4, t5); + __m128i s6 = _mm_unpacklo_epi32(t6, t7); + __m128i s7 = _mm_unpackhi_epi32(t6, t7); + + __m128i x0 = _mm_unpacklo_epi64(s0, s2); + __m128i x1 = _mm_unpackhi_epi64(s0, s2); + __m128i x2 = _mm_unpacklo_epi64(s1, s3); + __m128i x3 = _mm_unpackhi_epi64(s1, s3); + __m128i x4 = _mm_unpacklo_epi64(s4, s6); + __m128i x5 = _mm_unpackhi_epi64(s4, s6); + __m128i x6 = _mm_unpacklo_epi64(s5, s7); + __m128i x7 = _mm_unpackhi_epi64(s5, s7); + + // Cascade max on the transposed vector + __m128i res = _mm_max_epi16(x0, + _mm_max_epi16(x1, + _mm_max_epi16(x2, + _mm_max_epi16(x3, + _mm_max_epi16(x4, + _mm_max_epi16(x5, + _mm_max_epi16(x6, + x7))))))); + + return res; +} + +void tdec_sse_beta(tdec_sse_t * s, int16_t * output, uint32_t long_cb) +{ + int k; + uint32_t end = long_cb + 3; + const __m128i *alphaPtr = (const __m128i*) s->alpha; + + __m128i beta_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); + __m128i g, alpha_k; + __m128i bn, bn_0, bn_1, bn_2, bn_3, bn_4, bn_5, bn_6, bn_7; + __m128i bp, bp_0, bp_1, bp_2, bp_3, bp_4, bp_5, bp_6, bp_7; + + /* Define the shuffle constant 
for the positive beta */ + __m128i shuf_bp = _mm_set_epi8( + 15, 14, // 7 + 7, 6, // 3 + 5, 4, // 2 + 13, 12, // 6 + 11, 10, // 5 + 3, 2, // 1 + 1, 0, // 0 + 9, 8 // 4 + ); + + /* Define the shuffle constant for the negative beta */ + __m128i shuf_bn = _mm_set_epi8( + 7, 6, // 3 + 15, 14, // 7 + 13, 12, // 6 + 5, 4, // 2 + 3, 2, // 1 + 11, 10, // 5 + 9, 8, // 4 + 1, 0 // 0 + ); + + alphaPtr += long_cb-1; + + /* Define shuffle for branch costs */ + __m128i shuf_g[4]; + shuf_g[3] = _mm_set_epi8(3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2); + shuf_g[2] = _mm_set_epi8(7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6); + shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10); + shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14); + __m128i gv; + int16_t *b = &s->branch[2*long_cb-8]; + __m128i *gPtr = (__m128i*) b; + /* Define shuffle for beta normalization */ + __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); + + /* This defines a beta computation step: + * Adds and substracts the branch metrics to the previous beta step, + * shuffles the states according to the trellis path and selects maximum state + */ +#define BETA_STEP(g) bp = _mm_add_epi16(beta_k, g);\ + bn = _mm_sub_epi16(beta_k, g);\ + bp = _mm_shuffle_epi8(bp, shuf_bp);\ + bn = _mm_shuffle_epi8(bn, shuf_bn);\ + beta_k = _mm_max_epi16(bp, bn); + + /* Loads the alpha metrics from memory and adds them to the temporal bn and bp + * metrics. + */ +#define BETA_STEP_CNT(c,d) g = _mm_shuffle_epi8(gv, shuf_g[c]);\ + BETA_STEP(g)\ + alpha_k = _mm_load_si128(alphaPtr);\ + alphaPtr--;\ + bp_##d = _mm_add_epi16(bp, alpha_k);\ + bn_##d = _mm_add_epi16(bn, alpha_k);\ + + /* The tail does not require to load alpha or produce outputs. 
Only update + * beta metrics accordingly */ + for (k=end-1; k>=long_cb; k--) { + int16_t g0 = s->branch[2*k]; + int16_t g1 = s->branch[2*k+1]; + g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1); + BETA_STEP(g); + } + + /* We inline 2 trelis steps for each normalization */ + __m128i norm; + __m128i *outPtr = (__m128i*) &output[long_cb-8]; + for (; k >= 0; k-=8) { + gv = _mm_load_si128(gPtr); + gPtr--; + + BETA_STEP_CNT(0,0); + BETA_STEP_CNT(1,1); + BETA_STEP_CNT(2,2); + BETA_STEP_CNT(3,3); + norm = _mm_shuffle_epi8(beta_k, shuf_norm); + beta_k = _mm_sub_epi16(beta_k, norm); + gv = _mm_load_si128(gPtr); + gPtr--; + BETA_STEP_CNT(0,4); + BETA_STEP_CNT(1,5); + BETA_STEP_CNT(2,6); + BETA_STEP_CNT(3,7); + norm = _mm_shuffle_epi8(beta_k, shuf_norm); + beta_k = _mm_sub_epi16(beta_k, norm); + + __m128i bn_transp = transposed_max(bn_7, bn_6, bn_5, bn_4, bn_3, bn_2, bn_1, bn_0); + __m128i bp_transp = transposed_max(bp_7, bp_6, bp_5, bp_4, bp_3, bp_2, bp_1, bp_0); + __m128i outval = _mm_sub_epi16(bp_transp,bn_transp); + _mm_store_si128(outPtr, outval); + outPtr--; + } +} +#endif + + + + +#endif + + diff --git a/lib/src/phy/fec/turbodecoder_sse_inter.c b/lib/src/phy/fec/turbodecoder_sse_inter.c deleted file mode 100644 index d75c8a649..000000000 --- a/lib/src/phy/fec/turbodecoder_sse_inter.c +++ /dev/null @@ -1,202 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -#include -#include -#include -#include -#include -#include - -#include "srslte/phy/fec/turbodecoder_simd_inter.h" -#include "srslte/phy/utils/vector.h" - - -#define NCB 8 - -#define INF 10000 - -#ifdef LV_HAVE_SSE -#include - -void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb) -{ - __m128i *llr1Ptr = (__m128i*) h->llr1; - __m128i *wPtr = (__m128i*) h->w; - __m128i *syst1Ptr = (__m128i*) h->syst1; - - for (int i = 0; i < long_cb; i++) { - __m128i llr1 = _mm_load_si128(&llr1Ptr[inter[i]]); - __m128i w = _mm_load_si128(&wPtr[inter[i]]); - _mm_store_si128(syst1Ptr++, _mm_sub_epi16(llr1, w)); - } -} - -void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb) -{ - __m128i *llr1Ptr = (__m128i*) h->llr1; - __m128i *llr2Ptr = (__m128i*) h->llr2; - __m128i *wPtr = (__m128i*) h->w; - __m128i *syst1Ptr = (__m128i*) h->syst1; - - for (int i = 0; i < long_cb; i++) { - __m128i llr1 = _mm_load_si128(llr1Ptr++); - __m128i w = _mm_load_si128(wPtr++); - __m128i llr2 = _mm_load_si128(&llr2Ptr[deinter[i]]); - - _mm_store_si128(syst1Ptr++, _mm_add_epi16(w, _mm_sub_epi16(llr2, llr1))); - } -} - -/* Computes beta values */ -void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb) -{ - __m128i m_b[8], new[8], old[8], max1[8], max0[8]; - __m128i x, y, xy; - __m128i m1, m0; - uint32_t end = long_cb + 3; - uint32_t i; - - __m128i *inputPtr = (__m128i*) input; - __m128i *parityPtr = (__m128i*) parity; - __m128i *outputPtr = (__m128i*) output; - __m128i *alphaPtr = (__m128i*) s->alpha; - - for (int i = 0; i < 8; i++) { - old[i] = _mm_set1_epi16(0); - } - - for (int k = end - 1; k >= 0; k--) { - x = 
_mm_load_si128(inputPtr++); - y = _mm_load_si128(parityPtr++); - - xy = _mm_add_epi16(x,y); - - m_b[0] = _mm_add_epi16(old[4], xy); - m_b[1] = old[4]; - m_b[2] = _mm_add_epi16(old[5], y); - m_b[3] = _mm_add_epi16(old[5], x); - m_b[4] = _mm_add_epi16(old[6], x); - m_b[5] = _mm_add_epi16(old[6], y); - m_b[6] = old[7]; - m_b[7] = _mm_add_epi16(old[7], xy); - - new[0] = old[0]; - new[1] = _mm_add_epi16(old[0], xy); - new[2] = _mm_add_epi16(old[1], x); - new[3] = _mm_add_epi16(old[1], y); - new[4] = _mm_add_epi16(old[2], y); - new[5] = _mm_add_epi16(old[2], x); - new[6] = _mm_add_epi16(old[3], xy); - new[7] = old[3]; - - for (i = 0; i < 8; i++) { - __m128i alpha = _mm_load_si128(alphaPtr++); - max0[i] = _mm_add_epi16(alpha, m_b[i]); - max1[i] = _mm_add_epi16(alpha, new[i]); - } - - m1 = _mm_max_epi16(max1[0], max1[1]); - m0 = _mm_max_epi16(max0[0], max0[1]); - - for (i = 2; i < 8; i++) { - m1 = _mm_max_epi16(m1, max1[i]); - m0 = _mm_max_epi16(m0, max0[i]); - } - - for (i = 0; i < 8; i++) { - new[i] = _mm_max_epi16(m_b[i], new[i]); - old[i] = new[i]; - } - - __m128i out = _mm_sub_epi16(m1, m0); - _mm_store_si128(outputPtr++, out); - - // normalize - if ((k%4)==0) { - for (int i=1;i<8;i++) { - _mm_sub_epi16(old[i], old[0]); - } - } - } -} - -/* Computes alpha metrics */ -void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb) -{ - __m128i m_b[8], new[8], old[8]; - __m128i x, y, xy; - uint32_t k; - - __m128i *inputPtr = (__m128i*) input; - __m128i *parityPtr = (__m128i*) parity; - __m128i *alphaPtr = (__m128i*) s->alpha; - - old[0] = _mm_set1_epi16(0); - for (int i = 1; i < 8; i++) { - old[i] = _mm_set1_epi16(-INF); - } - - for (k = 0; k < long_cb; k++) { - x = _mm_load_si128(inputPtr++); - y = _mm_load_si128(parityPtr++); - - xy = _mm_add_epi16(x,y); - - m_b[0] = old[0]; - m_b[1] = _mm_add_epi16(old[3], y); - m_b[2] = _mm_add_epi16(old[4], y); - m_b[3] = old[7]; - m_b[4] = old[1]; - m_b[5] = _mm_add_epi16(old[2], y); - 
m_b[6] = _mm_add_epi16(old[5], y); - m_b[7] = old[6]; - - new[0] = _mm_add_epi16(old[1], xy); - new[1] = _mm_add_epi16(old[2], x); - new[2] = _mm_add_epi16(old[5], x); - new[3] = _mm_add_epi16(old[6], xy); - new[4] = _mm_add_epi16(old[0], xy); - new[5] = _mm_add_epi16(old[3], x); - new[6] = _mm_add_epi16(old[4], x); - new[7] = _mm_add_epi16(old[7], xy); - - for (int i = 0; i < 8; i++) { - new[i] = _mm_max_epi16(m_b[i], new[i]); - old[i] = new[i]; - _mm_store_si128(alphaPtr++, old[i]); - } - - // normalize - if ((k%4)==0) { - for (int i=1;i<8;i++) { - _mm_sub_epi16(old[i], old[0]); - } - } - } -} - -#endif diff --git a/lib/src/phy/modem/demod_soft.c b/lib/src/phy/modem/demod_soft.c index 0ea3ce938..d7edd78eb 100644 --- a/lib/src/phy/modem/demod_soft.c +++ b/lib/src/phy/modem/demod_soft.c @@ -32,18 +32,26 @@ #include "srslte/phy/utils/bit.h" #include "srslte/phy/modem/demod_soft.h" -// AVX implementation not useful for integers. Wait for AVX2 #ifdef LV_HAVE_SSE #include void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols); #endif - #define SCALE_SHORT_CONV_QPSK 100 #define SCALE_SHORT_CONV_QAM16 400 #define SCALE_SHORT_CONV_QAM64 700 +#define SCALE_BYTE_CONV_QPSK 20 +#define SCALE_BYTE_CONV_QAM16 30 +#define SCALE_BYTE_CONV_QAM64 40 + +void demod_bpsk_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) { + for (int i=0;i 0) { mean_texec_s = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_s, n-1); } - + + gettimeofday(&t[1], NULL); + srslte_demod_soft_demodulate_b(modulation, symbols, llr_b, num_bits / mod.nbits_x_symbol); + gettimeofday(&t[2], NULL); + get_time_interval(t); + + if (n > 0) { + mean_texec_b = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_b, n-1); + } + if (SRSLTE_VERBOSE_ISDEBUG()) { printf("bits="); srslte_vec_fprint_b(stdout, input, num_bits); @@ -200,6 +217,9 @@ int main(int argc, char **argv) { printf("llr_s="); srslte_vec_fprint_s(stdout, llr_s, num_bits); + printf("llr_b="); + srslte_vec_fprint_bs(stdout, llr_b, 
num_bits); + } // Check demodulation errors @@ -212,7 +232,9 @@ int main(int argc, char **argv) { } ret = 0; -clean_exit: +clean_exit: + free(llr_b); + free(llr_s); free(llr); free(symbols); free(output); @@ -220,7 +242,7 @@ clean_exit: srslte_modem_table_free(&mod); - printf("Mean Throughput: %.2f/%.2f. Mbps ExTime: %.2f/%.2f us\n", - num_bits/mean_texec, num_bits/mean_texec_s, mean_texec, mean_texec_s); + printf("Mean Throughput: %.2f/%.2f/%.2f. Mbps ExTime: %.2f/%.2f/%.2f us\n", + num_bits/mean_texec, num_bits/mean_texec_s, num_bits/mean_texec_b, mean_texec, mean_texec_s, mean_texec_b); exit(ret); } diff --git a/lib/src/phy/phch/pdsch.c b/lib/src/phy/phch/pdsch.c index e6dd9f52b..a203d459d 100644 --- a/lib/src/phy/phch/pdsch.c +++ b/lib/src/phy/phch/pdsch.c @@ -41,6 +41,10 @@ #include "srslte/phy/utils/vector.h" +#ifdef LV_HAVE_SSE +#include +#endif /* LV_HAVE_SSE */ + #define MAX_PDSCH_RE(cp) (2 * SRSLTE_CP_NSYMB(cp) * 12) @@ -615,10 +619,10 @@ static srslte_sequence_t *get_user_sequence(srslte_pdsch_t *q, uint16_t rnti, uint32_t rnti_idx = q->is_ue?0:rnti; // The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only for C-RNTI in the UE - if (q->users[rnti_idx] && q->users[rnti_idx]->sequence_generated && - q->users[rnti_idx]->cell_id == q->cell.id && - q->ue_rnti == rnti && - ((rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) || !q->is_ue)) + if (q->users[rnti_idx] && + q->users[rnti_idx]->sequence_generated && + q->users[rnti_idx]->cell_id == q->cell.id && + (!q->is_ue || q->ue_rnti == rnti)) { return &q->users[rnti_idx]->seq[codeword_idx][sf_idx]; } else { @@ -669,6 +673,108 @@ static int srslte_pdsch_codeword_encode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c return SRSLTE_SUCCESS; } +static void csi_correction(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, uint32_t codeword_idx, uint32_t tb_idx, void *e) +{ + + srslte_ra_nbits_t *nbits = &cfg->nbits[tb_idx]; + uint32_t qm = 0; + switch(cfg->grant.mcs[tb_idx].mod) { + + case 
SRSLTE_MOD_BPSK: + qm = 1; + break; + case SRSLTE_MOD_QPSK: + qm = 2; + break; + case SRSLTE_MOD_16QAM: + qm = 4; + break; + case SRSLTE_MOD_64QAM: + qm = 6; + break; + default: + ERROR("No modulation"); + } + + const uint32_t csi_max_idx = srslte_vec_max_fi(q->csi[codeword_idx], nbits->nof_bits / qm); + float csi_max = 1.0f; + if (csi_max_idx < nbits->nof_bits / qm) { + csi_max = q->csi[codeword_idx][csi_max_idx]; + } + int8_t *e_b = e; + int16_t *e_s = e; + float *csi_v = q->csi[codeword_idx]; + if (q->llr_is_8bit) { + for (int i = 0; i < nbits->nof_bits / qm; i++) { + const float csi = *(csi_v++) / csi_max; + for (int k = 0; k < qm; k++) { + *e_b = (int8_t) ((float) *e_b * csi); + e_b++; + } + } + } else { + int i = 0; + +#ifdef LV_HAVE_SSE + __m128 _csi_scale = _mm_set1_ps(INT16_MAX / csi_max); + __m64 *_e = (__m64 *) e; + + switch(cfg->grant.mcs[tb_idx].mod) { + case SRSLTE_MOD_QPSK: + for (; i < nbits->nof_bits - 3; i += 4) { + __m128 _csi1 = _mm_set1_ps(*(csi_v++)); + __m128 _csi2 = _mm_set1_ps(*(csi_v++)); + _csi1 = _mm_blend_ps(_csi1, _csi2, 3); + + _csi1 = _mm_mul_ps(_csi1, _csi_scale); + + _e[0] = _mm_mulhi_pi16(_e[0], _mm_cvtps_pi16(_csi1)); + _e += 1; + } + break; + case SRSLTE_MOD_16QAM: + for (; i < nbits->nof_bits - 3; i += 4) { + __m128 _csi = _mm_set1_ps(*(csi_v++)); + + _csi = _mm_mul_ps(_csi, _csi_scale); + + _e[0] = _mm_mulhi_pi16(_e[0], _mm_cvtps_pi16(_csi)); + _e += 1; + } + break; + case SRSLTE_MOD_64QAM: + for (; i < nbits->nof_bits - 11; i += 12) { + __m128 _csi1 = _mm_set1_ps(*(csi_v++)); + __m128 _csi3 = _mm_set1_ps(*(csi_v++)); + + _csi1 = _mm_mul_ps(_csi1, _csi_scale); + _csi3 = _mm_mul_ps(_csi3, _csi_scale); + __m128 _csi2 = _mm_blend_ps(_csi1, _csi3, 3); + + _e[0] = _mm_mulhi_pi16(_e[0], _mm_cvtps_pi16(_csi1)); + _e[1] = _mm_mulhi_pi16(_e[1], _mm_cvtps_pi16(_csi2)); + _e[2] = _mm_mulhi_pi16(_e[2], _mm_cvtps_pi16(_csi3)); + _e += 3; + } + break; + case SRSLTE_MOD_BPSK: + case SRSLTE_MOD_LAST: + /* Do nothing */ + break; + } + + i /= 
qm; +#endif /* LV_HAVE_SSE */ + + for (; i < nbits->nof_bits / qm; i++) { + const float csi = q->csi[codeword_idx][i] / csi_max; + for (int k = 0; k < qm; k++) { + e_s[qm * i + k] = (int16_t) ((float) e_s[qm * i + k] * csi); + } + } + } +} + static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, srslte_sch_t *dl_sch, srslte_softbuffer_rx_t *softbuffer, uint16_t rnti, uint8_t *data, uint32_t codeword_idx, uint32_t tb_idx, bool *ack) { @@ -686,47 +792,24 @@ static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c * The MAX-log-MAP algorithm used in turbo decoding is unsensitive to SNR estimation, * thus we don't need tot set it in the LLRs normalization */ - srslte_demod_soft_demodulate_s(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re); + if (q->llr_is_8bit) { + srslte_demod_soft_demodulate_b(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re); + } else { + srslte_demod_soft_demodulate_s(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re); + } /* Select scrambling sequence */ srslte_sequence_t *seq = get_user_sequence(q, rnti, codeword_idx, cfg->sf_idx, nbits->nof_bits); /* Bit scrambling */ - srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits); - - uint32_t qm = 0; - switch(cfg->grant.mcs[tb_idx].mod) { - - case SRSLTE_MOD_BPSK: - qm = 1; - break; - case SRSLTE_MOD_QPSK: - qm = 2; - break; - case SRSLTE_MOD_16QAM: - qm = 4; - break; - case SRSLTE_MOD_64QAM: - qm = 6; - break; - default: - ERROR("No modulation"); + if (q->llr_is_8bit) { + srslte_scrambling_sb_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits); + } else { + srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits); } - int16_t *e = q->e[codeword_idx]; - if (q->csi_enabled) { - const uint32_t csi_max_idx = srslte_vec_max_fi(q->csi[codeword_idx], nbits->nof_bits / qm); - float csi_max = 1.0f; - if (csi_max_idx < nbits->nof_bits / qm) { - csi_max = 
q->csi[codeword_idx][csi_max_idx]; - } - for (int i = 0; i < nbits->nof_bits / qm; i++) { - const float csi = q->csi[codeword_idx][i] / csi_max; - for (int k = 0; k < qm; k++) { - e[qm * i + k] = (int16_t) ((float) e[qm * i + k] * csi); - } - } + csi_correction(q, cfg, codeword_idx, tb_idx, q->e[codeword_idx]); } /* Return */ diff --git a/lib/src/phy/phch/prach.c b/lib/src/phy/phch/prach.c index 0aca3bf19..85717a3f3 100644 --- a/lib/src/phy/phch/prach.c +++ b/lib/src/phy/phch/prach.c @@ -654,7 +654,15 @@ int srslte_prach_detect_offset(srslte_prach_t *p, peak_to_avg[*n_indices] = p->peak_values[j] / corr_ave; } if (t_offsets) { - t_offsets[*n_indices] = (float) p->peak_offsets[j] * p->T_seq / p->N_zc; + float corr = 1.8; + if (p->peak_offsets[j] > 30) { + corr = 1.9; + } + if (p->peak_offsets[j] > 250) { + corr = 1.91; + } + + t_offsets[*n_indices] = corr*p->peak_offsets[j]/(DELTA_F_RA * p->N_zc); } (*n_indices)++; } diff --git a/lib/src/phy/phch/pucch.c b/lib/src/phy/phch/pucch.c index 834963df7..ff94483d4 100644 --- a/lib/src/phy/phch/pucch.c +++ b/lib/src/phy/phch/pucch.c @@ -416,7 +416,7 @@ void srslte_pucch_set_threshold(srslte_pucch_t *q, float format1_threshold) { } /** Initializes the PDCCH transmitter and receiver */ -int srslte_pucch_init(srslte_pucch_t *q) { +int srslte_pucch_init_(srslte_pucch_t *q, bool is_ue) { int ret = SRSLTE_ERROR_INVALID_INPUTS; if (q != NULL) { ret = SRSLTE_ERROR; @@ -426,17 +426,26 @@ int srslte_pucch_init(srslte_pucch_t *q) { return SRSLTE_ERROR; } - q->users = calloc(sizeof(srslte_pucch_user_t*), 1+SRSLTE_SIRNTI); + q->is_ue = is_ue; + + q->users = calloc(sizeof(srslte_pucch_user_t*), q->is_ue?1:(1+SRSLTE_SIRNTI)); if (!q->users) { perror("malloc"); goto clean_exit; } - + + if (srslte_sequence_init(&q->tmp_seq, 20)) { + goto clean_exit; + } + srslte_uci_cqi_pucch_init(&q->cqi); q->z = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS); q->z_tmp = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS); - q->ce = 
srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS); + + if (!q->is_ue) { + q->ce = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS); + } q->threshold_format1 = 0.8; @@ -449,13 +458,28 @@ clean_exit: return ret; } +int srslte_pucch_init_ue(srslte_pucch_t *q) { + return srslte_pucch_init_(q, true); +} + +int srslte_pucch_init_enb(srslte_pucch_t *q) { + return srslte_pucch_init_(q, false); +} + void srslte_pucch_free(srslte_pucch_t *q) { if (q->users) { - for (int rnti=0;rnti<=SRSLTE_SIRNTI;rnti++) { - srslte_pucch_clear_rnti(q, rnti); + if (q->is_ue) { + srslte_pucch_clear_rnti(q, 0); + } else { + for (int rnti = 0; rnti <= SRSLTE_SIRNTI; rnti++) { + srslte_pucch_clear_rnti(q, rnti); + } } free(q->users); } + + srslte_sequence_free(&q->tmp_seq); + srslte_uci_cqi_pucch_free(&q->cqi); if (q->z) { free(q->z); @@ -466,7 +490,7 @@ void srslte_pucch_free(srslte_pucch_t *q) { if (q->ce) { free(q->ce); } - + srslte_modem_table_free(&q->mod); bzero(q, sizeof(srslte_pucch_t)); } @@ -497,31 +521,45 @@ int srslte_pucch_set_cell(srslte_pucch_t *q, srslte_cell_t cell) { void srslte_pucch_clear_rnti(srslte_pucch_t *q, uint16_t rnti) { - if (q->users[rnti]) { + uint32_t rnti_idx = q->is_ue?0:rnti; + + if (q->users[rnti_idx]) { for (int i = 0; i < SRSLTE_NSUBFRAMES_X_FRAME; i++) { - srslte_sequence_free(&q->users[rnti]->seq_f2[i]); + srslte_sequence_free(&q->users[rnti_idx]->seq_f2[i]); } - free(q->users[rnti]); - q->users[rnti] = NULL; + free(q->users[rnti_idx]); + q->users[rnti_idx] = NULL; + q->ue_rnti = 0; } } int srslte_pucch_set_crnti(srslte_pucch_t *q, uint16_t rnti) { - if (!q->users[rnti]) { - q->users[rnti] = calloc(1, sizeof(srslte_pucch_user_t)); - if (q->users[rnti]) { - for (uint32_t sf_idx=0;sf_idxusers[rnti]->seq_f2[sf_idx], rnti, 2*sf_idx, q->cell.id)) { - fprintf(stderr, "Error computing PUCCH Format 2 scrambling sequence\n"); - srslte_pucch_clear_rnti(q, rnti); - return SRSLTE_ERROR; - } + + uint32_t rnti_idx = q->is_ue?0:rnti; + if 
(!q->users[rnti_idx] || q->is_ue) { + if (!q->users[rnti_idx]) { + q->users[rnti_idx] = calloc(1, sizeof(srslte_pucch_user_t)); + if (!q->users[rnti_idx]) { + perror("calloc"); + return -1; } - q->users[rnti]->sequence_generated = true; } + q->users[rnti_idx]->sequence_generated = false; + for (uint32_t sf_idx=0;sf_idxusers[rnti_idx]->seq_f2[sf_idx], rnti, 2*sf_idx, q->cell.id)) { + fprintf(stderr, "Error computing PUCCH Format 2 scrambling sequence\n"); + srslte_pucch_clear_rnti(q, rnti); + return SRSLTE_ERROR; + } + } + q->ue_rnti = rnti; + q->users[rnti_idx]->cell_id = q->cell.id; + q->users[rnti_idx]->sequence_generated = true; + } else { + fprintf(stderr, "Error generating PUSCH sequence: rnti=0x%x already generated\n", rnti); } - return SRSLTE_SUCCESS; + return SRSLTE_SUCCESS; } bool srslte_pucch_set_cfg(srslte_pucch_t *q, srslte_pucch_cfg_t *cfg, bool group_hopping_en) @@ -592,11 +630,36 @@ int srslte_pucch_format2ab_mod_bits(srslte_pucch_format_t format, uint8_t bits[2 } } +static srslte_sequence_t *get_user_sequence(srslte_pucch_t *q, uint16_t rnti, uint32_t sf_idx) +{ + uint32_t rnti_idx = q->is_ue?0:rnti; + + // The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only for C-RNTI in the UE + if (rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) { + if (q->users[rnti_idx] && + q->users[rnti_idx]->sequence_generated && + q->users[rnti_idx]->cell_id == q->cell.id && + (!q->is_ue || q->ue_rnti == rnti)) + { + return &q->users[rnti_idx]->seq_f2[sf_idx]; + } else { + if (srslte_sequence_pucch(&q->tmp_seq, rnti, 2 * sf_idx, q->cell.id)) { + fprintf(stderr, "Error computing PUCCH Format 2 scrambling sequence\n"); + return NULL; + } + return &q->tmp_seq; + } + } else { + fprintf(stderr, "Invalid RNTI=0x%x\n", rnti); + return NULL; + } +} + /* Encode PUCCH bits according to Table 5.4.1-1 in Section 5.4.1 of 36.211 */ static int uci_mod_bits(srslte_pucch_t *q, srslte_pucch_format_t format, uint8_t bits[SRSLTE_PUCCH_MAX_BITS], uint32_t 
sf_idx, uint16_t rnti) { uint8_t tmp[2]; - + srslte_sequence_t *seq; switch(format) { case SRSLTE_PUCCH_FORMAT_1: q->d[0] = uci_encode_format1(); @@ -612,12 +675,13 @@ static int uci_mod_bits(srslte_pucch_t *q, srslte_pucch_format_t format, uint8_t case SRSLTE_PUCCH_FORMAT_2: case SRSLTE_PUCCH_FORMAT_2A: case SRSLTE_PUCCH_FORMAT_2B: - if (q->users[rnti] && q->users[rnti]->sequence_generated) { + seq = get_user_sequence(q, rnti, sf_idx); + if (seq) { memcpy(q->bits_scram, bits, SRSLTE_PUCCH2_NOF_BITS*sizeof(uint8_t)); - srslte_scrambling_b(&q->users[rnti]->seq_f2[sf_idx], q->bits_scram); + srslte_scrambling_b(seq, q->bits_scram); srslte_mod_modulate(&q->mod, q->bits_scram, q->d, SRSLTE_PUCCH2_NOF_BITS); } else { - fprintf(stderr, "Error modulating PUCCH2 bits: rnti not set\n"); + fprintf(stderr, "Error modulating PUCCH2 bits: could not generate sequence\n"); return -1; } break; @@ -745,6 +809,8 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format, uint8_t bits[SRSLTE_PUCCH_MAX_BITS], uint32_t nof_bits) { int ret = SRSLTE_ERROR_INVALID_INPUTS; + srslte_sequence_t *seq; + if (q != NULL && ce != NULL && sf_symbols != NULL) @@ -843,7 +909,8 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format, case SRSLTE_PUCCH_FORMAT_2: case SRSLTE_PUCCH_FORMAT_2A: case SRSLTE_PUCCH_FORMAT_2B: - if (q->users[rnti] && q->users[rnti]->sequence_generated) { + seq = get_user_sequence(q, rnti, sf_idx); + if (seq) { pucch_encode_(q, format, n_pucch, sf_idx, rnti, NULL, ref, true); srslte_vec_prod_conj_ccc(q->z, ref, q->z_tmp, SRSLTE_PUCCH_MAX_SYMBOLS); for (int i=0;iz, llr_pucch2, SRSLTE_PUCCH2_NOF_BITS/2); - srslte_scrambling_s(&q->users[rnti]->seq_f2[sf_idx], llr_pucch2); + srslte_scrambling_s(seq, llr_pucch2); q->last_corr = (float) srslte_uci_decode_cqi_pucch(&q->cqi, llr_pucch2, bits, nof_bits)/2000; ret = 1; } else { - fprintf(stderr, "Decoding PUCCH2: rnti not set\n"); + fprintf(stderr, "Decoding PUCCH2: could not generate sequence\n"); 
return -1; } break; diff --git a/lib/src/phy/phch/pusch.c b/lib/src/phy/phch/pusch.c index 43bd08537..3f6a082f8 100644 --- a/lib/src/phy/phch/pusch.c +++ b/lib/src/phy/phch/pusch.c @@ -481,16 +481,24 @@ static srslte_sequence_t *get_user_sequence(srslte_pusch_t *q, uint16_t rnti, ui { uint32_t rnti_idx = q->is_ue?0:rnti; - // The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only for C-RNTI in the UE - if (q->users[rnti_idx] && q->users[rnti_idx]->sequence_generated && - q->users[rnti_idx]->cell_id == q->cell.id && - q->ue_rnti == rnti && - ((rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) || !q->is_ue)) - { - return &q->users[rnti_idx]->seq[sf_idx]; + if (rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) { + // The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only for C-RNTI in the UE + if (q->users[rnti_idx] && + q->users[rnti_idx]->sequence_generated && + q->users[rnti_idx]->cell_id == q->cell.id && + (!q->is_ue || q->ue_rnti == rnti)) + { + return &q->users[rnti_idx]->seq[sf_idx]; + } else { + if (srslte_sequence_pusch(&q->tmp_seq, rnti, 2 * sf_idx, q->cell.id, len)) { + fprintf(stderr, "Error generating temporal scrambling sequence\n"); + return NULL; + } + return &q->tmp_seq; + } } else { - srslte_sequence_pusch(&q->tmp_seq, rnti, 2 * sf_idx, q->cell.id, len); - return &q->tmp_seq; + fprintf(stderr, "Invalid RNTI=0x%x\n", rnti); + return NULL; } } @@ -603,7 +611,11 @@ int srslte_pusch_decode(srslte_pusch_t *q, srslte_dft_precoding(&q->dft_precoding, q->z, q->d, cfg->grant.L_prb, cfg->nbits.nof_symb); // Soft demodulation - srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re); + if (q->llr_is_8bit) { + srslte_demod_soft_demodulate_b(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re); + } else { + srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re); + } // Generate scrambling sequence if not pre-generated srslte_sequence_t *seq = 
get_user_sequence(q, rnti, cfg->sf_idx, cfg->nbits.nof_bits); @@ -632,7 +644,11 @@ int srslte_pusch_decode(srslte_pusch_t *q, } // Descrambling - srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits); + if (q->llr_is_8bit) { + srslte_scrambling_sb_offset(seq, q->q, 0, cfg->nbits.nof_bits); + } else { + srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits); + } // Decode ret = srslte_ulsch_uci_decode(&q->ul_sch, cfg, softbuffer, q->q, q->g, data, uci_data); diff --git a/lib/src/phy/phch/sch.c b/lib/src/phy/phch/sch.c index 2d7290e9d..f9ea054a0 100644 --- a/lib/src/phy/phch/sch.c +++ b/lib/src/phy/phch/sch.c @@ -32,12 +32,18 @@ #include #include #include +#include #include "srslte/phy/phch/pdsch.h" #include "srslte/phy/utils/bit.h" #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" -#define SRSLTE_PDSCH_MAX_TDEC_ITERS 4 +#define SRSLTE_PDSCH_MAX_TDEC_ITERS 10 + +#ifdef LV_HAVE_SSE +#include +#endif /* LV_HAVE_SSE */ + /* 36.213 Table 8.6.3-1: Mapping of HARQ-ACK offset values and the index signalled by higher layers */ float beta_harq_offset[16] = {2.0, 2.5, 3.125, 4.0, 5.0, 6.250, 8.0, 10.0, @@ -184,8 +190,6 @@ static int encode_tb_off(srslte_sch_t *q, uint32_t Qm, uint32_t rv, uint32_t nof_e_bits, uint8_t *data, uint8_t *e_bits, uint32_t w_offset) { - uint8_t parity[3] = {0, 0, 0}; - uint32_t par; uint32_t i; uint32_t cb_len=0, rp=0, wp=0, rlen=0, n_e=0; int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -213,17 +217,9 @@ static int encode_tb_off(srslte_sch_t *q, gamma = Gp%cb_segm->C; } - if (data) { + /* Reset TB CRC */ + srslte_crc_set_init(&q->crc_tb, 0); - /* Compute transport block CRC */ - par = srslte_crc_checksum_byte(&q->crc_tb, data, cb_segm->tbs); - - /* parity bits will be appended later */ - parity[0] = (par&(0xff<<16))>>16; - parity[1] = (par&(0xff<<8))>>8; - parity[2] = par&0xff; - } - wp = 0; rp = 0; for (i = 0; i < cb_segm->C; i++) { @@ -252,6 +248,7 @@ static int encode_tb_off(srslte_sch_t *q, cb_len, rlen, wp, rp, 
n_e); if (data) { + bool last_cb = false; /* Copy data to another buffer, making space for the Codeblock CRC */ if (i < cb_segm->C - 1) { @@ -263,13 +260,19 @@ static int encode_tb_off(srslte_sch_t *q, /* Append Transport Block parity bits to the last CB */ memcpy(q->cb_in, &data[rp/8], (rlen - 24) * sizeof(uint8_t)/8); - memcpy(&q->cb_in[(rlen - 24)/8], parity, 3 * sizeof(uint8_t)); + last_cb = true; } /* Turbo Encoding * If Codeblock CRC is required it is given the CRC instance pointer, otherwise CRC pointer shall be NULL */ - srslte_tcod_encode_lut(&q->encoder, (cb_segm->C > 1) ? &q->crc_cb : NULL, q->cb_in, q->parity_bits, cblen_idx); + srslte_tcod_encode_lut(&q->encoder, + &q->crc_tb, + (cb_segm->C > 1) ? &q->crc_cb : NULL, + q->cb_in, + q->parity_bits, + cblen_idx, + last_cb); } DEBUG("RM cblen_idx=%d, n_e=%d, wp=%d, nof_e_bits=%d\n",cblen_idx, n_e, wp, nof_e_bits); @@ -304,142 +307,117 @@ static int encode_tb(srslte_sch_t *q, bool decode_tb_cb(srslte_sch_t *q, srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm, uint32_t Qm, uint32_t rv, uint32_t nof_e_bits, - int16_t *e_bits, uint8_t *data, - uint32_t cb_size_group) + void *e_bits, uint8_t *data) { - bool cb_map[SRSLTE_MAX_CODEBLOCKS]; - - uint32_t cb_idx[SRSLTE_TDEC_MAX_NPAR]; - int16_t *decoder_input[SRSLTE_TDEC_MAX_NPAR]; - - uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1; - uint32_t first_cb = cb_size_group?cb_segm->C1:0; - uint32_t cb_len = cb_size_group?cb_segm->K2:cb_segm->K1; - uint32_t cb_len_idx = cb_size_group?cb_segm->K2_idx:cb_segm->K1_idx; - - uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24); - uint32_t Gp = nof_e_bits / Qm; - uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp; - uint32_t n_e = Qm * (Gp/cb_segm->C); - - if (nof_cb > SRSLTE_MAX_CODEBLOCKS) { + int8_t *e_bits_b = e_bits; + int16_t *e_bits_s = e_bits; + + if (cb_segm->C > SRSLTE_MAX_CODEBLOCKS) { fprintf(stderr, "Error SRSLTE_MAX_CODEBLOCKS=%d\n", SRSLTE_MAX_CODEBLOCKS); - return false; - } - - for (int 
i=0;idecoder);i++) { - cb_idx[i] = i+first_cb; - decoder_input[i] = NULL; - } - - uint32_t remaining_cb = 0; - for (int i=0;icb_crc[i]; - if (softbuffer->cb_crc[i] == false) { - remaining_cb ++; - } + return false; } - - srslte_tdec_reset(&q->decoder, cb_len); - + q->nof_iterations = 0; - while(remaining_cb>0) { - - // Unratematch the codeblocks left to decode - for (int i=0;idecoder);i++) { - - if (!decoder_input[i] && remaining_cb > 0) { - // Find an unprocessed CB - cb_idx[i]=first_cb; - while(cb_idx[i]C;cb_idx++) + { + /* Do not process blocks with CRC Ok */ + if (softbuffer->cb_crc[cb_idx] == false) { + + uint32_t cb_len = cb_idxC1?cb_segm->K1:cb_segm->K2; + uint32_t cb_len_idx = cb_idxC1?cb_segm->K1_idx:cb_segm->K2_idx; + + uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24); + uint32_t Gp = nof_e_bits / Qm; + uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp; + uint32_t n_e = Qm * (Gp/cb_segm->C); + + uint32_t rp = cb_idx*n_e; + uint32_t n_e2 = n_e; + + if (cb_idx > cb_segm->C - gamma) { + n_e2 = n_e+Qm; + rp = (cb_segm->C - gamma)*n_e + (cb_idx-(cb_segm->C - gamma))*n_e2; + } + + if (q->llr_is_8bit) { + if (srslte_rm_turbo_rx_lut_8bit(&e_bits_b[rp], (int8_t*) softbuffer->buffer_f[cb_idx], n_e2, cb_len_idx, rv)) { + fprintf(stderr, "Error in rate matching\n"); + return SRSLTE_ERROR; } - if (cb_map[cb_idx[i]] == false) { - cb_map[cb_idx[i]] = true; - - uint32_t rp = cb_idx[i]*n_e; - uint32_t n_e2 = n_e; - - if (cb_idx[i] > cb_segm->C - gamma) { - n_e2 = n_e+Qm; - rp = (cb_segm->C - gamma)*n_e + (cb_idx[i]-(cb_segm->C - gamma))*n_e2; - } - - INFO("CB %d: rp=%d, n_e=%d, i=%d\n", cb_idx[i], rp, n_e2, i); - if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[cb_idx[i]], n_e2, cb_len_idx, rv)) { - fprintf(stderr, "Error in rate matching\n"); - return SRSLTE_ERROR; - } - - decoder_input[i] = softbuffer->buffer_f[cb_idx[i]]; + } else { + if (srslte_rm_turbo_rx_lut(&e_bits_s[rp], softbuffer->buffer_f[cb_idx], n_e2, cb_len_idx, rv)) { + fprintf(stderr, "Error in 
rate matching\n"); + return SRSLTE_ERROR; } } - } - - // Run 1 iteration for the codeblocks in queue - srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len); - // Decide output bits and compute CRC - for (int i=0;idecoder);i++) { - if (decoder_input[i]) { - srslte_tdec_decision_byte_par_cb(&q->decoder, q->cb_in, i, cb_len); + srslte_tdec_new_cb(&q->decoder, cb_len); + + // Run iterations and use CRC for early stopping + bool early_stop = false; + uint32_t cb_noi = 0; + do { + if (q->llr_is_8bit) { + srslte_tdec_iteration_8bit(&q->decoder, (int8_t*) softbuffer->buffer_f[cb_idx], &data[cb_idx*rlen/8]); + } else { + srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[cb_idx], &data[cb_idx*rlen/8]); + } + q->nof_iterations++; + cb_noi++; + + uint32_t len_crc; + srslte_crc_t *crc_ptr; - uint32_t len_crc; - srslte_crc_t *crc_ptr; - if (cb_segm->C > 1) { - len_crc = cb_len; - crc_ptr = &q->crc_cb; + len_crc = cb_len; + crc_ptr = &q->crc_cb; } else { - len_crc = cb_segm->tbs+24; - crc_ptr = &q->crc_tb; + len_crc = cb_segm->tbs+24; + crc_ptr = &q->crc_tb; } // CRC is OK - if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) { - - memcpy(softbuffer->data[cb_idx[i]], q->cb_in, rlen/8 * sizeof(uint8_t)); - softbuffer->cb_crc[cb_idx[i]] = true; - - q->nof_iterations += srslte_tdec_get_nof_iterations_cb(&q->decoder, i); - - // Reset number of iterations for that CB in the decoder - srslte_tdec_reset_cb(&q->decoder, i); - remaining_cb--; - decoder_input[i] = NULL; - cb_idx[i] = 0; - - // CRC is error and exceeded maximum iterations for this CB. - // Early stop the whole transport block. - } else if (srslte_tdec_get_nof_iterations_cb(&q->decoder, i) >= q->max_iterations) { - INFO("CB %d: Error. CB is erroneous. 
remaining_cb=%d, i=%d, first_cb=%d, nof_cb=%d\n", - cb_idx[i], remaining_cb, i, first_cb, nof_cb); - - q->nof_iterations += q->max_iterations; - srslte_tdec_reset_cb(&q->decoder, i); - remaining_cb--; - decoder_input[i] = NULL; - cb_idx[i] = 0; + if (!srslte_crc_checksum_byte(crc_ptr, &data[cb_idx*rlen/8], len_crc)) { + + softbuffer->cb_crc[cb_idx] = true; + early_stop = true; + + // CRC is error and exceeded maximum iterations for this CB. + // Early stop the whole transport block. } - } - } + + } while (cb_noi < q->max_iterations && !early_stop); + + INFO("CB %d: rp=%d, n_e=%d, cb_len=%d, CRC=%s, rlen=%d, iterations=%d/%d\n", + cb_idx, rp, n_e2, cb_len, early_stop?"OK":"KO", rlen, cb_noi, q->max_iterations); + + } else { + // Copy decoded data from previous transmissions + uint32_t cb_len = cb_idxC1?cb_segm->K1:cb_segm->K2; + uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24); + memcpy(&data[cb_idx*rlen/8], softbuffer->data[cb_idx], rlen/8 * sizeof(uint8_t)); + } } softbuffer->tb_crc = true; - for (int i = 0; i < nof_cb && softbuffer->tb_crc; i++) { + for (int i = 0; i < cb_segm->C && softbuffer->tb_crc; i++) { /* If one CB failed return false */ softbuffer->tb_crc = softbuffer->cb_crc[i]; } - if (softbuffer->tb_crc) { - for (int i = 0; i < nof_cb; i++) { - memcpy(&data[i * rlen / 8], softbuffer->data[i], rlen/8 * sizeof(uint8_t)); + // If TB CRC failed, save correct CB for next retransmission + if (!softbuffer->tb_crc) { + for (int i = 0; i < cb_segm->C; i++) { + if (softbuffer->cb_crc[i]) { + uint32_t cb_len = iC1?cb_segm->K1:cb_segm->K2; + uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24); + memcpy(softbuffer->data[i], &data[i * rlen / 8], rlen/8 * sizeof(uint8_t)); + } } } - q->nof_iterations /= nof_cb; + q->nof_iterations /= cb_segm->C; return softbuffer->tb_crc; } @@ -484,18 +462,14 @@ static int decode_tb(srslte_sch_t *q, } bool crc_ok = true; - - uint32_t nof_cb_groups = cb_segm->C2>0?2:1; - + data[cb_segm->tbs/8+0] = 0; data[cb_segm->tbs/8+1] = 0; 
data[cb_segm->tbs/8+2] = 0; - // Process Codeblocks in groups of equal CB size to parallelize according to SRSLTE_TDEC_MAX_NPAR - for (uint32_t i=0;i> (6 - read_bit_idx)) & (uint8_t) 0x03; + q_bits[write_byte_idx] |= w << (6 - write_bit_idx); + + bit_read_idx += 2; + } + } + + for (uint32_t j = ri_min_row; j < rows; j++) { + for (uint32_t i = 0; i < cols; i++) { + uint32_t k = (i * rows + j) * 2; + + if (ri_present[k]) { + /* do nothing */ + } else { + uint32_t read_byte_idx = bit_read_idx / 8; + uint32_t read_bit_idx = bit_read_idx % 8; + uint32_t write_byte_idx = k / 8; + uint32_t write_bit_idx = k % 8; + uint8_t w = (g_bits[read_byte_idx] >> (6 - read_bit_idx)) & (uint8_t) 0x03; + q_bits[write_byte_idx] |= w << (6 - write_bit_idx); + + bit_read_idx += 2; + } + } + } +} + +static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols, uint8_t *q_bits, uint32_t ri_min_row, const uint8_t *ri_present) { + uint32_t bit_read_idx = 0; + + for (uint32_t j = 0; j < ri_min_row; j++) { + int32_t i = 0; + +#ifndef LV_HAVE_SSE + __m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2); + uint8_t *_g_bits = &g_bits[bit_read_idx/8]; + + /* First bits are aligned to byte */ + if (0 == (bit_read_idx & 0x3)) { + for (; i < (cols - 3); i += 4) { + + uint8_t w1 = *(_g_bits++); + uint8_t w2 = *(_g_bits++); + + __m128i _write_byte_idx = _mm_srli_epi32(_counter, 3); + __m128i _write_bit_idx = _mm_and_si128(_counter, _7); + __m128i _write_shift = _mm_sub_epi32(_4, _write_bit_idx); + + q_bits[_mm_extract_epi32(_write_byte_idx, 0)] |= (w1 >> 0x4) << _mm_extract_epi32(_write_shift, 0); + q_bits[_mm_extract_epi32(_write_byte_idx, 1)] |= (w1 & 0xf) << _mm_extract_epi32(_write_shift, 1); + q_bits[_mm_extract_epi32(_write_byte_idx, 2)] |= (w2 >> 0x4) << _mm_extract_epi32(_write_shift, 2); + q_bits[_mm_extract_epi32(_write_byte_idx, 3)] |= (w2 & 0xf) << _mm_extract_epi32(_write_shift, 3); + _counter = _mm_add_epi32(_counter, 
_inc); + } + } else { + for (; i < (cols - 3); i += 4) { + __m128i _write_byte_idx = _mm_srli_epi32(_counter, 3); + __m128i _write_bit_idx = _mm_and_si128(_counter, _7); + __m128i _write_shift = _mm_sub_epi32(_4, _write_bit_idx); + + uint8_t w1 = *(_g_bits); + uint8_t w2 = *(_g_bits++); + uint8_t w3 = *(_g_bits++); + q_bits[_mm_extract_epi32(_write_byte_idx, 0)] |= (w1 & 0xf) << _mm_extract_epi32(_write_shift, 0); + q_bits[_mm_extract_epi32(_write_byte_idx, 1)] |= (w2 >> 0x4) << _mm_extract_epi32(_write_shift, 1); + q_bits[_mm_extract_epi32(_write_byte_idx, 2)] |= (w2 & 0xf) << _mm_extract_epi32(_write_shift, 2); + q_bits[_mm_extract_epi32(_write_byte_idx, 3)] |= (w3 >> 0x4) << _mm_extract_epi32(_write_shift, 3); + + _counter = _mm_add_epi32(_counter, _inc); + } + } + bit_read_idx += i * 4; +#endif /* LV_HAVE_SSE */ + + /* Spare bits */ + for (; i < cols; i++) { + uint32_t k = (i * rows + j) * 4; + + uint32_t read_byte_idx = bit_read_idx / 8; + uint32_t read_bit_idx = bit_read_idx % 8; + uint32_t write_byte_idx = k / 8; + uint32_t write_bit_idx = k % 8; + uint8_t w = (g_bits[read_byte_idx] >> (4 - read_bit_idx)) & (uint8_t) 0x0f; + q_bits[write_byte_idx] |= w << (4 - write_bit_idx); + + bit_read_idx += 4; + } + } + + /* Do rows containing RI */ + for (uint32_t j = ri_min_row; j < rows; j++) { + for (uint32_t i = 0; i < cols; i++) { + uint32_t k = (i * rows + j) * 4; + + if (ri_present[k]) { + /* do nothing */ + } else { + uint32_t read_byte_idx = bit_read_idx / 8; + uint32_t read_bit_idx = bit_read_idx % 8; + uint32_t write_byte_idx = k / 8; + uint32_t write_bit_idx = k % 8; + uint8_t w = (g_bits[read_byte_idx] >> (4 - read_bit_idx)) & (uint8_t) 0x0f; + q_bits[write_byte_idx] |= w << (4 - write_bit_idx); + + bit_read_idx += 4; + } + } + + } +} + +static void ulsch_interleave_qm6(const uint8_t *g_bits, + uint32_t rows, + uint32_t cols, + uint8_t *q_bits, + uint32_t ri_min_row, + const uint8_t *ri_present) { + uint32_t bit_read_idx = 0; + + for (uint32_t j = 0; j < 
ri_min_row; j++) { + for (uint32_t i = 0; i < cols; i++) { + uint32_t k = (i * rows + j) * 6; + + uint32_t read_byte_idx = bit_read_idx / 8; + uint32_t read_bit_idx = bit_read_idx % 8; + uint32_t write_byte_idx = k / 8; + uint32_t write_bit_idx = k % 8; + uint8_t w; + + switch (read_bit_idx) { + case 0: + w = g_bits[read_byte_idx] >> 2; + break; + case 2: + w = g_bits[read_byte_idx] & (uint8_t) 0x3f; + break; + case 4: + w = ((g_bits[read_byte_idx] << 2) | (g_bits[read_byte_idx + 1] >> 6)) & (uint8_t) 0x3f; + break; + case 6: + w = ((g_bits[read_byte_idx] << 4) | (g_bits[read_byte_idx + 1] >> 4)) & (uint8_t) 0x3f; + break; + default: + w = 0; + } + + switch (write_bit_idx) { + case 0: + q_bits[write_byte_idx] |= w << 2; + break; + case 2: + q_bits[write_byte_idx] |= w; + break; + case 4: + q_bits[write_byte_idx] |= w >> 2; + q_bits[write_byte_idx + 1] |= w << 6; + break; + case 6: + q_bits[write_byte_idx] |= w >> 4; + q_bits[write_byte_idx + 1] |= w << 4; + break; + default: + /* Do nothing */; + } + + bit_read_idx += 6; + } + } + + for (uint32_t j = ri_min_row; j < rows; j++) { + for (uint32_t i = 0; i < cols; i++) { + uint32_t k = (i * rows + j) * 6; + + if (ri_present[k]) { + /* do nothing */ + } else { + uint32_t read_byte_idx = bit_read_idx / 8; + uint32_t read_bit_idx = bit_read_idx % 8; + uint32_t write_byte_idx = k / 8; + uint32_t write_bit_idx = k % 8; + uint8_t w; + + switch (read_bit_idx) { + case 0: + w = g_bits[read_byte_idx] >> 2; + break; + case 2: + w = g_bits[read_byte_idx] & (uint8_t) 0x3f; + break; + case 4: + w = ((g_bits[read_byte_idx] << 2) | (g_bits[read_byte_idx + 1] >> 6)) & (uint8_t) 0x3f; + break; + case 6: + w = ((g_bits[read_byte_idx] << 4) | (g_bits[read_byte_idx + 1] >> 4)) & (uint8_t) 0x3f; + break; + default: + w = 0; + } + + switch (write_bit_idx) { + case 0: + q_bits[write_byte_idx] |= w << 2; + break; + case 2: + q_bits[write_byte_idx] |= w; + break; + case 4: + q_bits[write_byte_idx] |= w >> 2; + q_bits[write_byte_idx + 1] |= w 
<< 6; + break; + case 6: + q_bits[write_byte_idx] |= w >> 4; + q_bits[write_byte_idx + 1] |= w << 4; + break; + default: + /* Do nothing */; + } + + bit_read_idx += 6; + } + } + } +} + /* UL-SCH channel interleaver according to 5.2.2.8 of 36.212 */ void ulsch_interleave(uint8_t *g_bits, uint32_t Qm, uint32_t H_prime_total, uint32_t N_pusch_symbs, uint8_t *q_bits, srslte_uci_bit_t *ri_bits, uint32_t nof_ri_bits, uint8_t *ri_present, uint32_t *inteleaver_lut) { - + + const uint32_t nof_bits = H_prime_total * Qm; + uint32_t rows = H_prime_total / N_pusch_symbs; + uint32_t cols = N_pusch_symbs; + uint32_t ri_min_row = rows; + // Prepare ri_bits for fast search using temp_buffer if (nof_ri_bits > 0) { for (uint32_t i=0;i 0) { for (uint32_t i=0;i -#include #include #include -#include #include "srslte/srslte.h" #include "rf_blade_imp.h" -#include "srslte/phy/rf/rf.h" -#define CONVERT_BUFFER_SIZE 240*1024 +#define CONVERT_BUFFER_SIZE (240*1024) typedef struct { struct bladerf *dev; - uint32_t rx_rate; - uint32_t tx_rate; + bladerf_sample_rate rx_rate; + bladerf_sample_rate tx_rate; int16_t rx_buffer[CONVERT_BUFFER_SIZE]; int16_t tx_buffer[CONVERT_BUFFER_SIZE]; bool rx_stream_enabled; @@ -83,7 +80,7 @@ int rf_blade_start_tx_stream(void *h) rf_blade_handler_t *handler = (rf_blade_handler_t*) h; status = bladerf_sync_config(handler->dev, - BLADERF_MODULE_TX, + BLADERF_TX_X1, BLADERF_FORMAT_SC16_Q11_META, num_buffers, buffer_size_tx, @@ -112,7 +109,7 @@ int rf_blade_start_rx_stream(void *h, bool now) uint32_t buffer_size_rx = ms_buffer_size_rx*(handler->rx_rate/1000/1024); status = bladerf_sync_config(handler->dev, - BLADERF_MODULE_RX, + BLADERF_RX_X1, BLADERF_FORMAT_SC16_Q11_META, num_buffers, buffer_size_rx, @@ -123,7 +120,7 @@ int rf_blade_start_rx_stream(void *h, bool now) return status; } status = bladerf_sync_config(handler->dev, - BLADERF_MODULE_TX, + BLADERF_TX_X1, BLADERF_FORMAT_SC16_Q11_META, num_buffers, buffer_size_tx, @@ -186,6 +183,8 @@ int 
rf_blade_open_multi(char *args, void **h, uint32_t nof_channels) int rf_blade_open(char *args, void **h) { + const struct bladerf_range *range_tx = NULL; + const struct bladerf_range *range_rx = NULL; *h = NULL; rf_blade_handler_t *handler = (rf_blade_handler_t*) malloc(sizeof(rf_blade_handler_t)); @@ -201,33 +200,45 @@ int rf_blade_open(char *args, void **h) fprintf(stderr, "Unable to open device: %s\n", bladerf_strerror(status)); return status; } + + status = bladerf_set_gain_mode(handler->dev, BLADERF_MODULE_RX, BLADERF_GAIN_MGC); + if (status) { + fprintf(stderr, "Unable to open device: %s\n", bladerf_strerror(status)); + return status; + } //bladerf_log_set_verbosity(BLADERF_LOG_LEVEL_VERBOSE); - /* Configure the gains of the RX LNA and RX VGA1*/ - status = bladerf_set_lna_gain(handler->dev, BLADERF_LNA_GAIN_MAX); - if (status != 0) { - fprintf(stderr, "Failed to set RX LNA gain: %s\n", bladerf_strerror(status)); + /* Get Gain ranges and set Rx to maximum */ + status = bladerf_get_gain_range(handler->dev, BLADERF_MODULE_RX, &range_rx); + if ((status != 0) | (range_rx == NULL)) { + fprintf(stderr, "Failed to get RX gain range: %s\n", bladerf_strerror(status)); return status; } - status = bladerf_set_rxvga1(handler->dev, 27); - if (status != 0) { - fprintf(stderr, "Failed to set RX VGA1 gain: %s\n", bladerf_strerror(status)); + + bladerf_get_gain_range(handler->dev, BLADERF_MODULE_RX, &range_tx); + if ((status != 0) | (range_tx == NULL)) { + fprintf(stderr, "Failed to get TX gain range: %s\n", bladerf_strerror(status)); return status; } - status = bladerf_set_txvga1(handler->dev, BLADERF_TXVGA1_GAIN_MAX); + + status = bladerf_set_gain(handler->dev, BLADERF_MODULE_RX, (bladerf_gain) range_rx->max); if (status != 0) { - fprintf(stderr, "Failed to set TX VGA1 gain: %s\n", bladerf_strerror(status)); + fprintf(stderr, "Failed to set RX LNA gain: %s\n", bladerf_strerror(status)); return status; } handler->rx_stream_enabled = false; handler->tx_stream_enabled = false; 
+ /* Set default sampling rates */ + rf_blade_set_tx_srate(handler, 1.92e6); + rf_blade_set_rx_srate(handler, 1.92e6); + /* Set info structure */ - handler->info.min_tx_gain = BLADERF_TXVGA2_GAIN_MIN; - handler->info.max_tx_gain = BLADERF_TXVGA2_GAIN_MAX; - handler->info.min_rx_gain = BLADERF_RXVGA2_GAIN_MIN; - handler->info.max_rx_gain = BLADERF_RXVGA2_GAIN_MAX; + handler->info.min_tx_gain = range_tx->min; + handler->info.max_tx_gain = range_tx->max; + handler->info.min_rx_gain = range_rx->min; + handler->info.max_rx_gain = range_rx->max; return 0; } @@ -265,7 +276,7 @@ double rf_blade_set_rx_srate(void *h, double freq) return -1; } } else { - status = bladerf_set_bandwidth(handler->dev, BLADERF_MODULE_RX, handler->rx_rate*0.8, &bw); + status = bladerf_set_bandwidth(handler->dev, BLADERF_MODULE_RX, (bladerf_bandwidth) (handler->rx_rate * 0.8), &bw); if (status != 0) { fprintf(stderr, "Failed to set bandwidth = %u: %s\n", handler->rx_rate, bladerf_strerror(status)); return -1; @@ -295,10 +306,10 @@ double rf_blade_set_tx_srate(void *h, double freq) double rf_blade_set_rx_gain(void *h, double gain) { int status; - rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - status = bladerf_set_rxvga2(handler->dev, (int) gain); + rf_blade_handler_t *handler = (rf_blade_handler_t*) h; + status = bladerf_set_gain(handler->dev, BLADERF_MODULE_RX, (bladerf_gain) gain); if (status != 0) { - fprintf(stderr, "Failed to set RX VGA2 gain: %s\n", bladerf_strerror(status)); + fprintf(stderr, "Failed to set RX gain: %s\n", bladerf_strerror(status)); return -1; } return rf_blade_get_rx_gain(h); @@ -307,10 +318,10 @@ double rf_blade_set_rx_gain(void *h, double gain) double rf_blade_set_tx_gain(void *h, double gain) { int status; - rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - status = bladerf_set_txvga2(handler->dev, (int) gain); + rf_blade_handler_t *handler = (rf_blade_handler_t*) h; + status = bladerf_set_gain(handler->dev, BLADERF_MODULE_TX, (bladerf_gain) gain); if 
(status != 0) { - fprintf(stderr, "Failed to set TX VGA2 gain: %s\n", bladerf_strerror(status)); + fprintf(stderr, "Failed to set TX gain: %s\n", bladerf_strerror(status)); return -1; } return rf_blade_get_tx_gain(h); @@ -318,30 +329,30 @@ double rf_blade_set_tx_gain(void *h, double gain) double rf_blade_get_rx_gain(void *h) { - int status; - int gain; - rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - status = bladerf_get_rxvga2(handler->dev, &gain); + int status; + bladerf_gain gain = 0; + rf_blade_handler_t *handler = (rf_blade_handler_t*) h; + status = bladerf_get_gain(handler->dev, BLADERF_MODULE_RX, &gain); if (status != 0) { - fprintf(stderr, "Failed to get RX VGA2 gain: %s\n", + fprintf(stderr, "Failed to get RX gain: %s\n", bladerf_strerror(status)); return -1; } - return gain; // Add rxvga1 and LNA + return gain; } double rf_blade_get_tx_gain(void *h) { - int status; - int gain; - rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - status = bladerf_get_txvga2(handler->dev, &gain); + int status; + bladerf_gain gain = 0; + rf_blade_handler_t *handler = (rf_blade_handler_t*) h; + status = bladerf_get_gain(handler->dev, BLADERF_MODULE_TX, &gain); if (status != 0) { - fprintf(stderr, "Failed to get TX VGA2 gain: %s\n", + fprintf(stderr, "Failed to get TX gain: %s\n", bladerf_strerror(status)); return -1; } - return gain; // Add txvga1 + return gain; } srslte_rf_info_t *rf_blade_get_info(void *h) @@ -360,7 +371,7 @@ srslte_rf_info_t *rf_blade_get_info(void *h) double rf_blade_set_rx_freq(void *h, double freq) { rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - uint32_t f_int = (uint32_t) round(freq); + bladerf_frequency f_int = (uint32_t) round(freq); int status = bladerf_set_frequency(handler->dev, BLADERF_MODULE_RX, f_int); if (status != 0) { fprintf(stderr, "Failed to set samplerate = %u: %s\n", @@ -369,7 +380,7 @@ double rf_blade_set_rx_freq(void *h, double freq) } f_int=0; bladerf_get_frequency(handler->dev, BLADERF_MODULE_RX, &f_int); 
- printf("set RX frequency to %u\n", f_int); + printf("set RX frequency to %lu\n", f_int); return freq; } @@ -377,7 +388,7 @@ double rf_blade_set_rx_freq(void *h, double freq) double rf_blade_set_tx_freq(void *h, double freq) { rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - uint32_t f_int = (uint32_t) round(freq); + bladerf_frequency f_int = (uint32_t) round(freq); int status = bladerf_set_frequency(handler->dev, BLADERF_MODULE_TX, f_int); if (status != 0) { fprintf(stderr, "Failed to set samplerate = %u: %s\n", @@ -387,22 +398,22 @@ double rf_blade_set_tx_freq(void *h, double freq) f_int=0; bladerf_get_frequency(handler->dev, BLADERF_MODULE_TX, &f_int); - printf("set TX frequency to %u\n", f_int); + printf("set TX frequency to %lu\n", f_int); return freq; } void rf_blade_set_tx_cal(void *h, srslte_rf_cal_t *cal) { rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_PHASE, cal->dc_gain); - bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_GAIN, cal->dc_phase); + bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_PHASE, cal->dc_phase); + bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_GAIN, cal->dc_gain); bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_LMS_DCOFF_I, cal->iq_i); bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_LMS_DCOFF_Q, cal->iq_q); } void rf_blade_set_rx_cal(void *h, srslte_rf_cal_t *cal) { rf_blade_handler_t *handler = (rf_blade_handler_t*) h; - bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_PHASE, cal->dc_gain); - bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_GAIN, cal->dc_phase); + bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_PHASE, cal->dc_phase); + bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_GAIN, cal->dc_gain); 
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_LMS_DCOFF_I, cal->iq_i); bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_LMS_DCOFF_Q, cal->iq_q); } @@ -431,7 +442,7 @@ void rf_blade_get_time(void *h, time_t *secs, double *frac_secs) rf_blade_handler_t *handler = (rf_blade_handler_t*) h; struct bladerf_metadata meta; - int status = bladerf_get_timestamp(handler->dev, BLADERF_MODULE_RX, &meta.timestamp); + int status = bladerf_get_timestamp(handler->dev, BLADERF_RX, &meta.timestamp); if (status != 0) { fprintf(stderr, "Failed to get current RX timestamp: %s\n", bladerf_strerror(status)); @@ -478,9 +489,9 @@ int rf_blade_recv_with_time(void *h, error.opt = meta.actual_count; error.type = SRSLTE_RF_ERROR_OVERFLOW; blade_error_handler(error); - } else { - fprintf(stderr, "Overrun detected in scheduled RX. " - "%u valid samples were read.\n\n", meta.actual_count); + } else { + /*fprintf(stderr, "Overrun detected in scheduled RX. " + "%u valid samples were read.\n\n", meta.actual_count);*/ } } diff --git a/lib/src/phy/rf/rf_soapy_imp.c b/lib/src/phy/rf/rf_soapy_imp.c index a0dcc6542..26ad5bac0 100644 --- a/lib/src/phy/rf/rf_soapy_imp.c +++ b/lib/src/phy/rf/rf_soapy_imp.c @@ -228,7 +228,7 @@ int rf_soapy_start_rx_stream(void *h, bool now) { rf_soapy_handler_t *handler = (rf_soapy_handler_t*) h; if(handler->rx_stream_active == false){ - if(SoapySDRDevice_activateStream(handler->device, handler->rxStream, 0, 0, 0) != 0) + if(SoapySDRDevice_activateStream(handler->device, handler->rxStream, SOAPY_SDR_HAS_TIME | SOAPY_SDR_END_BURST, 0, 0) != 0) return SRSLTE_ERROR; handler->rx_stream_active = true; } @@ -339,9 +339,19 @@ int rf_soapy_open_multi(char *args, void **h, uint32_t nof_rx_antennas) rf_soapy_set_rx_srate(handler, 1.92e6); rf_soapy_set_tx_srate(handler, 1.92e6); - if(SoapySDRDevice_getNumChannels(handler->device, SOAPY_SDR_RX) > 0){ + size_t channels = SoapySDRDevice_getNumChannels(handler->device, SOAPY_SDR_RX); + + 
if((channels > 0) && (nof_rx_antennas > 0)){ printf("Setting up RX stream\n"); - if(SoapySDRDevice_setupStream(handler->device, &(handler->rxStream), SOAPY_SDR_RX, SOAPY_SDR_CF32, NULL, 0, NULL) != 0) { + size_t numChannels = channels; + if (channels > nof_rx_antennas) { + numChannels = nof_rx_antennas; + } + size_t rx_channels[numChannels]; + for(int i = 0 ; i < numChannels ; i++) { + rx_channels[i] = i; + } + if(SoapySDRDevice_setupStream(handler->device, &(handler->rxStream), SOAPY_SDR_RX, SOAPY_SDR_CF32, rx_channels, numChannels, NULL) != 0) { printf("Rx setupStream fail: %s\n", SoapySDRDevice_lastError()); return SRSLTE_ERROR; } @@ -439,9 +449,11 @@ int rf_soapy_open_multi(char *args, void **h, uint32_t nof_rx_antennas) #if HAVE_ASYNC_THREAD bool start_async_thread = true; - if (strstr(args, "silent")) { - REMOVE_SUBSTRING_WITHCOMAS(args, "silent"); - start_async_thread = false; + if (args) { + if (strstr(args, "silent")) { + REMOVE_SUBSTRING_WITHCOMAS(args, "silent"); + start_async_thread = false; + } } #endif @@ -839,7 +851,7 @@ int rf_soapy_send_timed_multi(void *h, // Convert initial tx time if (has_time_spec) { - timeNs = secs * 1000000000; + timeNs = (long long)secs * 1000000000; timeNs = timeNs + (frac_secs * 1000000000); } diff --git a/lib/src/phy/rf/rf_uhd_imp.c b/lib/src/phy/rf/rf_uhd_imp.c index 202b47b8d..5439db828 100644 --- a/lib/src/phy/rf/rf_uhd_imp.c +++ b/lib/src/phy/rf/rf_uhd_imp.c @@ -788,17 +788,17 @@ int rf_uhd_recv_with_time_multi(void *h, rf_uhd_handler_t *handler = (rf_uhd_handler_t*) h; uhd_rx_metadata_handle *md = &handler->rx_md_first; size_t rxd_samples = 0; + size_t rxd_samples_total = 0; int trials = 0; if (blocking) { - int n = 0; - while (n < nsamples && trials < 100) { + while (rxd_samples_total < nsamples && trials < 100) { void *buffs_ptr[4]; for (int i=0;inof_rx_channels;i++) { cf_t *data_c = (cf_t*) data[i]; - buffs_ptr[i] = &data_c[n]; + buffs_ptr[i] = &data_c[rxd_samples_total]; } - size_t num_samps_left = nsamples - n; 
+ size_t num_samps_left = nsamples - rxd_samples_total; size_t num_rx_samples = (num_samps_left > handler->rx_nof_samples) ? handler->rx_nof_samples : num_samps_left; rxd_samples = 0; @@ -814,7 +814,7 @@ int rf_uhd_recv_with_time_multi(void *h, uhd_rx_metadata_error_code(*md, &error_code); md = &handler->rx_md; - n += rxd_samples; + rxd_samples_total += rxd_samples; trials++; if (error_code == UHD_RX_METADATA_ERROR_CODE_OVERFLOW) { @@ -830,6 +830,7 @@ int rf_uhd_recv_with_time_multi(void *h, } } else { uhd_error error = uhd_rx_streamer_recv(handler->rx_stream, data, nsamples, md, 0.0, false, &rxd_samples); + rxd_samples_total = rxd_samples; if (error) { fprintf(stderr, "Error receiving from UHD: %d\n", error); log_rx_error(handler); @@ -839,7 +840,7 @@ int rf_uhd_recv_with_time_multi(void *h, if (secs && frac_secs) { uhd_rx_metadata_time_spec(handler->rx_md_first, secs, frac_secs); } - return nsamples; + return rxd_samples_total; } int rf_uhd_send_timed(void *h, diff --git a/lib/src/phy/scrambling/scrambling.c b/lib/src/phy/scrambling/scrambling.c index ca0342905..4aba1cb83 100644 --- a/lib/src/phy/scrambling/scrambling.c +++ b/lib/src/phy/scrambling/scrambling.c @@ -47,7 +47,12 @@ void srslte_scrambling_s(srslte_sequence_t *s, short *data) { void srslte_scrambling_s_offset(srslte_sequence_t *s, short *data, int offset, int len) { assert (len + offset <= s->cur_len); - srslte_vec_prod_sss(data, &s->c_short[offset], data, len); + srslte_vec_neg_sss(data, &s->c_short[offset], data, len); +} + +void srslte_scrambling_sb_offset(srslte_sequence_t *s, int8_t *data, int offset, int len) { + assert (len + offset <= s->cur_len); + srslte_vec_neg_bbb(data, &s->c_char[offset], data, len); } void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data) { diff --git a/lib/src/phy/scrambling/test/scrambling_test.c b/lib/src/phy/scrambling/test/scrambling_test.c index 40722f09b..10a197cc1 100644 --- a/lib/src/phy/scrambling/test/scrambling_test.c +++ 
b/lib/src/phy/scrambling/test/scrambling_test.c @@ -177,6 +177,83 @@ int main(int argc, char **argv) { free(input_f); free(scrambled_f); + + int16_t *input_s, *scrambled_s; + + // Scramble also shorts + input_s= malloc(sizeof(int16_t) * seq.cur_len); + if (!input_s) { + perror("malloc"); + exit(-1); + } + scrambled_s = malloc(sizeof(int16_t) * seq.cur_len); + if (!scrambled_s) { + perror("malloc"); + exit(-1); + } + + for (i=0;ipucch)) { + if (srslte_pucch_init_ue(&q->pucch)) { fprintf(stderr, "Error creating PUSCH object\n"); goto clean_exit; } diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index 35156acaa..44f8af1ca 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -60,6 +60,8 @@ bool verbose = false; #define TEST(X, CODE) static bool test_##X (char *func_name, double *timing, uint32_t block_size) {\ struct timeval start, end;\ + bzero(&start, sizeof(start));\ + bzero(&end, sizeof(end));\ float mse = 0.0f;\ bool passed;\ strncpy(func_name, #X, 32);\ @@ -781,7 +783,8 @@ TEST(srslte_vec_apply_cfo, ) TEST(srslte_cfo_correct, - srslte_cfo_t srslte_cfo = {0}; + srslte_cfo_t srslte_cfo; + bzero(&srslte_cfo, sizeof(srslte_cfo)); MALLOC(cf_t, x); MALLOC(cf_t, z); @@ -807,7 +810,8 @@ TEST(srslte_cfo_correct, ) TEST(srslte_cfo_correct_change, - srslte_cfo_t srslte_cfo = {0}; + srslte_cfo_t srslte_cfo; + bzero(&srslte_cfo, sizeof(srslte_cfo)); MALLOC(cf_t, x); MALLOC(cf_t, z); diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 9c1b84fdb..367b18f32 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -58,7 +58,11 @@ void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const ui srslte_vec_sub_sss_simd(x, y, z, len); } -// Noise estimation in chest_dl, interpolation +void srslte_vec_sub_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len) { + srslte_vec_sub_bbb_simd(x, y, z, len); +} + +// Noise estimation in 
chest_dl, interpolation void srslte_vec_sub_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len) { return srslte_vec_sub_fff((const float*) x,(const float*) y,(float*) z, 2*len); } @@ -100,10 +104,18 @@ void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const srslte_vec_convert_fi_simd(x, z, scale, len); } +void srslte_vec_convert_fb(const float *x, const float scale, int8_t *z, const uint32_t len) { + srslte_vec_convert_fb_simd(x, z, scale, len); +} + void srslte_vec_lut_sss(const short *x, const unsigned short *lut, short *y, const uint32_t len) { srslte_vec_lut_sss_simd(x, lut, y, len); } +void srslte_vec_lut_bbb(const int8_t *x, const unsigned short *lut, int8_t *y, const uint32_t len) { + srslte_vec_lut_bbb_simd(x, lut, y, len); +} + void srslte_vec_lut_sis(const short *x, const unsigned int *lut, short *y, const uint32_t len) { for (int i=0; i < len; i++) { y[lut[i]] = x[i]; @@ -163,6 +175,15 @@ void srslte_vec_fprint_b(FILE *stream, uint8_t *x, const uint32_t len) { fprintf(stream, "];\n"); } +void srslte_vec_fprint_bs(FILE *stream, int8_t *x, const uint32_t len) { + int i; + fprintf(stream, "["); + for (i=0;i burst_preamble_max_samples) { + fprintf(stderr, "Error setting TX srate %.1f MHz. Maximum burst preamble samples: %d, requested: %d\n", srate*1e-6, burst_preamble_max_samples, burst_preamble_samples ); burst_preamble_samples = burst_preamble_max_samples; - fprintf(stderr, "Error setting TX srate %.1f MHz. 
Maximum frequency for zero prepadding is 30.72 MHz\n", srate*1e-6); } burst_preamble_time_rounded = (double) burst_preamble_samples/cur_tx_srate; diff --git a/lib/src/radio/test/benchmark_radio.cc b/lib/src/radio/test/benchmark_radio.cc index d15adaa6b..844624086 100644 --- a/lib/src/radio/test/benchmark_radio.cc +++ b/lib/src/radio/test/benchmark_radio.cc @@ -129,7 +129,9 @@ int main(int argc, char **argv) { int ret = SRSLTE_ERROR; srslte::radio_multi *radio_h = NULL; - srslte_timestamp_t ts_rx = {}, ts_tx = {}; + srslte_timestamp_t ts_rx, ts_tx; + bzero(&ts_rx, sizeof(ts_rx)); + bzero(&ts_tx, sizeof(ts_tx)); signal(SIGINT, sig_int_handler); diff --git a/lib/src/upper/CMakeLists.txt b/lib/src/upper/CMakeLists.txt index 725d6de60..4ba2ff951 100644 --- a/lib/src/upper/CMakeLists.txt +++ b/lib/src/upper/CMakeLists.txt @@ -22,3 +22,8 @@ file(GLOB SOURCES "*.cc") add_library(srslte_upper STATIC ${SOURCES}) target_link_libraries(srslte_upper srslte_common srslte_asn1) install(TARGETS srslte_upper DESTINATION ${LIBRARY_DIR}) + +# Run clang-tidy if available +if(CLANG_TIDY_BIN) + set_target_properties(srslte_upper PROPERTIES CXX_CLANG_TIDY "${DO_CLANG_TIDY}") +endif() \ No newline at end of file diff --git a/lib/src/upper/pdcp.cc b/lib/src/upper/pdcp.cc index c8391518b..93ac0cf15 100644 --- a/lib/src/upper/pdcp.cc +++ b/lib/src/upper/pdcp.cc @@ -187,6 +187,28 @@ void pdcp::del_bearer(uint32_t lcid) pthread_rwlock_unlock(&rwlock); } +void pdcp::change_lcid(uint32_t old_lcid, uint32_t new_lcid) +{ + pthread_rwlock_wrlock(&rwlock); + + // make sure old LCID exists and new LCID is still free + if (valid_lcid(old_lcid) && not valid_lcid(new_lcid)) { + // insert old PDCP entity into new LCID + pdcp_map_t::iterator it = pdcp_array.find(old_lcid); + pdcp_entity_interface *pdcp_entity = it->second; + if (not pdcp_array.insert(pdcp_map_pair_t(new_lcid, pdcp_entity)).second) { + pdcp_log->error("Error inserting PDCP entity into array\n."); + goto exit; + } + // erase from old 
position + pdcp_array.erase(it); + pdcp_log->warning("Changed LCID of PDCP bearer from %d to %d\n", old_lcid, new_lcid); + } else { + pdcp_log->error("Can't change PDCP of bearer %s from %d to %d. Bearer doesn't exist or new LCID already occupied.\n", rrc->get_rb_name(old_lcid).c_str(), old_lcid, new_lcid); + } +exit: + pthread_rwlock_unlock(&rwlock); +} void pdcp::config_security(uint32_t lcid, uint8_t *k_enc, diff --git a/lib/src/upper/pdcp_entity.cc b/lib/src/upper/pdcp_entity.cc index 65bb07fae..9d0300820 100644 --- a/lib/src/upper/pdcp_entity.cc +++ b/lib/src/upper/pdcp_entity.cc @@ -218,7 +218,7 @@ void pdcp_entity::write_pdu(byte_buffer_t *pdu) rx_count, pdu->N_bytes - 4, &(pdu->msg[pdu->N_bytes - 4]))) { - log->error_hex(pdu->msg, pdu->N_bytes, "RX %s PDU SN: %d", rrc->get_rb_name(lcid).c_str(), sn); + log->error_hex(pdu->msg, pdu->N_bytes, "RX %s PDU SN: %d (Dropping PDU)", rrc->get_rb_name(lcid).c_str(), sn); goto exit; } } diff --git a/lib/src/upper/rlc.cc b/lib/src/upper/rlc.cc index 237891430..5cf561415 100644 --- a/lib/src/upper/rlc.cc +++ b/lib/src/upper/rlc.cc @@ -42,6 +42,7 @@ rlc::rlc() mac_timers = NULL; ue = NULL; default_lcid = 0; + buffer_size = 0; bzero(metrics_time, sizeof(metrics_time)); pthread_rwlock_init(&rwlock, NULL); } @@ -117,25 +118,22 @@ void rlc::get_metrics(rlc_metrics_t &m) gettimeofday(&metrics_time[2], NULL); get_time_interval(metrics_time); double secs = (double)metrics_time[0].tv_sec + metrics_time[0].tv_usec*1e-6; - - m.dl_tput_mbps = 0; - m.ul_tput_mbps = 0; for (rlc_map_t::iterator it = rlc_array.begin(); it != rlc_array.end(); ++it) { - m.dl_tput_mbps += (it->second->get_num_rx_bytes()*8/(double)1e6)/secs; - m.ul_tput_mbps += (it->second->get_num_tx_bytes()*8/(double)1e6)/secs; + m.dl_tput_mbps[it->first] = (it->second->get_num_rx_bytes()*8/static_cast(1e6))/secs; + m.ul_tput_mbps[it->first] = (it->second->get_num_tx_bytes()*8/static_cast(1e6))/secs; rlc_log->info("LCID=%d, RX throughput: %4.6f Mbps. 
TX throughput: %4.6f Mbps.\n", it->first, - (it->second->get_num_rx_bytes()*8/(double)1e6)/secs, - (it->second->get_num_tx_bytes()*8/(double)1e6)/secs); + (it->second->get_num_rx_bytes()*8/static_cast(1e6))/secs, + (it->second->get_num_tx_bytes()*8/static_cast(1e6))/secs); } // Add multicast metrics for (rlc_map_t::iterator it = rlc_array_mrb.begin(); it != rlc_array_mrb.end(); ++it) { - m.dl_tput_mbps += (it->second->get_num_rx_bytes()*8/(double)1e6)/secs; + m.dl_tput_mbps[it->first] = (it->second->get_num_rx_bytes()*8/static_cast(1e6))/secs; rlc_log->info("MCH_LCID=%d, RX throughput: %4.6f Mbps\n", it->first, - (it->second->get_num_rx_bytes()*8/(double)1e6)/secs); + (it->second->get_num_rx_bytes()*8/static_cast(1e6))/secs); } memcpy(&metrics_time[1], &metrics_time[2], sizeof(struct timeval)); @@ -144,7 +142,7 @@ void rlc::get_metrics(rlc_metrics_t &m) pthread_rwlock_unlock(&rwlock); } -// A call to reestablish stops all lcids but does not delete the instances. The mapping lcid to rlc mode can not change +// Reestablish all RLC bearer void rlc::reestablish() { pthread_rwlock_rdlock(&rwlock); @@ -160,6 +158,18 @@ void rlc::reestablish() pthread_rwlock_unlock(&rwlock); } +// Reestablish a specific RLC bearer +void rlc::reestablish(uint32_t lcid) +{ + pthread_rwlock_rdlock(&rwlock); + if (valid_lcid(lcid)) { + rlc_array.at(lcid)->reestablish(); + } else { + rlc_log->warning("RLC LCID %d doesn't exist. 
Deallocating SDU\n", lcid); + } + pthread_rwlock_unlock(&rwlock); +} + // Resetting the RLC layer returns the object to the state after the call to init(): // All LCIDs are removed, except SRB0 void rlc::reset() @@ -270,7 +280,7 @@ uint32_t rlc::get_total_mch_buffer_state(uint32_t lcid) uint32_t ret = 0; pthread_rwlock_rdlock(&rwlock); - if (valid_lcid(lcid)) { + if (valid_lcid_mrb(lcid)) { ret = rlc_array_mrb.at(lcid)->get_total_buffer_state(); } pthread_rwlock_unlock(&rwlock); @@ -296,7 +306,7 @@ int rlc::read_pdu_mch(uint32_t lcid, uint8_t *payload, uint32_t nof_bytes) uint32_t ret = 0; pthread_rwlock_rdlock(&rwlock); - if (valid_lcid(lcid)) { + if (valid_lcid_mrb(lcid)) { ret = rlc_array_mrb.at(lcid)->read_pdu(payload, nof_bytes); } pthread_rwlock_unlock(&rwlock); @@ -318,7 +328,7 @@ void rlc::write_pdu_bcch_bch(uint8_t *payload, uint32_t nof_bytes) { rlc_log->info_hex(payload, nof_bytes, "BCCH BCH message received."); byte_buffer_t *buf = pool_allocate; - if (buf) { + if (buf != NULL) { memcpy(buf->msg, payload, nof_bytes); buf->N_bytes = nof_bytes; buf->set_timestamp(); @@ -333,7 +343,7 @@ void rlc::write_pdu_bcch_dlsch(uint8_t *payload, uint32_t nof_bytes) { rlc_log->info_hex(payload, nof_bytes, "BCCH TXSCH message received."); byte_buffer_t *buf = pool_allocate; - if (buf) { + if (buf != NULL) { memcpy(buf->msg, payload, nof_bytes); buf->N_bytes = nof_bytes; buf->set_timestamp(); @@ -348,7 +358,7 @@ void rlc::write_pdu_pcch(uint8_t *payload, uint32_t nof_bytes) { rlc_log->info_hex(payload, nof_bytes, "PCCH message received."); byte_buffer_t *buf = pool_allocate; - if (buf) { + if (buf != NULL) { memcpy(buf->msg, payload, nof_bytes); buf->N_bytes = nof_bytes; buf->set_timestamp(); @@ -385,7 +395,7 @@ void rlc::add_bearer(uint32_t lcid) add_bearer(lcid, srslte_rlc_config_t()); } else { // SRB1 and SRB2 are AM - LIBLTE_RRC_RLC_CONFIG_STRUCT cnfg; + LIBLTE_RRC_RLC_CONFIG_STRUCT cnfg = {}; cnfg.rlc_mode = LIBLTE_RRC_RLC_MODE_AM; cnfg.ul_am_rlc.t_poll_retx = 
LIBLTE_RRC_T_POLL_RETRANSMIT_MS45; cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_INFINITY; @@ -440,7 +450,7 @@ void rlc::add_bearer(uint32_t lcid, srslte_rlc_config_t cnfg) rlc_log->error("Error instantiating RLC\n"); goto delete_and_exit; } - rlc_log->warning("Added radio bearer %s with mode %s\n", rrc->get_rb_name(lcid).c_str(), liblte_rrc_rlc_mode_text[cnfg.rlc_mode]); + rlc_log->warning("Added radio bearer %s in %s\n", rrc->get_rb_name(lcid).c_str(), rlc_mode_text[cnfg.rlc_mode]); goto unlock_and_exit; } else { rlc_log->warning("Bearer %s already created.\n", rrc->get_rb_name(lcid).c_str()); @@ -463,10 +473,10 @@ void rlc::add_bearer_mrb(uint32_t lcid) if (not valid_lcid_mrb(lcid)) { rlc_entity = new rlc_um(); - if (rlc_entity) { + if (rlc_entity != NULL) { // configure and add to array rlc_entity->init(rlc_log, lcid, pdcp, rrc, mac_timers); - if (rlc_entity->configure(srslte_rlc_config_t::mch_config()) == false) { + if (not rlc_entity->configure(srslte_rlc_config_t::mch_config())) { rlc_log->error("Error configuring RLC entity\n."); goto delete_and_exit; } @@ -485,7 +495,7 @@ void rlc::add_bearer_mrb(uint32_t lcid) } delete_and_exit: - if (rlc_entity) { + if (rlc_entity != NULL) { delete(rlc_entity); } @@ -498,20 +508,62 @@ void rlc::del_bearer(uint32_t lcid) { pthread_rwlock_wrlock(&rwlock); - if (valid_lcid_mrb(lcid)) { + if (valid_lcid(lcid)) { rlc_map_t::iterator it = rlc_array.find(lcid); it->second->stop(); delete(it->second); rlc_array.erase(it); rlc_log->warning("Deleted RLC bearer %s\n", rrc->get_rb_name(lcid).c_str()); } else { - rlc_log->warning("Can't delete bearer %s. Bearer doesn't exist.\n", rrc->get_rb_name(lcid).c_str()); + rlc_log->error("Can't delete bearer %s. 
Bearer doesn't exist.\n", rrc->get_rb_name(lcid).c_str()); } pthread_rwlock_unlock(&rwlock); } +void rlc::del_bearer_mrb(uint32_t lcid) +{ + pthread_rwlock_wrlock(&rwlock); + + if (valid_lcid_mrb(lcid)) { + rlc_map_t::iterator it = rlc_array_mrb.find(lcid); + it->second->stop(); + delete(it->second); + rlc_array_mrb.erase(it); + rlc_log->warning("Deleted RLC MRB bearer %s\n", rrc->get_rb_name(lcid).c_str()); + } else { + rlc_log->error("Can't delete bearer %s. Bearer doesn't exist.\n", rrc->get_rb_name(lcid).c_str()); + } + + pthread_rwlock_unlock(&rwlock); +} + + +void rlc::change_lcid(uint32_t old_lcid, uint32_t new_lcid) +{ + pthread_rwlock_wrlock(&rwlock); + + // make sure old LCID exists and new LCID is still free + if (valid_lcid(old_lcid) && not valid_lcid(new_lcid)) { + // insert old rlc entity into new LCID + rlc_map_t::iterator it = rlc_array.find(old_lcid); + rlc_common *rlc_entity = it->second; + if (not rlc_array.insert(rlc_map_pair_t(new_lcid, rlc_entity)).second) { + rlc_log->error("Error inserting RLC entity into array\n."); + goto exit; + } + // erase from old position + rlc_array.erase(it); + rlc_log->warning("Changed LCID of RLC bearer from %d to %d\n", old_lcid, new_lcid); + } else { + rlc_log->error("Can't change LCID of bearer %s from %d to %d. 
Bearer doesn't exist or new LCID already occupied.\n", rrc->get_rb_name(old_lcid).c_str(), old_lcid, new_lcid); + } +exit: + pthread_rwlock_unlock(&rwlock); +} + + /******************************************************************************* Helpers (Lock must be hold when calling those) *******************************************************************************/ @@ -544,4 +596,4 @@ bool rlc::valid_lcid_mrb(uint32_t lcid) return true; } -} // namespace srsue +} // namespace srslte diff --git a/lib/src/upper/rlc_am.cc b/lib/src/upper/rlc_am.cc index 4b15e9bc5..a30e0c264 100644 --- a/lib/src/upper/rlc_am.cc +++ b/lib/src/upper/rlc_am.cc @@ -31,110 +31,236 @@ #include #define MOD 1024 -#define RX_MOD_BASE(x) ((x-vr_r)%1024) -#define TX_MOD_BASE(x) ((x-vt_a)%1024) +#define RX_MOD_BASE(x) (((x)-vr_r)%1024) +#define TX_MOD_BASE(x) (((x)-vt_a)%1024) +#define LCID (parent->lcid) +#define RB_NAME (parent->rb_name.c_str()) namespace srslte { -rlc_am::rlc_am(uint32_t queue_len) : tx_sdu_queue(queue_len) +rlc_am::rlc_am(uint32_t queue_len) + :tx(this, queue_len) + ,rx(this) + ,log(NULL) + ,rrc(NULL) + ,pdcp(NULL) + ,mac_timers(NULL) + ,lcid(0) + ,rb_name("") + ,cfg() { - log = NULL; - pdcp = NULL; - rrc = NULL; - lcid = 0; - bzero(&cfg, sizeof(srslte_rlc_am_config_t)); - - tx_sdu = NULL; - rx_sdu = NULL; - pool = byte_buffer_pool::get_instance(); - - pthread_mutex_init(&mutex, NULL); - - vt_a = 0; - vt_ms = RLC_AM_WINDOW_SIZE; - vt_s = 0; - poll_sn = 0; - - vr_r = 0; - vr_mr = RLC_AM_WINDOW_SIZE; - vr_x = 0; - vr_ms = 0; - vr_h = 0; - - num_tx_bytes = 0; - num_rx_bytes = 0; - - pdu_without_poll = 0; - byte_without_poll = 0; - - poll_received = false; - do_status = false; } -// Warning: must call stop() to properly deallocate all buffers rlc_am::~rlc_am() { - pthread_mutex_destroy(&mutex); - pool = NULL; } void rlc_am::init(srslte::log *log_, uint32_t lcid_, srsue::pdcp_interface_rlc *pdcp_, srsue::rrc_interface_rlc *rrc_, - srslte::mac_interface_timers *mac_timers) + 
srslte::mac_interface_timers *mac_timers_) { - log = log_; + log = log_; lcid = lcid_; pdcp = pdcp_; - rrc = rrc_; - tx_enabled = true; + rrc = rrc_; + mac_timers = mac_timers_; + + rx.init(); + tx.init(); } bool rlc_am::configure(srslte_rlc_config_t cfg_) { + // determine bearer name and configure Rx/Tx objects + rb_name = rrc->get_rb_name(lcid); + + if (not rx.configure(cfg_.am)) { + return false; + } + + if (not tx.configure(cfg_.am)) { + return false; + } + + // store config cfg = cfg_.am; + log->warning("%s configured: t_poll_retx=%d, poll_pdu=%d, poll_byte=%d, max_retx_thresh=%d, " - "t_reordering=%d, t_status_prohibit=%d\n", - rrc->get_rb_name(lcid).c_str(), cfg.t_poll_retx, cfg.poll_pdu, cfg.poll_byte, cfg.max_retx_thresh, - cfg.t_reordering, cfg.t_status_prohibit); + "t_reordering=%d, t_status_prohibit=%d\n", + rb_name.c_str(), cfg.t_poll_retx, cfg.poll_pdu, cfg.poll_byte, cfg.max_retx_thresh, + cfg.t_reordering, cfg.t_status_prohibit); return true; } - -void rlc_am::empty_queue() { +void rlc_am::empty_queue() +{ // Drop all messages in TX SDU queue - pthread_mutex_lock(&mutex); - byte_buffer_t *buf; - while(tx_sdu_queue.try_read(&buf)) { - pool->deallocate(buf); + tx.empty_queue(); +} + +void rlc_am::reestablish() +{ + tx.reestablish(); // calls stop and enables tx again + rx.reestablish(); // calls only stop +} + +void rlc_am::stop() +{ + tx.stop(); + rx.stop(); +} + +rlc_mode_t rlc_am::get_mode() +{ + return RLC_MODE_AM; +} + +uint32_t rlc_am::get_bearer() +{ + return lcid; +} + +uint32_t rlc_am::get_num_rx_bytes() +{ + return rx.get_num_rx_bytes(); +} + +uint32_t rlc_am::get_num_tx_bytes() +{ + return tx.get_num_tx_bytes(); +} + +void rlc_am::reset_metrics() +{ + tx.reset_metrics(); + rx.reset_metrics(); +} + +/**************************************************************************** + * PDCP interface + ***************************************************************************/ + +void rlc_am::write_sdu(byte_buffer_t *sdu, bool blocking) +{ + 
tx.write_sdu(sdu, blocking); +} + +/**************************************************************************** + * MAC interface + ***************************************************************************/ + +uint32_t rlc_am::get_buffer_state() +{ + return tx.get_buffer_state(); +} + +uint32_t rlc_am::get_total_buffer_state() +{ + return tx.get_total_buffer_state(); +} + +int rlc_am::read_pdu(uint8_t *payload, uint32_t nof_bytes) +{ + return tx.read_pdu(payload, nof_bytes); +} + +void rlc_am::write_pdu(uint8_t *payload, uint32_t nof_bytes) +{ + rx.write_pdu(payload, nof_bytes); +} + +/**************************************************************************** + * Tx subclass implementation + ***************************************************************************/ + +rlc_am::rlc_am_tx::rlc_am_tx(rlc_am* parent_, uint32_t queue_len_) + :parent(parent_) + ,poll_retx_timer(NULL) + ,poll_retx_timer_id(0) + ,status_prohibit_timer(NULL) + ,status_prohibit_timer_id(0) + ,vt_a(0) + ,vt_ms(RLC_AM_WINDOW_SIZE) + ,vt_s(0) + ,status_prohibited(false) + ,poll_sn(0) + ,num_tx_bytes(0) + ,pdu_without_poll(0) + ,byte_without_poll(0) + ,tx_sdu(NULL) + ,tx_sdu_queue(queue_len_) + ,log(NULL) + ,cfg() + ,pool(byte_buffer_pool::get_instance()) + ,tx_enabled(false) +{ + pthread_mutex_init(&mutex, NULL); + ZERO_OBJECT(tx_status); +} + +rlc_am::rlc_am_tx::~rlc_am_tx() +{ + pthread_mutex_destroy(&mutex); +} + +void rlc_am::rlc_am_tx::init() +{ + log = parent->log; + + if (parent->mac_timers != NULL) { + poll_retx_timer_id = parent->mac_timers->timer_get_unique_id(); + poll_retx_timer = parent->mac_timers->timer_get(poll_retx_timer_id); + + status_prohibit_timer_id = parent->mac_timers->timer_get_unique_id(); + status_prohibit_timer = parent->mac_timers->timer_get(status_prohibit_timer_id); } - tx_sdu_queue.reset(); - pthread_mutex_unlock(&mutex); } -void rlc_am::reestablish() { - stop(); +bool rlc_am::rlc_am_tx::configure(srslte_rlc_am_config_t cfg_) +{ + // TODO: add config checks 
+ cfg = cfg_; + + // check timers + if (poll_retx_timer == NULL or status_prohibit_timer == NULL) { + return false; + } + + // configure timers + if (cfg.t_status_prohibit > 0) { + status_prohibit_timer->set(this, static_cast(cfg.t_status_prohibit)); + } + + if (cfg.t_poll_retx > 0) { + poll_retx_timer->set(this, static_cast(cfg.t_poll_retx)); + } + tx_enabled = true; + + return true; } -void rlc_am::stop() +void rlc_am::rlc_am_tx::stop() { - // Empty tx_sdu_queue before locking the mutex - tx_enabled = false; - usleep(100); empty_queue(); pthread_mutex_lock(&mutex); - reordering_timeout.reset(); - if(tx_sdu) { - pool->deallocate(tx_sdu); - tx_sdu = NULL; + + tx_enabled = false; + + if (parent->mac_timers != NULL && poll_retx_timer != NULL) { + poll_retx_timer->stop(); + parent->mac_timers->timer_release_id(poll_retx_timer_id); + poll_retx_timer = NULL; } - if(rx_sdu) { - pool->deallocate(rx_sdu); - rx_sdu = NULL; + + if (parent->mac_timers != NULL && status_prohibit_timer != NULL) { + status_prohibit_timer->stop(); + parent->mac_timers->timer_release_id(status_prohibit_timer_id); + status_prohibit_timer = NULL; } vt_a = 0; @@ -142,37 +268,9 @@ void rlc_am::stop() vt_s = 0; poll_sn = 0; - vr_r = 0; - vr_mr = RLC_AM_WINDOW_SIZE; - vr_x = 0; - vr_ms = 0; - vr_h = 0; - pdu_without_poll = 0; byte_without_poll = 0; - poll_received = false; - do_status = false; - - // Drop all messages in RX segments - std::map::iterator rxsegsit; - std::list::iterator segit; - for(rxsegsit = rx_segments.begin(); rxsegsit != rx_segments.end(); rxsegsit++) { - std::list l = rxsegsit->second.segments; - for(segit = l.begin(); segit != l.end(); segit++) { - pool->deallocate(segit->buf); - } - l.clear(); - } - rx_segments.clear(); - - // Drop all messages in RX window - std::map::iterator rxit; - for(rxit = rx_window.begin(); rxit != rx_window.end(); rxit++) { - pool->deallocate(rxit->second.buf); - } - rx_window.clear(); - // Drop all messages in TX window std::map::iterator txit; for(txit 
= tx_window.begin(); txit != tx_window.end(); txit++) { @@ -185,212 +283,199 @@ void rlc_am::stop() pthread_mutex_unlock(&mutex); } -rlc_mode_t rlc_am::get_mode() +void rlc_am::rlc_am_tx::empty_queue() { - return RLC_MODE_AM; + pthread_mutex_lock(&mutex); + + // deallocate all SDUs in transmit queue + while(tx_sdu_queue.size() > 0) { + byte_buffer_t *buf; + tx_sdu_queue.read(&buf); + pool->deallocate(buf); + } + + // deallocate SDU that is currently processed + if (tx_sdu != NULL) { + pool->deallocate(tx_sdu); + tx_sdu = NULL; + } + + pthread_mutex_unlock(&mutex); } -uint32_t rlc_am::get_bearer() +void rlc_am::rlc_am_tx::reestablish() { - return lcid; + stop(); + tx_enabled = true; } -/**************************************************************************** - * PDCP interface - ***************************************************************************/ - -void rlc_am::write_sdu(byte_buffer_t *sdu, bool blocking) +bool rlc_am::rlc_am_tx::do_status() { - if (!tx_enabled) { - byte_buffer_pool::get_instance()->deallocate(sdu); - return; - } - if (sdu) { - if (blocking) { - // block on write to queue - tx_sdu_queue.write(sdu); - log->info_hex(sdu->msg, sdu->N_bytes, "%s Tx SDU (%d B, tx_sdu_queue_len=%d)", rrc->get_rb_name(lcid).c_str(), sdu->N_bytes, tx_sdu_queue.size()); - } else { - // non-blocking write - if (tx_sdu_queue.try_write(sdu)) { - log->info_hex(sdu->msg, sdu->N_bytes, "%s Tx SDU (%d B, tx_sdu_queue_len=%d)", rrc->get_rb_name(lcid).c_str(), sdu->N_bytes, tx_sdu_queue.size()); - } else { - log->info_hex(sdu->msg, sdu->N_bytes, "[Dropped SDU] %s Tx SDU (%d B, tx_sdu_queue_len=%d)", rrc->get_rb_name(lcid).c_str(), sdu->N_bytes, tx_sdu_queue.size()); - pool->deallocate(sdu); - } - } - } else { - log->warning("NULL SDU pointer in write_sdu()\n"); - } + return parent->rx.get_do_status(); } -/**************************************************************************** - * MAC interface - 
***************************************************************************/ - -uint32_t rlc_am::get_total_buffer_state() +uint32_t rlc_am::rlc_am_tx::get_buffer_state() { pthread_mutex_lock(&mutex); uint32_t n_bytes = 0; uint32_t n_sdus = 0; // Bytes needed for status report - check_reordering_timeout(); - if(do_status && !status_prohibited()) { - n_bytes += prepare_status(); - log->debug("%s Buffer state - total status report: %d bytes\n", rrc->get_rb_name(lcid).c_str(), n_bytes); + if (do_status() && not status_prohibited) { + n_bytes = parent->rx.get_status(&tx_status); + log->debug("%s Buffer state - status report: %d bytes\n", RB_NAME, n_bytes); + goto unlock_and_return; } // Bytes needed for retx - if(retx_queue.size() > 0) { + if (not retx_queue.empty()) { rlc_amd_retx_t retx = retx_queue.front(); log->debug("Buffer state - retx - SN: %d, Segment: %s, %d:%d\n", retx.sn, retx.is_segment ? "true" : "false", retx.so_start, retx.so_end); if(tx_window.end() != tx_window.find(retx.sn)) { int req_bytes = required_buffer_size(retx); if (req_bytes < 0) { - log->error("In get_total_buffer_state(): Removing retx.sn=%d from queue\n", retx.sn); + log->error("In get_buffer_state(): Removing retx.sn=%d from queue\n", retx.sn); retx_queue.pop_front(); - } else { - n_bytes += req_bytes; - log->debug("Buffer state - retx: %d bytes\n", n_bytes); + goto unlock_and_return; } + n_bytes = static_cast(req_bytes); + log->debug("Buffer state - retx: %d bytes\n", n_bytes); + goto unlock_and_return; } } // Bytes needed for tx SDUs - if(tx_window.size() < 1024) { + if (tx_window.size() < 1024) { n_sdus = tx_sdu_queue.size(); - n_bytes += tx_sdu_queue.size_bytes(); - if(tx_sdu) - { + n_bytes = tx_sdu_queue.size_bytes(); + if (tx_sdu != NULL) { n_sdus++; n_bytes += tx_sdu->N_bytes; } } // Room needed for header extensions? (integer rounding) - if(n_sdus > 1) + if (n_sdus > 1) { n_bytes += ((n_sdus-1)*1.5)+0.5; + } // Room needed for fixed header? 
- if(n_bytes > 0) { + if (n_bytes > 0) { n_bytes += 3; log->debug("Buffer state - tx SDUs: %d bytes\n", n_bytes); } +unlock_and_return: pthread_mutex_unlock(&mutex); return n_bytes; } -uint32_t rlc_am::get_buffer_state() +uint32_t rlc_am::rlc_am_tx::get_total_buffer_state() { pthread_mutex_lock(&mutex); uint32_t n_bytes = 0; uint32_t n_sdus = 0; // Bytes needed for status report - check_reordering_timeout(); - if(do_status && !status_prohibited()) { - n_bytes = prepare_status(); - log->debug("%s Buffer state - status report: %d bytes\n", rrc->get_rb_name(lcid).c_str(), n_bytes); - goto unlock_and_return; - } - - // check if pollRetx timer expired (Section 5.2.2.3 in TS 36.322) - if (poll_retx()) { - // if both tx and retx buffer are empty, retransmit next PDU to be ack'ed - log->debug("Poll reTx timer expired (lcid=%d)\n", lcid); - if ((tx_window.size() > 0 && retx_queue.size() == 0 && tx_sdu_queue.size() == 0)) { - std::map::iterator it = tx_window.find(vt_s - 1); - if (it != tx_window.end()) { - log->info("Schedule last PDU (SN=%d) for reTx.\n", vt_s - 1); - rlc_amd_retx_t retx; - retx.is_segment = false; - retx.so_start = 0; - retx.so_end = tx_window[vt_s - 1].buf->N_bytes; - retx.sn = vt_s - 1; - retx_queue.push_back(retx); - } else { - log->error("Found invalid PDU in tx_window.\n"); - } - poll_retx_timeout.start(cfg.t_poll_retx); - } + if(do_status() && not status_prohibited) { + n_bytes += parent->rx.get_status(&tx_status); + log->debug("%s Buffer state - total status report: %d bytes\n", RB_NAME, n_bytes); } // Bytes needed for retx - if(retx_queue.size() > 0) { + if(not retx_queue.empty()) { rlc_amd_retx_t retx = retx_queue.front(); log->debug("Buffer state - retx - SN: %d, Segment: %s, %d:%d\n", retx.sn, retx.is_segment ? 
"true" : "false", retx.so_start, retx.so_end); if(tx_window.end() != tx_window.find(retx.sn)) { int req_bytes = required_buffer_size(retx); if (req_bytes < 0) { - log->error("In get_buffer_state(): Removing retx.sn=%d from queue\n", retx.sn); + log->error("In get_total_buffer_state(): Removing retx.sn=%d from queue\n", retx.sn); retx_queue.pop_front(); - goto unlock_and_return; + } else { + n_bytes += req_bytes; + log->debug("Buffer state - retx: %d bytes\n", n_bytes); } - n_bytes = (uint32_t) req_bytes; - log->debug("Buffer state - retx: %d bytes\n", n_bytes); - goto unlock_and_return; } } // Bytes needed for tx SDUs if(tx_window.size() < 1024) { n_sdus = tx_sdu_queue.size(); - n_bytes = tx_sdu_queue.size_bytes(); - if(tx_sdu) - { + n_bytes += tx_sdu_queue.size_bytes(); + if (tx_sdu != NULL) { n_sdus++; n_bytes += tx_sdu->N_bytes; } } // Room needed for header extensions? (integer rounding) - if(n_sdus > 1) + if (n_sdus > 1) { n_bytes += ((n_sdus-1)*1.5)+0.5; + } // Room needed for fixed header? 
- if(n_bytes > 0) { + if (n_bytes > 0) { n_bytes += 3; log->debug("Buffer state - tx SDUs: %d bytes\n", n_bytes); } -unlock_and_return: pthread_mutex_unlock(&mutex); return n_bytes; } -int rlc_am::read_pdu(uint8_t *payload, uint32_t nof_bytes) +void rlc_am::rlc_am_tx::write_sdu(byte_buffer_t *sdu, bool blocking) +{ + if (!tx_enabled) { + byte_buffer_pool::get_instance()->deallocate(sdu); + return; + } + + if (sdu != NULL) { + if (blocking) { + // block on write to queue + tx_sdu_queue.write(sdu); + log->info_hex(sdu->msg, sdu->N_bytes, "%s Tx SDU (%d B, tx_sdu_queue_len=%d)", RB_NAME, sdu->N_bytes, tx_sdu_queue.size()); + } else { + // non-blocking write + if (tx_sdu_queue.try_write(sdu)) { + log->info_hex(sdu->msg, sdu->N_bytes, "%s Tx SDU (%d B, tx_sdu_queue_len=%d)", RB_NAME, sdu->N_bytes, tx_sdu_queue.size()); + } else { + log->debug_hex(sdu->msg, sdu->N_bytes, "[Dropped SDU] %s Tx SDU (%d B, tx_sdu_queue_len=%d)", RB_NAME, sdu->N_bytes, tx_sdu_queue.size()); + pool->deallocate(sdu); + } + } + } else { + log->warning("NULL SDU pointer in write_sdu()\n"); + } +} + +int rlc_am::rlc_am_tx::read_pdu(uint8_t *payload, uint32_t nof_bytes) { pthread_mutex_lock(&mutex); + int pdu_size = 0; log->debug("MAC opportunity - %d bytes\n", nof_bytes); log->debug("tx_window size - %zu PDUs\n", tx_window.size()); + if (not tx_enabled) { + log->debug("RLC entity not active. 
Not generating PDU.\n"); + goto unlock_and_exit; + } + // Tx STATUS if requested - if(do_status && !status_prohibited()) { + if(do_status() && not status_prohibited) { pdu_size = build_status_pdu(payload, nof_bytes); goto unlock_and_exit; } - // if tx_window is full and retx_queue empty, retransmit next PDU to be ack'ed - if (tx_window.size() >= RLC_AM_WINDOW_SIZE && retx_queue.size() == 0) { - if (tx_window[vt_a].buf != NULL) { - log->warning("Full Tx window, ReTx'ing first outstanding PDU\n"); - rlc_amd_retx_t retx; - retx.is_segment = false; - retx.so_start = 0; - retx.so_end = tx_window[vt_a].buf->N_bytes; - retx.sn = vt_a; - retx_queue.push_back(retx); - } else { - log->error("Found invalid PDU in tx_window.\n"); - } + // Section 5.2.2.3 in TS 36.311, if tx_window is full and retx_queue empty, retransmit random PDU + if (tx_window.size() >= RLC_AM_WINDOW_SIZE && retx_queue.empty()) { + retransmit_random_pdu(); } // RETX if required - if(retx_queue.size() > 0) { + if (not retx_queue.empty()) { pdu_size = build_retx_pdu(payload, nof_bytes); if (pdu_size > 0) { goto unlock_and_exit; @@ -406,156 +491,120 @@ unlock_and_exit: return pdu_size; } -void rlc_am::write_pdu(uint8_t *payload, uint32_t nof_bytes) +void rlc_am::rlc_am_tx::timer_expired(uint32_t timeout_id) { - if (nof_bytes < 1) return; - pthread_mutex_lock(&mutex); - num_rx_bytes += nof_bytes; - - if(rlc_am_is_control_pdu(payload)) { - handle_control_pdu(payload, nof_bytes); - } else { - rlc_amd_pdu_header_t header; - rlc_am_read_data_pdu_header(&payload, &nof_bytes, &header); - if(header.rf) { - handle_data_pdu_segment(payload, nof_bytes, header); - }else{ - handle_data_pdu(payload, nof_bytes, header); + if (poll_retx_timer != NULL && poll_retx_timer_id == timeout_id) { + log->debug("Poll reTx timer expired for LCID=%d after %d ms\n", parent->lcid, poll_retx_timer->get_timeout()); + // Section 5.2.2.3 in TS 36.311, if tx_window is full and retx_queue empty, retransmit random PDU + if ((tx_window.size() >= 
RLC_AM_WINDOW_SIZE && retx_queue.empty() && tx_sdu_queue.size() == 0)) { + retransmit_random_pdu(); } + } else + if (status_prohibit_timer != NULL && status_prohibit_timer_id == timeout_id) { + status_prohibited = false; } pthread_mutex_unlock(&mutex); } -uint32_t rlc_am::get_num_tx_bytes() +void rlc_am::rlc_am_tx::retransmit_random_pdu() { - return num_tx_bytes; + // randomly select PDU in tx window for retransmission + std::map::iterator it = tx_window.begin(); + std::advance(it, rand() % tx_window.size()); + + log->info("Schedule SN=%d for reTx.\n", it->first); + rlc_amd_retx_t retx = {}; + retx.is_segment = false; + retx.so_start = 0; + retx.so_end = it->second.buf->N_bytes; + retx.sn = it->first; + retx_queue.push_back(retx); } -uint32_t rlc_am::get_num_rx_bytes() +uint32_t rlc_am::rlc_am_tx::get_num_tx_bytes() { - return num_rx_bytes; + return num_tx_bytes; } -void rlc_am::reset_metrics() +void rlc_am::rlc_am_tx::reset_metrics() { pthread_mutex_lock(&mutex); - num_rx_bytes = 0; num_tx_bytes = 0; pthread_mutex_unlock(&mutex); } - /**************************************************************************** - * Timer checks + * Helper functions ***************************************************************************/ -bool rlc_am::status_prohibited() -{ - return (status_prohibit_timeout.is_running() && !status_prohibit_timeout.expired()); -} - -bool rlc_am::poll_retx() -{ - return (poll_retx_timeout.is_running() && poll_retx_timeout.expired()); -} - -void rlc_am::check_reordering_timeout() +bool rlc_am::rlc_am_tx::poll_required() { - if(reordering_timeout.is_running() && reordering_timeout.expired()) - { - reordering_timeout.reset(); - log->debug("%s reordering timeout expiry - updating vr_ms\n", rrc->get_rb_name(lcid).c_str()); + if (cfg.poll_pdu > 0 && pdu_without_poll > static_cast(cfg.poll_pdu)) { + return true; + } - // 36.322 v10 Section 5.1.3.2.4 - vr_ms = vr_x; - std::map::iterator it = rx_window.find(vr_ms); - while(rx_window.end() != it) - { - 
vr_ms = (vr_ms + 1)%MOD; - it = rx_window.find(vr_ms); - } - if(poll_received) - do_status = true; + if (cfg.poll_byte > 0 && byte_without_poll > static_cast(cfg.poll_byte)) { + return true; + } - if(RX_MOD_BASE(vr_h) > RX_MOD_BASE(vr_ms)) - { - reordering_timeout.start(cfg.t_reordering); - vr_x = vr_h; + if (poll_retx_timer != NULL) { + if (poll_retx_timer->is_expired()) { + // re-arm timer (will be stopped when status PDU is received) + poll_retx_timer->reset(); + poll_retx_timer->run(); + return true; } - - debug_state(); } -} -/**************************************************************************** - * Helpers - ***************************************************************************/ - -bool rlc_am::poll_required() -{ - if(cfg.poll_pdu > 0 && pdu_without_poll > (uint32_t)cfg.poll_pdu) - return true; - if(cfg.poll_byte > 0 && byte_without_poll > (uint32_t)cfg.poll_byte) - return true; - if(poll_retx()) + if (tx_window.size() >= RLC_AM_WINDOW_SIZE) { return true; + } - if(tx_sdu_queue.size() == 0 && retx_queue.size() == 0) + if (tx_sdu_queue.size() == 0 && retx_queue.empty()) { return true; + } /* According to 5.2.2.1 in 36.322 v13.3.0 a poll should be requested if * the entire AM window is unacknowledged, i.e. no new PDU can be transmitted. * However, it seems more appropiate to request more often if polling * is disabled otherwise, e.g. every N PDUs. 
*/ - if (cfg.poll_pdu == 0 && cfg.poll_byte == 0 && vt_s % poll_periodicity == 0) + if (cfg.poll_pdu == 0 && cfg.poll_byte == 0 && vt_s % poll_periodicity == 0) { return true; - - return false; -} - -int rlc_am::prepare_status() -{ - status.N_nack = 0; - status.ack_sn = vr_ms; - - // We don't use segment NACKs - just NACK the full PDU - - uint32_t i = vr_r; - while(RX_MOD_BASE(i) < RX_MOD_BASE(vr_ms)) - { - if(rx_window.find(i) == rx_window.end()) - status.nacks[status.N_nack++].nack_sn = i; - i = (i + 1)%MOD; } - return rlc_am_packed_length(&status); + return false; } -int rlc_am::build_status_pdu(uint8_t *payload, uint32_t nof_bytes) +int rlc_am::rlc_am_tx::build_status_pdu(uint8_t *payload, uint32_t nof_bytes) { - int pdu_len = rlc_am_packed_length(&status); - if(pdu_len > 0 && nof_bytes >= (uint32_t)pdu_len) - { + int pdu_len = parent->rx.get_status(&tx_status); + if (pdu_len > 0 && nof_bytes >= static_cast(pdu_len)) { log->info("%s Tx status PDU - %s\n", - rrc->get_rb_name(lcid).c_str(), rlc_am_to_string(&status).c_str()); + RB_NAME, rlc_am_to_string(&tx_status).c_str()); - do_status = false; - poll_received = false; + parent->rx.reset_status(); - if(cfg.t_status_prohibit > 0) - status_prohibit_timeout.start(cfg.t_status_prohibit); + if (cfg.t_status_prohibit > 0 && status_prohibit_timer != NULL) { + status_prohibited = true; + + // re-arm timer + status_prohibit_timer->reset(); + status_prohibit_timer->run(); + } debug_state(); - return rlc_am_write_status_pdu(&status, payload); - }else{ + pdu_len = rlc_am_write_status_pdu(&tx_status, payload); + } else{ log->warning("%s Cannot tx status PDU - %d bytes available, %d bytes required\n", - rrc->get_rb_name(lcid).c_str(), nof_bytes, pdu_len); - return 0; + RB_NAME, nof_bytes, pdu_len); + pdu_len = 0; } + + return pdu_len; } -int rlc_am::build_retx_pdu(uint8_t *payload, uint32_t nof_bytes) +int rlc_am::rlc_am_tx::build_retx_pdu(uint8_t *payload, uint32_t nof_bytes) { // Check there is at least 1 element before 
calling front() if (retx_queue.empty()) { @@ -583,8 +632,9 @@ int rlc_am::build_retx_pdu(uint8_t *payload, uint32_t nof_bytes) retx_queue.pop_front(); return -1; } - if(retx.is_segment || req_size > (int)nof_bytes) { - log->debug("%s build_retx_pdu - resegmentation required\n", rrc->get_rb_name(lcid).c_str()); + + if (retx.is_segment || req_size > static_cast(nof_bytes)) { + log->debug("%s build_retx_pdu - resegmentation required\n", RB_NAME); return build_segment(payload, nof_bytes, retx); } @@ -595,15 +645,17 @@ int rlc_am::build_retx_pdu(uint8_t *payload, uint32_t nof_bytes) // Set poll bit pdu_without_poll++; byte_without_poll += (tx_window[retx.sn].buf->N_bytes + rlc_am_packed_length(&new_header)); - log->info("%s pdu_without_poll: %d\n", rrc->get_rb_name(lcid).c_str(), pdu_without_poll); - log->info("%s byte_without_poll: %d\n", rrc->get_rb_name(lcid).c_str(), byte_without_poll); - if(poll_required()) - { + log->info("%s pdu_without_poll: %d\n", RB_NAME, pdu_without_poll); + log->info("%s byte_without_poll: %d\n", RB_NAME, byte_without_poll); + if (poll_required()) { new_header.p = 1; poll_sn = vt_s; pdu_without_poll = 0; byte_without_poll = 0; - poll_retx_timeout.start(cfg.t_poll_retx); + if (poll_retx_timer != NULL) { + poll_retx_timer->reset(); + poll_retx_timer->run(); + } } uint8_t *ptr = payload; @@ -612,24 +664,27 @@ int rlc_am::build_retx_pdu(uint8_t *payload, uint32_t nof_bytes) retx_queue.pop_front(); tx_window[retx.sn].retx_count++; - if(tx_window[retx.sn].retx_count >= cfg.max_retx_thresh) - rrc->max_retx_attempted(); + if (tx_window[retx.sn].retx_count >= cfg.max_retx_thresh) { + log->warning("%s Signaling max number of reTx=%d for for PDU %d\n", + RB_NAME, tx_window[retx.sn].retx_count, retx.sn); + parent->rrc->max_retx_attempted(); + } + log->info("%s Retx PDU scheduled for tx. 
SN: %d, retx count: %d\n", - rrc->get_rb_name(lcid).c_str(), retx.sn, tx_window[retx.sn].retx_count); + RB_NAME, retx.sn, tx_window[retx.sn].retx_count); debug_state(); return (ptr-payload) + tx_window[retx.sn].buf->N_bytes; } -int rlc_am::build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t retx) -{ - if (!tx_window[retx.sn].buf) { +int rlc_am::rlc_am_tx::build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t retx) { + if (tx_window[retx.sn].buf == NULL) { log->error("In build_segment: retx.sn=%d has null buffer\n", retx.sn); return 0; } - if(!retx.is_segment){ + if (!retx.is_segment) { retx.so_start = 0; - retx.so_end = tx_window[retx.sn].buf->N_bytes; + retx.so_end = tx_window[retx.sn].buf->N_bytes; } // Construct new header @@ -638,28 +693,30 @@ int rlc_am::build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t r pdu_without_poll++; byte_without_poll += (tx_window[retx.sn].buf->N_bytes + rlc_am_packed_length(&new_header)); - log->info("%s pdu_without_poll: %d\n", rrc->get_rb_name(lcid).c_str(), pdu_without_poll); - log->info("%s byte_without_poll: %d\n", rrc->get_rb_name(lcid).c_str(), byte_without_poll); - - new_header.dc = RLC_DC_FIELD_DATA_PDU; - new_header.rf = 1; - new_header.fi = RLC_FI_FIELD_NOT_START_OR_END_ALIGNED; - new_header.sn = old_header.sn; - new_header.lsf = 0; - new_header.so = retx.so_start; + log->info("%s pdu_without_poll: %d\n", RB_NAME, pdu_without_poll); + log->info("%s byte_without_poll: %d\n", RB_NAME, byte_without_poll); + + new_header.dc = RLC_DC_FIELD_DATA_PDU; + new_header.rf = 1; + new_header.fi = RLC_FI_FIELD_NOT_START_OR_END_ALIGNED; + new_header.sn = old_header.sn; + new_header.lsf = 0; + new_header.so = retx.so_start; new_header.N_li = 0; - new_header.p = 0; - if(poll_required()) - { - log->debug("%s setting poll bit to request status\n", rrc->get_rb_name(lcid).c_str()); - new_header.p = 1; - poll_sn = vt_s; - pdu_without_poll = 0; + new_header.p = 0; + if (poll_required()) { + 
log->debug("%s setting poll bit to request status\n", RB_NAME); + new_header.p = 1; + poll_sn = vt_s; + pdu_without_poll = 0; byte_without_poll = 0; - poll_retx_timeout.start(cfg.t_poll_retx); + if (poll_retx_timer != NULL) { + poll_retx_timer->reset(); + poll_retx_timer->run(); + } } - uint32_t head_len = 0; + uint32_t head_len = 0; uint32_t pdu_space = 0; head_len = rlc_am_packed_length(&new_header); @@ -668,48 +725,55 @@ int rlc_am::build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t r head_len += 2; } - if(nof_bytes <= head_len) - { + if (nof_bytes <= head_len) { log->warning("%s Cannot build a PDU segment - %d bytes available, %d bytes required for header\n", - rrc->get_rb_name(lcid).c_str(), nof_bytes, head_len); + RB_NAME, nof_bytes, head_len); return 0; } - pdu_space = nof_bytes-head_len; - if(pdu_space < (retx.so_end-retx.so_start)) - retx.so_end = retx.so_start+pdu_space; + pdu_space = nof_bytes - head_len; + if (pdu_space < (retx.so_end - retx.so_start)) { + retx.so_end = retx.so_start + pdu_space; + } // Need to rebuild the li table & update fi based on so_start and so_end - if(retx.so_start == 0 && rlc_am_start_aligned(old_header.fi)) + if (retx.so_start == 0 && rlc_am_start_aligned(old_header.fi)) { new_header.fi &= RLC_FI_FIELD_NOT_END_ALIGNED; // segment is start aligned + } uint32_t lower = 0; uint32_t upper = 0; uint32_t li = 0; for(uint32_t i=0; i= retx.so_end) + if (lower >= retx.so_end) { break; + } - if(pdu_space <= 2) + if (pdu_space <= 2) { break; + } upper += old_header.li[i]; head_len = rlc_am_packed_length(&new_header); pdu_space = nof_bytes-head_len; - if(pdu_space < (retx.so_end-retx.so_start)) - retx.so_end = retx.so_start+pdu_space; + if(pdu_space < (retx.so_end-retx.so_start)) { + retx.so_end = retx.so_start + pdu_space; + } if(upper > retx.so_start && lower < retx.so_end) { // Current SDU is needed li = upper - lower; - if(upper > retx.so_end) + if (upper > retx.so_end) { li -= upper - retx.so_end; - if(lower < 
retx.so_start) + } + if (lower < retx.so_start) { li -= retx.so_start - lower; - if(lower > 0 && lower == retx.so_start) + } + if (lower > 0 && lower == retx.so_start) { new_header.fi &= RLC_FI_FIELD_NOT_END_ALIGNED; // segment start is aligned with this SDU - if(upper == retx.so_end) { + } + if (upper == retx.so_end) { new_header.fi &= RLC_FI_FIELD_NOT_START_ALIGNED; // segment end is aligned with this SDU } new_header.li[new_header.N_li++] = li; @@ -729,15 +793,17 @@ int rlc_am::build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t r if(tx_window[retx.sn].buf->N_bytes == retx.so_end) { retx_queue.pop_front(); new_header.lsf = 1; - if(rlc_am_end_aligned(old_header.fi)) + if(rlc_am_end_aligned(old_header.fi)) { new_header.fi &= RLC_FI_FIELD_NOT_START_ALIGNED; // segment is end aligned + } } else if(retx_queue.front().so_end == retx.so_end) { retx_queue.pop_front(); } else { retx_queue.front().is_segment = true; retx_queue.front().so_start = retx.so_end; - if(new_header.N_li > 0) + if (new_header.N_li > 0) { new_header.N_li--; + } } // Write header and pdu @@ -748,36 +814,36 @@ int rlc_am::build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t r memcpy(ptr, data, len); log->info("%s Retx PDU segment scheduled for tx. SN: %d, SO: %d\n", - rrc->get_rb_name(lcid).c_str(), retx.sn, retx.so_start); + RB_NAME, retx.sn, retx.so_start); debug_state(); int pdu_len = (ptr-payload) + len; - if(pdu_len > (int)nof_bytes) { + if (pdu_len > static_cast(nof_bytes)) { log->error("%s Retx PDU segment length error. Available: %d, Used: %d\n", - rrc->get_rb_name(lcid).c_str(), nof_bytes, pdu_len); + RB_NAME, nof_bytes, pdu_len); log->debug("%s Retx PDU segment length error. 
Header len: %ld, Payload len: %d, N_li: %d\n", - rrc->get_rb_name(lcid).c_str(), (ptr-payload), len, new_header.N_li); + RB_NAME, (ptr-payload), len, new_header.N_li); } return pdu_len; } -int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) +int rlc_am::rlc_am_tx::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) { - if(!tx_sdu && tx_sdu_queue.size() == 0) + if(tx_sdu == NULL && tx_sdu_queue.size() == 0) { log->info("No data available to be sent\n"); return 0; } // do not build any more PDU if window is already full - if (!tx_sdu && tx_window.size() >= RLC_AM_WINDOW_SIZE) { + if (tx_sdu == NULL && tx_window.size() >= RLC_AM_WINDOW_SIZE) { log->info("Tx window full.\n"); return 0; } byte_buffer_t *pdu = pool_allocate_blocking; - if (!pdu) { + if (pdu == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Could not allocate PDU in build_data_pdu()\n"); log->console("tx_window size: %d PDUs\n", tx_window.size()); @@ -815,17 +881,16 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) if(pdu_space <= head_len + 1) { log->warning("%s Cannot build a PDU - %d bytes available, %d bytes required for header\n", - rrc->get_rb_name(lcid).c_str(), nof_bytes, head_len); + RB_NAME, nof_bytes, head_len); pool->deallocate(pdu); return 0; } log->debug("%s Building PDU - pdu_space: %d, head_len: %d \n", - rrc->get_rb_name(lcid).c_str(), pdu_space, head_len); + RB_NAME, pdu_space, head_len); // Check for SDU segment - if(tx_sdu) - { + if (tx_sdu != NULL) { to_move = ((pdu_space-head_len) >= tx_sdu->N_bytes) ? tx_sdu->N_bytes : pdu_space-head_len; memcpy(pdu_ptr, tx_sdu->msg, to_move); last_li = to_move; @@ -836,27 +901,28 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) if(tx_sdu->N_bytes == 0) { log->debug("%s Complete SDU scheduled for tx. 
Stack latency: %ld us\n", - rrc->get_rb_name(lcid).c_str(), tx_sdu->get_latency_us()); + RB_NAME, tx_sdu->get_latency_us()); pool->deallocate(tx_sdu); tx_sdu = NULL; } - if(pdu_space > to_move) + if (pdu_space > to_move) { pdu_space -= to_move; - else + } else { pdu_space = 0; + } header.fi |= RLC_FI_FIELD_NOT_START_ALIGNED; // First byte does not correspond to first byte of SDU log->debug("%s Building PDU - added SDU segment (len:%d) - pdu_space: %d, head_len: %d \n", - rrc->get_rb_name(lcid).c_str(), to_move, pdu_space, head_len); + RB_NAME, to_move, pdu_space, head_len); } // Pull SDUs from queue - while(pdu_space > head_len + 1 && tx_sdu_queue.size() > 0) - { - if(last_li > 0) + while (pdu_space > head_len + 1 && tx_sdu_queue.size() > 0) { + if (last_li > 0) { header.li[header.N_li++] = last_li; + } head_len = rlc_am_packed_length(&header); - if(head_len >= pdu_space) { + if (head_len >= pdu_space) { header.N_li--; break; } @@ -871,17 +937,18 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) if(tx_sdu->N_bytes == 0) { log->debug("%s Complete SDU scheduled for tx. 
Stack latency: %ld us\n", - rrc->get_rb_name(lcid).c_str(), tx_sdu->get_latency_us()); + RB_NAME, tx_sdu->get_latency_us()); pool->deallocate(tx_sdu); tx_sdu = NULL; } - if(pdu_space > to_move) + if(pdu_space > to_move) { pdu_space -= to_move; - else + } else { pdu_space = 0; + } log->debug("%s Building PDU - added SDU segment (len:%d) - pdu_space: %d, head_len: %d \n", - rrc->get_rb_name(lcid).c_str(), to_move, pdu_space, head_len); + RB_NAME, to_move, pdu_space, head_len); } // Make sure, at least one SDU (segment) has been added until this point @@ -890,22 +957,26 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) return 0; } - if(tx_sdu) + if (tx_sdu != NULL) { header.fi |= RLC_FI_FIELD_NOT_END_ALIGNED; // Last byte does not correspond to last byte of SDU + } // Set Poll bit pdu_without_poll++; byte_without_poll += (pdu->N_bytes + head_len); - log->debug("%s pdu_without_poll: %d\n", rrc->get_rb_name(lcid).c_str(), pdu_without_poll); - log->debug("%s byte_without_poll: %d\n", rrc->get_rb_name(lcid).c_str(), byte_without_poll); + log->debug("%s pdu_without_poll: %d\n", RB_NAME, pdu_without_poll); + log->debug("%s byte_without_poll: %d\n", RB_NAME, byte_without_poll); if(poll_required()) { - log->debug("%s setting poll bit to request status\n", rrc->get_rb_name(lcid).c_str()); + log->debug("%s setting poll bit to request status\n", RB_NAME); header.p = 1; poll_sn = vt_s; pdu_without_poll = 0; byte_without_poll = 0; - poll_retx_timeout.start(cfg.t_poll_retx); + if (poll_retx_timer != NULL) { + poll_retx_timer->reset(); + poll_retx_timer->run(); + } } // Set SN @@ -918,47 +989,352 @@ int rlc_am::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) tx_window[header.sn].is_acked = false; tx_window[header.sn].retx_count = 0; - uint8_t *ptr = payload; - rlc_am_write_data_pdu_header(&header, &ptr); - memcpy(ptr, pdu->msg, pdu->N_bytes); - log->info_hex(payload, pdu->N_bytes, "%s PDU scheduled for tx. 
SN: %d (%d B)\n", rrc->get_rb_name(lcid).c_str(), header.sn, pdu->N_bytes); + uint8_t *ptr = payload; + rlc_am_write_data_pdu_header(&header, &ptr); + memcpy(ptr, pdu->msg, pdu->N_bytes); + log->info_hex(payload, pdu->N_bytes, "%s PDU scheduled for tx. SN: %d (%d B)\n", RB_NAME, header.sn, pdu->N_bytes); + + debug_state(); + return (ptr-payload) + pdu->N_bytes; +} + +void rlc_am::rlc_am_tx::handle_control_pdu(uint8_t *payload, uint32_t nof_bytes) +{ + pthread_mutex_lock(&mutex); + + log->info_hex(payload, nof_bytes, "%s Rx control PDU", RB_NAME); + + rlc_status_pdu_t status; + rlc_am_read_status_pdu(payload, nof_bytes, &status); + + log->info("%s Rx Status PDU: %s\n", RB_NAME, rlc_am_to_string(&status).c_str()); + + if (poll_retx_timer != NULL) { + poll_retx_timer->reset(); + } + + // flush retx queue to avoid unordered SNs, we expect the Rx to request lost PDUs again + if (status.N_nack > 0) { + retx_queue.clear(); + } + + // Handle ACKs and NACKs + std::map::iterator it; + bool update_vt_a = true; + uint32_t i = vt_a; + + while(TX_MOD_BASE(i) < TX_MOD_BASE(status.ack_sn) && + TX_MOD_BASE(i) < TX_MOD_BASE(vt_s)) + { + bool nack = false; + for(uint32_t j=0;jsecond.buf->N_bytes; + + if(status.nacks[j].has_so) { + // sanity check + if (status.nacks[j].so_start >= it->second.buf->N_bytes) { + // print error but try to send original PDU again + log->info("SO_start is larger than original PDU (%d >= %d)\n", + status.nacks[j].so_start, + it->second.buf->N_bytes); + status.nacks[j].so_start = 0; + } + + // check for special SO_end value + if(status.nacks[j].so_end == 0x7FFF) { + status.nacks[j].so_end = it->second.buf->N_bytes; + }else{ + retx.so_end = status.nacks[j].so_end + 1; + } + + if(status.nacks[j].so_start < it->second.buf->N_bytes && + status.nacks[j].so_end <= it->second.buf->N_bytes) { + retx.is_segment = true; + retx.so_start = status.nacks[j].so_start; + } else { + log->warning("%s invalid segment NACK received for SN %d. 
so_start: %d, so_end: %d, N_bytes: %d\n", + RB_NAME, i, status.nacks[j].so_start, status.nacks[j].so_end, it->second.buf->N_bytes); + } + } + retx_queue.push_back(retx); + } + } + } + } + + if(!nack) { + //ACKed SNs get marked and removed from tx_window if possible + if(tx_window.count(i) > 0) { + it = tx_window.find(i); + if (it != tx_window.end()) { + if(update_vt_a) { + if (it->second.buf != NULL) { + pool->deallocate(it->second.buf); + it->second.buf = 0; + } + tx_window.erase(it); + vt_a = (vt_a + 1)%MOD; + vt_ms = (vt_ms + 1)%MOD; + } + } + } + } + i = (i+1)%MOD; + } + + debug_state(); + + pthread_mutex_unlock(&mutex); +} + + +void rlc_am::rlc_am_tx::debug_state() +{ + log->debug("%s vt_a = %d, vt_ms = %d, vt_s = %d, poll_sn = %d\n", + RB_NAME, vt_a, vt_ms, vt_s, poll_sn); +} + + +int rlc_am::rlc_am_tx::required_buffer_size(rlc_amd_retx_t retx) +{ + if (!retx.is_segment) { + if (tx_window.count(retx.sn) == 1) { + if (tx_window[retx.sn].buf) { + return rlc_am_packed_length(&tx_window[retx.sn].header) + tx_window[retx.sn].buf->N_bytes; + } else { + log->warning("retx.sn=%d has null ptr in required_buffer_size()\n", retx.sn); + return -1; + } + } else { + log->warning("retx.sn=%d does not exist in required_buffer_size()\n", retx.sn); + return -1; + } + } + + // Construct new header + rlc_amd_pdu_header_t new_header; + rlc_amd_pdu_header_t old_header = tx_window[retx.sn].header; + + new_header.dc = RLC_DC_FIELD_DATA_PDU; + new_header.rf = 1; + new_header.p = 0; + new_header.fi = RLC_FI_FIELD_NOT_START_OR_END_ALIGNED; + new_header.sn = old_header.sn; + new_header.lsf = 0; + new_header.so = retx.so_start; + new_header.N_li = 0; + + // Need to rebuild the li table & update fi based on so_start and so_end + if(retx.so_start != 0 && rlc_am_start_aligned(old_header.fi)) { + new_header.fi &= RLC_FI_FIELD_NOT_END_ALIGNED; // segment is start aligned + } + + uint32_t lower = 0; + uint32_t upper = 0; + uint32_t li = 0; + + for(uint32_t i=0; i= retx.so_end) { + break; + } + 
+ upper += old_header.li[i]; + + if (upper > retx.so_start && lower < retx.so_end) { // Current SDU is needed + li = upper - lower; + if (upper > retx.so_end) { + li -= upper - retx.so_end; + } + if (lower < retx.so_start) { + li -= retx.so_start - lower; + } + if (lower > 0 && lower == retx.so_start) { + new_header.fi &= RLC_FI_FIELD_NOT_END_ALIGNED; // segment start is aligned with this SDU + } + if (upper == retx.so_end) { + new_header.fi &= RLC_FI_FIELD_NOT_START_ALIGNED; // segment end is aligned with this SDU + } + new_header.li[new_header.N_li++] = li; + } + + lower += old_header.li[i]; + } + +// if(tx_window[retx.sn].buf->N_bytes != retx.so_end) { +// if(new_header.N_li > 0) +// new_header.N_li--; // No li for last segment +// } + + return rlc_am_packed_length(&new_header) + (retx.so_end-retx.so_start); +} + +bool rlc_am::rlc_am_tx::retx_queue_has_sn(uint32_t sn) +{ + std::deque::iterator q_it; + for (q_it = retx_queue.begin(); q_it != retx_queue.end(); ++q_it) { + if (q_it->sn == sn) { + return true; + } + } + return false; +} + + + +/**************************************************************************** + * Rx subclass implementation + ***************************************************************************/ + +rlc_am::rlc_am_rx::rlc_am_rx(rlc_am* parent_) + :parent(parent_) + ,pool(byte_buffer_pool::get_instance()) + ,log(NULL) + ,cfg() + ,reordering_timer(NULL) + ,reordering_timer_id(0) + ,vr_r(0) + ,vr_mr(RLC_AM_WINDOW_SIZE) + ,vr_x(0) + ,vr_ms(0) + ,vr_h(0) + ,num_rx_bytes(0) + ,poll_received(false) + ,do_status(false) + ,rx_sdu(NULL) +{ + pthread_mutex_init(&mutex, NULL); +} + +rlc_am::rlc_am_rx::~rlc_am_rx() +{ + pthread_mutex_destroy(&mutex); +} + +void rlc_am::rlc_am_rx::init() +{ + log = parent->log; + if (parent->mac_timers != NULL) { + reordering_timer_id = parent->mac_timers->timer_get_unique_id(); + reordering_timer = parent->mac_timers->timer_get(reordering_timer_id); + } +} + +bool 
rlc_am::rlc_am_rx::configure(srslte_rlc_am_config_t cfg_) +{ + // TODO: add config checks + cfg = cfg_; + + // check timers + if (reordering_timer == NULL) { + return false; + } + + // configure timer + if (cfg.t_reordering > 0) { + reordering_timer->set(this, static_cast(cfg.t_reordering)); + } + + return true; +} + +void rlc_am::rlc_am_rx::reestablish() +{ + stop(); +} + +void rlc_am::rlc_am_rx::stop() +{ + pthread_mutex_lock(&mutex); + + if (parent->mac_timers != NULL && reordering_timer != NULL) { + reordering_timer->stop(); + parent->mac_timers->timer_release_id(reordering_timer_id); + reordering_timer = NULL; + } + + if (rx_sdu != NULL) { + pool->deallocate(rx_sdu); + rx_sdu = NULL; + } + + vr_r = 0; + vr_mr = RLC_AM_WINDOW_SIZE; + vr_x = 0; + vr_ms = 0; + vr_h = 0; + + poll_received = false; + do_status = false; + + // Drop all messages in RX segments + std::map::iterator rxsegsit; + std::list::iterator segit; + for(rxsegsit = rx_segments.begin(); rxsegsit != rx_segments.end(); rxsegsit++) { + std::list l = rxsegsit->second.segments; + for(segit = l.begin(); segit != l.end(); segit++) { + pool->deallocate(segit->buf); + } + l.clear(); + } + rx_segments.clear(); + + // Drop all messages in RX window + std::map::iterator rxit; + for(rxit = rx_window.begin(); rxit != rx_window.end(); rxit++) { + pool->deallocate(rxit->second.buf); + } + rx_window.clear(); - debug_state(); - return (ptr-payload) + pdu->N_bytes; + pthread_mutex_unlock(&mutex); } -void rlc_am::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header) + +void rlc_am::rlc_am_rx::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header) { std::map::iterator it; log->info_hex(payload, nof_bytes, "%s Rx data PDU SN: %d (%d B), %s", - rrc->get_rb_name(lcid).c_str(), header.sn, nof_bytes, rlc_fi_field_text[header.fi]); + RB_NAME, + header.sn, + nof_bytes, + rlc_fi_field_text[header.fi]); if(!inside_rx_window(header.sn)) { if(header.p) { - log->info("%s 
Status packet requested through polling bit\n", rrc->get_rb_name(lcid).c_str()); + log->info("%s Status packet requested through polling bit\n", RB_NAME); do_status = true; } log->info("%s SN: %d outside rx window [%d:%d] - discarding\n", - rrc->get_rb_name(lcid).c_str(), header.sn, vr_r, vr_mr); + RB_NAME, header.sn, vr_r, vr_mr); return; } it = rx_window.find(header.sn); if(rx_window.end() != it) { if(header.p) { - log->info("%s Status packet requested through polling bit\n", rrc->get_rb_name(lcid).c_str()); + log->info("%s Status packet requested through polling bit\n", RB_NAME); do_status = true; } log->info("%s Discarding duplicate SN: %d\n", - rrc->get_rb_name(lcid).c_str(), header.sn); + RB_NAME, header.sn); return; } // Write to rx window rlc_amd_rx_pdu_t pdu; pdu.buf = pool_allocate_blocking; - if (!pdu.buf) { + if (pdu.buf == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Couldn't allocate PDU in handle_data_pdu().\n"); exit(-1); @@ -971,7 +1347,7 @@ void rlc_am::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_h // check available space for payload if (nof_bytes > pdu.buf->get_tailroom()) { log->error("%s Discarding SN: %d of size %d B (available space %d B)\n", - rrc->get_rb_name(lcid).c_str(), header.sn, nof_bytes, pdu.buf->get_tailroom()); + RB_NAME, header.sn, nof_bytes, pdu.buf->get_tailroom()); pool->deallocate(pdu.buf); return; } @@ -982,21 +1358,20 @@ void rlc_am::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_h rx_window[header.sn] = pdu; // Update vr_h - if(RX_MOD_BASE(header.sn) >= RX_MOD_BASE(vr_h)) - vr_h = (header.sn + 1)%MOD; + if(RX_MOD_BASE(header.sn) >= RX_MOD_BASE(vr_h)) { + vr_h = (header.sn + 1) % MOD; + } // Update vr_ms it = rx_window.find(vr_ms); - while(rx_window.end() != it) - { + while(rx_window.end() != it) { vr_ms = (vr_ms + 1)%MOD; it = rx_window.find(vr_ms); } // Check poll bit - if(header.p) - { - log->info("%s Status packet requested through polling bit\n", 
rrc->get_rb_name(lcid).c_str()); + if (header.p) { + log->info("%s Status packet requested through polling bit\n", RB_NAME); poll_received = true; // 36.322 v10 Section 5.2.3 @@ -1012,46 +1387,47 @@ void rlc_am::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_h reassemble_rx_sdus(); // Update reordering variables and timers (36.322 v10.0.0 Section 5.1.3.2.3) - if(reordering_timeout.is_running()) - { - if(vr_x == vr_r || (!inside_rx_window(vr_x) && vr_x != vr_mr)) - { - reordering_timeout.reset(); + if (reordering_timer != NULL) { + if (reordering_timer->is_running()) { + if(vr_x == vr_r || (!inside_rx_window(vr_x) && vr_x != vr_mr)) { + reordering_timer->reset(); + } } - } - if(!reordering_timeout.is_running()) - { - if(RX_MOD_BASE(vr_h) > RX_MOD_BASE(vr_r)) - { - reordering_timeout.start(cfg.t_reordering); - vr_x = vr_h; + + if (not reordering_timer->is_running()) { + if(RX_MOD_BASE(vr_h) > RX_MOD_BASE(vr_r)) { + reordering_timer->reset(); + reordering_timer->run(); + vr_x = vr_h; + } } } debug_state(); } -void rlc_am::handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header) + +void rlc_am::rlc_am_rx::handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header) { std::map::iterator it; log->info_hex(payload, nof_bytes, "%s Rx data PDU segment. 
SN: %d, SO: %d", - rrc->get_rb_name(lcid).c_str(), header.sn, header.so); + RB_NAME, header.sn, header.so); // Check inside rx window if(!inside_rx_window(header.sn)) { if(header.p) { - log->info("%s Status packet requested through polling bit\n", rrc->get_rb_name(lcid).c_str()); + log->info("%s Status packet requested through polling bit\n", RB_NAME); do_status = true; } log->info("%s SN: %d outside rx window [%d:%d] - discarding\n", - rrc->get_rb_name(lcid).c_str(), header.sn, vr_r, vr_mr); + RB_NAME, header.sn, vr_r, vr_mr); return; } rlc_amd_rx_pdu_t segment; segment.buf = pool_allocate_blocking; - if (!segment.buf) { + if (segment.buf == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Couldn't allocate PDU in handle_data_pdu_segment().\n"); exit(-1); @@ -1070,7 +1446,7 @@ void rlc_am::handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_a if(rx_segments.end() != it) { if(header.p) { - log->info("%s Status packet requested through polling bit\n", rrc->get_rb_name(lcid).c_str()); + log->info("%s Status packet requested through polling bit\n", RB_NAME); do_status = true; } @@ -1092,15 +1468,14 @@ void rlc_am::handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_a pdu.segments.push_back(segment); rx_segments[header.sn] = pdu; - // Update vr_h - if(RX_MOD_BASE(header.sn) >= RX_MOD_BASE(vr_h)) - vr_h = (header.sn + 1)%MOD; + if (RX_MOD_BASE(header.sn) >= RX_MOD_BASE(vr_h)) { + vr_h = (header.sn + 1) % MOD; + } // Check poll bit - if(header.p) - { - log->info("%s Status packet requested through polling bit\n", rrc->get_rb_name(lcid).c_str()); + if (header.p) { + log->info("%s Status packet requested through polling bit\n", RB_NAME); poll_received = true; // 36.322 v10 Section 5.2.3 @@ -1118,107 +1493,13 @@ void rlc_am::handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_a debug_state(); } -void rlc_am::handle_control_pdu(uint8_t *payload, uint32_t nof_bytes) -{ - log->info_hex(payload, nof_bytes, "%s Rx control 
PDU", rrc->get_rb_name(lcid).c_str()); - - rlc_status_pdu_t status; - rlc_am_read_status_pdu(payload, nof_bytes, &status); - - log->info("%s Rx Status PDU: %s\n", rrc->get_rb_name(lcid).c_str(), rlc_am_to_string(&status).c_str()); - - poll_retx_timeout.reset(); - - // flush retx queue to avoid unordered SNs, we expect the Rx to request lost PDUs again - if (status.N_nack > 0) { - retx_queue.clear(); - } - - // Handle ACKs and NACKs - std::map::iterator it; - bool update_vt_a = true; - uint32_t i = vt_a; - - while(TX_MOD_BASE(i) < TX_MOD_BASE(status.ack_sn) && - TX_MOD_BASE(i) < TX_MOD_BASE(vt_s)) - { - bool nack = false; - for(uint32_t j=0;jsecond.buf->N_bytes; - - if(status.nacks[j].has_so) { - // sanity check - if (status.nacks[j].so_start >= it->second.buf->N_bytes) { - // print error but try to send original PDU again - log->info("SO_start is larger than original PDU (%d >= %d)\n", - status.nacks[j].so_start, - it->second.buf->N_bytes); - status.nacks[j].so_start = 0; - } - - // check for special SO_end value - if(status.nacks[j].so_end == 0x7FFF) { - status.nacks[j].so_end = it->second.buf->N_bytes; - }else{ - retx.so_end = status.nacks[j].so_end + 1; - } - - if(status.nacks[j].so_start < it->second.buf->N_bytes && - status.nacks[j].so_end <= it->second.buf->N_bytes) { - retx.is_segment = true; - retx.so_start = status.nacks[j].so_start; - } else { - log->warning("%s invalid segment NACK received for SN %d. 
so_start: %d, so_end: %d, N_bytes: %d\n", - rrc->get_rb_name(lcid).c_str(), i, status.nacks[j].so_start, status.nacks[j].so_end, it->second.buf->N_bytes); - } - } - - retx.sn = i; - retx_queue.push_back(retx); - } - } - } - } - if(!nack) { - //ACKed SNs get marked and removed from tx_window if possible - if(tx_window.count(i) > 0) { - it = tx_window.find(i); - if (it != tx_window.end()) { - if(update_vt_a) { - if(it->second.buf) { - pool->deallocate(it->second.buf); - it->second.buf = 0; - } - tx_window.erase(it); - vt_a = (vt_a + 1)%MOD; - vt_ms = (vt_ms + 1)%MOD; - } - } - } - } - i = (i+1)%MOD; - } - - debug_state(); -} - -void rlc_am::reassemble_rx_sdus() +void rlc_am::rlc_am_rx::reassemble_rx_sdus() { uint32_t len = 0; - if(!rx_sdu) { + if (rx_sdu == NULL) { rx_sdu = pool_allocate_blocking; - if (!rx_sdu) { + if (rx_sdu == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Could not allocate PDU in reassemble_rx_sdus() (1)\n"); exit(-1); @@ -1247,12 +1528,12 @@ void rlc_am::reassemble_rx_sdus() rx_sdu->N_bytes += len; rx_window[vr_r].buf->msg += len; rx_window[vr_r].buf->N_bytes -= len; - log->info_hex(rx_sdu->msg, rx_sdu->N_bytes, "%s Rx SDU (%d B)", rrc->get_rb_name(lcid).c_str(), rx_sdu->N_bytes); + log->info_hex(rx_sdu->msg, rx_sdu->N_bytes, "%s Rx SDU (%d B)", RB_NAME, rx_sdu->N_bytes); rx_sdu->set_timestamp(); - pdcp->write_pdu(lcid, rx_sdu); + parent->pdcp->write_pdu(parent->lcid, rx_sdu); rx_sdu = pool_allocate_blocking; - if (!rx_sdu) { + if (rx_sdu == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Could not allocate PDU in reassemble_rx_sdus() (2)\n"); exit(-1); @@ -1285,12 +1566,12 @@ void rlc_am::reassemble_rx_sdus() rx_window.erase(vr_r); } - if(rlc_am_end_aligned(rx_window[vr_r].header.fi)) { - log->info_hex(rx_sdu->msg, rx_sdu->N_bytes, "%s Rx SDU (%d B)", rrc->get_rb_name(lcid).c_str(), rx_sdu->N_bytes); + if (rlc_am_end_aligned(rx_window[vr_r].header.fi)) { + log->info_hex(rx_sdu->msg, rx_sdu->N_bytes, "%s Rx SDU (%d 
B)", RB_NAME, rx_sdu->N_bytes); rx_sdu->set_timestamp(); - pdcp->write_pdu(lcid, rx_sdu); + parent->pdcp->write_pdu(parent->lcid, rx_sdu); rx_sdu = pool_allocate_blocking; - if (!rx_sdu) { + if (rx_sdu == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Could not allocate PDU in reassemble_rx_sdus() (3)\n"); exit(-1); @@ -1310,37 +1591,105 @@ exit: } } -bool rlc_am::inside_tx_window(uint16_t sn) +void rlc_am::rlc_am_rx::reset_status() { - if(RX_MOD_BASE(sn) >= RX_MOD_BASE(vt_a) && - RX_MOD_BASE(sn) < RX_MOD_BASE(vt_ms)) - { - return true; - }else{ - return false; + pthread_mutex_lock(&mutex); + do_status = false; + poll_received = false; + pthread_mutex_unlock(&mutex); +} + +bool rlc_am::rlc_am_rx::get_do_status() +{ + return do_status; +} + +uint32_t rlc_am::rlc_am_rx::get_num_rx_bytes() +{ + return num_rx_bytes; +} + +void rlc_am::rlc_am_rx::reset_metrics() +{ + pthread_mutex_lock(&mutex); + num_rx_bytes = 0; + pthread_mutex_unlock(&mutex); +} + +void rlc_am::rlc_am_rx::write_pdu(uint8_t *payload, uint32_t nof_bytes) +{ + if (nof_bytes < 1) return; + + pthread_mutex_lock(&mutex); + num_rx_bytes += nof_bytes; + + if (rlc_am_is_control_pdu(payload)) { + // unlock mutex and pass to Tx subclass + pthread_mutex_unlock(&mutex); + parent->tx.handle_control_pdu(payload, nof_bytes); + } else { + rlc_amd_pdu_header_t header; + rlc_am_read_data_pdu_header(&payload, &nof_bytes, &header); + if (header.rf) { + handle_data_pdu_segment(payload, nof_bytes, header); + } else{ + handle_data_pdu(payload, nof_bytes, header); + } + pthread_mutex_unlock(&mutex); } } -bool rlc_am::inside_rx_window(uint16_t sn) +void rlc_am::rlc_am_rx::timer_expired(uint32_t timeout_id) { - if(RX_MOD_BASE(sn) >= RX_MOD_BASE(vr_r) && - RX_MOD_BASE(sn) < RX_MOD_BASE(vr_mr)) - { - return true; - }else{ - return false; + pthread_mutex_lock(&mutex); + if (reordering_timer != NULL && reordering_timer_id == timeout_id) { + reordering_timer->reset(); + log->debug("%s reordering timeout expiry - 
updating vr_ms\n", RB_NAME); + + // 36.322 v10 Section 5.1.3.2.4 + vr_ms = vr_x; + std::map::iterator it = rx_window.find(vr_ms); + while (rx_window.end() != it) { + vr_ms = (vr_ms + 1) % MOD; + it = rx_window.find(vr_ms); + } + + if (poll_received) { + do_status = true; + } + + if (RX_MOD_BASE(vr_h) > RX_MOD_BASE(vr_ms)) { + reordering_timer->reset(); + reordering_timer->run(); + vr_x = vr_h; + } + + debug_state(); } + pthread_mutex_unlock(&mutex); } -void rlc_am::debug_state() +// Called from Tx object (packs status PDU and returns length of it) +int rlc_am::rlc_am_rx::get_status(rlc_status_pdu_t* status) { - log->debug("%s vt_a = %d, vt_ms = %d, vt_s = %d, poll_sn = %d " - "vr_r = %d, vr_mr = %d, vr_x = %d, vr_ms = %d, vr_h = %d\n", - rrc->get_rb_name(lcid).c_str(), vt_a, vt_ms, vt_s, poll_sn, - vr_r, vr_mr, vr_x, vr_ms, vr_h); + pthread_mutex_lock(&mutex); + status->N_nack = 0; + status->ack_sn = vr_ms; + + // We don't use segment NACKs - just NACK the full PDU + uint32_t i = vr_r; + while (RX_MOD_BASE(i) < RX_MOD_BASE(vr_ms) && status->N_nack < RLC_AM_WINDOW_SIZE) { + if(rx_window.find(i) == rx_window.end()) { + status->nacks[status->N_nack].nack_sn = i; + status->N_nack++; + } + i = (i + 1)%MOD; + } + pthread_mutex_unlock(&mutex); + return rlc_am_packed_length(status); } -void rlc_am::print_rx_segments() +void rlc_am::rlc_am_rx::print_rx_segments() { std::map::iterator it; std::stringstream ss; @@ -1354,7 +1703,7 @@ void rlc_am::print_rx_segments() log->debug("%s\n", ss.str().c_str()); } -bool rlc_am::add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pdu_t *segment) +bool rlc_am::rlc_am_rx::add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pdu_t *segment) { // Check for first segment if(0 == segment->header.so) { @@ -1384,12 +1733,14 @@ bool rlc_am::add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pd uint32_t so = 0; std::list::iterator it, tmpit; for(it = pdu->segments.begin(); it != pdu->segments.end(); it++) { 
- if(so != it->header.so) + if (so != it->header.so) { return false; + } so += it->buf->N_bytes; } - if(!pdu->segments.back().header.lsf) + if (!pdu->segments.back().header.lsf) { return false; + } // We have all segments of the PDU - reconstruct and handle rlc_amd_pdu_header_t header; @@ -1435,7 +1786,7 @@ bool rlc_am::add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pd // Copy data byte_buffer_t *full_pdu = pool_allocate_blocking; - if (!full_pdu) { + if (full_pdu == NULL) { #ifdef RLC_AM_BUFFER_DEBUG log->console("Fatal Error: Could not allocate PDU in add_segment_and_check()\n"); exit(-1); @@ -1454,88 +1805,24 @@ bool rlc_am::add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pd return true; } -int rlc_am::required_buffer_size(rlc_amd_retx_t retx) +bool rlc_am::rlc_am_rx::inside_rx_window(uint16_t sn) { - if(!retx.is_segment){ - if (tx_window.count(retx.sn)) { - if (tx_window[retx.sn].buf) { - return rlc_am_packed_length(&tx_window[retx.sn].header) + tx_window[retx.sn].buf->N_bytes; - } else { - log->warning("retx.sn=%d has null ptr in required_buffer_size()\n", retx.sn); - return -1; - } - } else { - log->warning("retx.sn=%d does not exist in required_buffer_size()\n", retx.sn); - return -1; - } - } - - // Construct new header - rlc_amd_pdu_header_t new_header; - rlc_amd_pdu_header_t old_header = tx_window[retx.sn].header; - - new_header.dc = RLC_DC_FIELD_DATA_PDU; - new_header.rf = 1; - new_header.p = 0; - new_header.fi = RLC_FI_FIELD_NOT_START_OR_END_ALIGNED; - new_header.sn = old_header.sn; - new_header.lsf = 0; - new_header.so = retx.so_start; - new_header.N_li = 0; - - uint32_t head_len = 0; - - // Need to rebuild the li table & update fi based on so_start and so_end - if(retx.so_start != 0 && rlc_am_start_aligned(old_header.fi)) - new_header.fi &= RLC_FI_FIELD_NOT_END_ALIGNED; // segment is start aligned - - uint32_t lower = 0; - uint32_t upper = 0; - uint32_t li = 0; - - for(uint32_t i=0; i= retx.so_end) - break; - - upper 
+= old_header.li[i]; - - head_len = rlc_am_packed_length(&new_header); - - if(upper > retx.so_start && lower < retx.so_end) { // Current SDU is needed - li = upper - lower; - if(upper > retx.so_end) - li -= upper - retx.so_end; - if(lower < retx.so_start) - li -= retx.so_start - lower; - if(lower > 0 && lower == retx.so_start) - new_header.fi &= RLC_FI_FIELD_NOT_END_ALIGNED; // segment start is aligned with this SDU - if(upper == retx.so_end) { - new_header.fi &= RLC_FI_FIELD_NOT_START_ALIGNED; // segment end is aligned with this SDU - } - new_header.li[new_header.N_li++] = li; - } - - lower += old_header.li[i]; + if(RX_MOD_BASE(sn) >= RX_MOD_BASE(vr_r) && + RX_MOD_BASE(sn) < RX_MOD_BASE(vr_mr)) + { + return true; + }else{ + return false; } - -// if(tx_window[retx.sn].buf->N_bytes != retx.so_end) { -// if(new_header.N_li > 0) -// new_header.N_li--; // No li for last segment -// } - - return rlc_am_packed_length(&new_header) + (retx.so_end-retx.so_start); } -bool rlc_am::retx_queue_has_sn(uint32_t sn) +void rlc_am::rlc_am_rx::debug_state() { - std::deque::iterator q_it; - for(q_it = retx_queue.begin(); q_it != retx_queue.end(); q_it++) { - if(q_it->sn == sn) - return true; - } - return false; + log->debug("%s vr_r = %d, vr_mr = %d, vr_x = %d, vr_ms = %d, vr_h = %d\n", + RB_NAME, vr_r, vr_mr, vr_x, vr_ms, vr_h); } + /**************************************************************************** * Header pack/unpack helper functions * Ref: 3GPP TS 36.322 v10.0.0 Section 6.2.1 @@ -1555,14 +1842,14 @@ void rlc_am_read_data_pdu_header(uint8_t **payload, uint32_t *nof_bytes, rlc_amd uint8_t ext; uint8_t *ptr = *payload; - header->dc = (rlc_dc_field_t)((*ptr >> 7) & 0x01); + header->dc = static_cast((*ptr >> 7) & 0x01); if(RLC_DC_FIELD_DATA_PDU == header->dc) { // Fixed part header->rf = ((*ptr >> 6) & 0x01); header->p = ((*ptr >> 5) & 0x01); - header->fi = (rlc_fi_field_t)((*ptr >> 3) & 0x03); + header->fi = static_cast((*ptr >> 3) & 0x03); ext = ((*ptr >> 2) & 0x01); 
header->sn = (*ptr & 0x03) << 8; // 2 bits SN ptr++; @@ -1602,8 +1889,9 @@ void rlc_am_read_data_pdu_header(uint8_t **payload, uint32_t *nof_bytes, rlc_amd } // Account for padding if N_li is odd - if(header->N_li%2 == 1) + if (header->N_li%2 == 1) { ptr++; + } *nof_bytes -= ptr-*payload; *payload = ptr; @@ -1639,7 +1927,7 @@ void rlc_am_write_data_pdu_header(rlc_amd_pdu_header_t *header, uint8_t **payloa ptr++; // Segment part - if(header->rf) + if (header->rf) { *ptr = (header->lsf & 0x01) << 7; *ptr |= (header->so & 0x7F00) >> 8; // 7 bits of SO @@ -1670,8 +1958,9 @@ void rlc_am_write_data_pdu_header(rlc_amd_pdu_header_t *header, uint8_t **payloa } } // Pad if N_li is odd - if(header->N_li%2 == 1) + if (header->N_li%2 == 1) { ptr++; + } *payload = ptr; } @@ -1691,7 +1980,7 @@ void rlc_am_read_status_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_status_pdu srslte_bit_unpack_vector(payload, tmp.msg, nof_bytes*8); tmp.N_bits = nof_bytes*8; - rlc_dc_field_t dc = (rlc_dc_field_t)srslte_bit_pack(&ptr, 1); + rlc_dc_field_t dc = static_cast(srslte_bit_pack(&ptr, 1)); if(RLC_DC_FIELD_CONTROL_PDU == dc) { @@ -1763,7 +2052,9 @@ int rlc_am_write_status_pdu(rlc_status_pdu_t *status, uint8_t *payload) uint32_t rlc_am_packed_length(rlc_amd_pdu_header_t *header) { uint32_t len = 2; // Fixed part is 2 bytes - if(header->rf) len += 2; // Segment header is 2 bytes + if (header->rf) { + len += 2; // Segment header is 2 bytes + } len += header->N_li * 1.5 + 0.5; // Extension part - integer rounding up return len; } @@ -1840,4 +2131,4 @@ bool rlc_am_not_start_aligned(const uint8_t fi) return (fi == RLC_FI_FIELD_NOT_START_ALIGNED || fi == RLC_FI_FIELD_NOT_START_OR_END_ALIGNED); } -} // namespace srsue +} // namespace srslte diff --git a/lib/src/upper/rlc_um.cc b/lib/src/upper/rlc_um.cc index 0c6c8dfec..5a13b55f6 100644 --- a/lib/src/upper/rlc_um.cc +++ b/lib/src/upper/rlc_um.cc @@ -28,8 +28,9 @@ #include "srslte/upper/rlc_um.h" #include #include +#include -#define RX_MOD_BASE(x) 
(x-vr_uh-cfg.rx_window_size)%cfg.rx_mod +#define RX_MOD_BASE(x) (((x)-vr_uh-cfg.rx_window_size)%cfg.rx_mod) namespace srslte { @@ -75,13 +76,13 @@ bool rlc_um::configure(srslte_rlc_config_t cnfg_) return false; } - log->warning("%s configured in %s mode: t_reordering=%d ms, rx_sn_field_length=%u bits, tx_sn_field_length=%u bits\n", - rb_name.c_str(), rlc_mode_text[cnfg_.rlc_mode], - cfg.t_reordering, rlc_umd_sn_size_num[cfg.rx_sn_field_length], rlc_umd_sn_size_num[cfg.rx_sn_field_length]); - // store config cfg = cnfg_.um; + log->warning("%s configured in %s mode: ft_reordering=%d ms, rx_sn_field_length=%u bits, tx_sn_field_length=%u bits\n", + rb_name.c_str(), rlc_mode_text[cnfg_.rlc_mode], + cfg.t_reordering, rlc_umd_sn_size_num[cfg.rx_sn_field_length], rlc_umd_sn_size_num[cfg.rx_sn_field_length]); + return true; } @@ -95,6 +96,11 @@ bool rlc_um::rlc_um_rx::configure(srslte_rlc_config_t cnfg_, std::string rb_name return false; } + // set reordering timer + if (reordering_timer != NULL) { + reordering_timer->set(this, cfg.t_reordering); + } + rb_name = rb_name_; rx_enabled = true; @@ -149,7 +155,6 @@ void rlc_um::write_sdu(byte_buffer_t *sdu, bool blocking) } else { tx.try_write_sdu(sdu); } - } /**************************************************************************** @@ -199,7 +204,7 @@ void rlc_um::reset_metrics() std::string rlc_um::get_rb_name(srsue::rrc_interface_rlc *rrc, uint32_t lcid, bool is_mrb) { - if(is_mrb) { + if (is_mrb) { std::stringstream ss; ss << "MRB" << lcid; return ss.str(); @@ -465,12 +470,11 @@ int rlc_um::rlc_um_tx::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) vt_us = (vt_us + 1)%cfg.tx_mod; // Add header and TX - log->debug("%s packing PDU with length %d\n", get_rb_name(), pdu->N_bytes); rlc_um_write_data_pdu_header(&header, pdu); memcpy(payload, pdu->msg, pdu->N_bytes); uint32_t ret = pdu->N_bytes; - log->debug("%s returning length %d\n", get_rb_name(), pdu->N_bytes); + log->info("%s Transmitting PDU SN=%d (%d B)\n", 
get_rb_name(), header.sn, pdu->N_bytes); pool->deallocate(pdu); debug_state(); @@ -546,9 +550,7 @@ void rlc_um::rlc_um_rx::reestablish() void rlc_um::rlc_um_rx::stop() { pthread_mutex_lock(&mutex); - if(reordering_timer) { - reordering_timer->stop(); - } + vr_ur = 0; vr_ux = 0; vr_uh = 0; @@ -560,7 +562,8 @@ void rlc_um::rlc_um_rx::stop() rx_sdu = NULL; } - if (mac_timers && reordering_timer) { + if (mac_timers != NULL && reordering_timer != NULL) { + reordering_timer->stop(); mac_timers->timer_release_id(reordering_timer_id); reordering_timer = NULL; } @@ -643,7 +646,7 @@ void rlc_um::rlc_um_rx::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes) } if(!reordering_timer->is_running()) { if(RX_MOD_BASE(vr_uh) > RX_MOD_BASE(vr_ur)) { - reordering_timer->set(this, cfg.t_reordering); + reordering_timer->reset(); reordering_timer->run(); vr_ux = vr_uh; } @@ -715,7 +718,7 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus() // Handle last segment if (rx_sdu->N_bytes > 0 || rlc_um_start_aligned(rx_window[vr_ur].header.fi)) { - log->debug("Writing last segment in SDU buffer. Lower edge vr_ur=%d, Buffer size=%d, segment size=%d\n", + log->info("Writing last segment in SDU buffer. 
Lower edge vr_ur=%d, Buffer size=%d, segment size=%d\n", vr_ur, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes); memcpy(&rx_sdu->msg[rx_sdu->N_bytes], rx_window[vr_ur].buf->msg, rx_window[vr_ur].buf->N_bytes); @@ -754,18 +757,36 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus() // Now update vr_ur until we reach an SN we haven't yet received while(rx_window.end() != rx_window.find(vr_ur)) { + log->debug("Reassemble loop for vr_ur=%d\n", vr_ur); + if ((vr_ur_in_rx_sdu+1)%cfg.rx_mod != vr_ur) { + log->warning("PDU SN=%d lost, dropping remainder of %d\n", vr_ur_in_rx_sdu+1, vr_ur); + rx_sdu->reset(); + } + // Handle any SDU segments for(uint32_t i=0; iN_bytes == 0 && i == 0 && !rlc_um_start_aligned(rx_window[vr_ur].header.fi)) { - log->warning("Dropping PDU %d due to lost start segment\n", vr_ur); + log->warning_hex(rx_window[vr_ur].buf->msg, len, "Dropping first part of SN %d due to lost start segment\n", vr_ur); + // Advance data pointers and continue with next segment rx_window[vr_ur].buf->msg += len; rx_window[vr_ur].buf->N_bytes -= len; rx_sdu->reset(); - break; + + // beginning of next SDU? + if (rx_window[vr_ur].header.fi == RLC_FI_FIELD_NOT_START_OR_END_ALIGNED) { + len = rx_window[vr_ur].buf->N_bytes; + log->info_hex(rx_window[vr_ur].buf->msg, len, "Copying first %d bytes of new SDU\n", len); + memcpy(rx_sdu->msg, rx_window[vr_ur].buf->msg, len); + rx_sdu->N_bytes = len; + rx_window[vr_ur].buf->msg += len; + rx_window[vr_ur].buf->N_bytes -= len; + log->info("Updating vr_ur_in_rx_sdu. old=%d, new=%d\n", vr_ur_in_rx_sdu, vr_ur); + vr_ur_in_rx_sdu = vr_ur; + goto clean_up_rx_window; + } } // Check available space in SDU @@ -775,7 +796,7 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus() goto clean_up_rx_window; } - log->debug("Concatenating %d bytes in to current length %d. rx_window remaining bytes=%d, vr_ur_in_rx_sdu=%d, vr_ur=%d, rx_mod=%d, last_mod=%d\n", + log->info_hex(rx_window[vr_ur].buf->msg, len, "Concatenating %d bytes in to current length %d. 
rx_window remaining bytes=%d, vr_ur_in_rx_sdu=%d, vr_ur=%d, rx_mod=%d, last_mod=%d\n", len, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes, vr_ur_in_rx_sdu, vr_ur, cfg.rx_mod, (vr_ur_in_rx_sdu+1)%cfg.rx_mod); memcpy(&rx_sdu->msg[rx_sdu->N_bytes], rx_window[vr_ur].buf->msg, len); rx_sdu->N_bytes += len; @@ -812,11 +833,10 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus() rx_window[vr_ur].buf->N_bytes < SRSLTE_MAX_BUFFER_SIZE_BYTES && rx_window[vr_ur].buf->N_bytes + rx_sdu->N_bytes < SRSLTE_MAX_BUFFER_SIZE_BYTES) { - + log->info_hex(rx_window[vr_ur].buf->msg, rx_window[vr_ur].buf->N_bytes, "Writing last segment in SDU buffer. Updating vr_ur=%d, Buffer size=%d, segment size=%d\n", + vr_ur, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes); memcpy(&rx_sdu->msg[rx_sdu->N_bytes], rx_window[vr_ur].buf->msg, rx_window[vr_ur].buf->N_bytes); rx_sdu->N_bytes += rx_window[vr_ur].buf->N_bytes; - log->debug("Writing last segment in SDU buffer. Updating vr_ur=%d, Buffer size=%d, segment size=%d\n", - vr_ur, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes); } else { log->error("Out of bounds while reassembling SDU buffer in UM: sdu_len=%d, window_buffer_len=%d, vr_ur=%d\n", rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes, vr_ur); @@ -843,8 +863,7 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus() pdu_lost = false; } - clean_up_rx_window: - +clean_up_rx_window: // Clean up rx_window pool->deallocate(rx_window[vr_ur].buf); rx_window.erase(vr_ur); @@ -854,16 +873,15 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus() } // Only called when lock is hold +// 36.322 Section 5.1.2.2.1 bool rlc_um::rlc_um_rx::inside_reordering_window(uint16_t sn) { - if(cfg.rx_window_size == 0) { + if (cfg.rx_window_size == 0 || rx_window.empty()) { return true; } - if(RX_MOD_BASE(sn) >= RX_MOD_BASE(vr_uh-cfg.rx_window_size) && - RX_MOD_BASE(sn) < RX_MOD_BASE(vr_uh)) - { + if (RX_MOD_BASE(vr_uh-cfg.rx_window_size) <= RX_MOD_BASE(sn) && RX_MOD_BASE(sn) < RX_MOD_BASE(vr_uh)) { return true; - }else{ + } else { return 
false; } } @@ -889,8 +907,7 @@ void rlc_um::rlc_um_rx::reset_metrics() void rlc_um::rlc_um_rx::timer_expired(uint32_t timeout_id) { - if(reordering_timer_id == timeout_id) - { + if (reordering_timer_id == timeout_id) { pthread_mutex_lock(&mutex); // 36.322 v10 Section 5.1.2.2.4 @@ -898,19 +915,19 @@ void rlc_um::rlc_um_rx::timer_expired(uint32_t timeout_id) get_rb_name()); log->warning("Lost PDU SN: %d\n", vr_ur); + pdu_lost = true; rx_sdu->reset(); - while(RX_MOD_BASE(vr_ur) < RX_MOD_BASE(vr_ux)) - { + + while(RX_MOD_BASE(vr_ur) < RX_MOD_BASE(vr_ux)) { vr_ur = (vr_ur + 1)%cfg.rx_mod; log->debug("Entering Reassemble from timeout id=%d\n", timeout_id); reassemble_rx_sdus(); log->debug("Finished reassemble from timeout id=%d\n", timeout_id); } reordering_timer->stop(); - if(RX_MOD_BASE(vr_uh) > RX_MOD_BASE(vr_ur)) - { - reordering_timer->set(this, cfg.t_reordering); + if (RX_MOD_BASE(vr_uh) > RX_MOD_BASE(vr_ur)) { + reordering_timer->reset(); reordering_timer->run(); vr_ux = vr_uh; } diff --git a/lib/test/phy/phy_dl_test.c b/lib/test/phy/phy_dl_test.c index 50ea25a1e..6ed22c5d9 100644 --- a/lib/test/phy/phy_dl_test.c +++ b/lib/test/phy/phy_dl_test.c @@ -190,8 +190,8 @@ uint32_t prbset_to_bitmask() { int main(int argc, char **argv) { struct timeval t[3] = {}; size_t tx_nof_bits = 0, rx_nof_bits = 0; - srslte_enb_dl_t enb_dl = {}; - srslte_ue_dl_t ue_dl = {}; + srslte_enb_dl_t enb_dl; + srslte_ue_dl_t ue_dl; srslte_softbuffer_tx_t *softbuffer_tx[SRSLTE_MAX_TB] = {}; srslte_softbuffer_rx_t *softbuffer_rx[SRSLTE_MAX_TB] = {}; uint8_t *data_tx[SRSLTE_MAX_TB] = {}; @@ -206,6 +206,9 @@ int main(int argc, char **argv) { cf_t *signal_buffer[SRSLTE_MAX_PORTS] = {NULL}; + bzero(&enb_dl, sizeof(enb_dl)); + bzero(&ue_dl, sizeof(ue_dl)); + /* * Allocate Memory */ @@ -304,9 +307,12 @@ int main(int argc, char **argv) { /* * Run eNodeB */ - srslte_ra_dl_dci_t dci = {}; + srslte_ra_dl_dci_t dci; srslte_dci_format_t dci_format = SRSLTE_DCI_FORMAT1A; - srslte_ra_dl_grant_t grant = {}; + 
srslte_ra_dl_grant_t grant; + + bzero(&dci, sizeof(dci)); + bzero(&grant, sizeof(grant)); prbset_num = (int) ceilf((float) cell.nof_prb / srslte_ra_type0_P(cell.nof_prb)); last_prbset_num = prbset_num; diff --git a/lib/test/upper/CMakeLists.txt b/lib/test/upper/CMakeLists.txt index e3fcd4f4b..66982463f 100644 --- a/lib/test/upper/CMakeLists.txt +++ b/lib/test/upper/CMakeLists.txt @@ -36,6 +36,11 @@ add_test(rlc_am_stress_test rlc_stress_test --mode=AM --loglevel 1 --sdu_gen_del add_test(rlc_um_stress_test rlc_stress_test --mode=UM --loglevel 1) add_test(rlc_tm_stress_test rlc_stress_test --mode=TM --loglevel 1 --opp_sdu_ratio=1.0) +# Run clang-tidy if available +if(CLANG_TIDY_BIN) + set_target_properties(rlc_stress_test PROPERTIES CXX_CLANG_TIDY "${DO_CLANG_TIDY}") +endif() + add_executable(rlc_um_data_test rlc_um_data_test.cc) target_link_libraries(rlc_um_data_test srslte_upper srslte_phy srslte_common) add_test(rlc_um_data_test rlc_um_data_test) diff --git a/lib/test/upper/rlc_am_test.cc b/lib/test/upper/rlc_am_test.cc index 00793e7d3..38d2e4641 100644 --- a/lib/test/upper/rlc_am_test.cc +++ b/lib/test/upper/rlc_am_test.cc @@ -42,15 +42,21 @@ class mac_dummy_timers :public srslte::mac_interface_timers { public: - srslte::timers::timer* timer_get(uint32_t timer_id) - { - return &t; + mac_dummy_timers() : timers(8) {} + srslte::timers::timer* timer_get(uint32_t timer_id) { + return timers.get(timer_id); + } + void timer_release_id(uint32_t timer_id) { + timers.release_id(timer_id); + } + uint32_t timer_get_unique_id() { + return timers.get_unique_id(); + } + void step_all() { + timers.step_all(); } - uint32_t timer_get_unique_id(){return 0;} - void timer_release_id(uint32_t id){} - private: - srslte::timers::timer t; + srslte::timers timers; }; class rlc_am_tester @@ -135,7 +141,7 @@ private: bool running; }; -void basic_test() +bool basic_test() { srslte::log_filter log1("RLC_AM_1"); srslte::log_filter log2("RLC_AM_2"); @@ -166,8 +172,13 @@ void basic_test() 
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -215,11 +226,17 @@ void basic_test() } // Check statistics - assert(rlc1.get_num_tx_bytes() == rlc2.get_num_rx_bytes()); - assert(rlc2.get_num_tx_bytes() == rlc1.get_num_rx_bytes()); + if (rlc1.get_num_tx_bytes() != rlc2.get_num_rx_bytes()) { + return -1; + } + if (rlc2.get_num_tx_bytes() != rlc1.get_num_rx_bytes()) { + return -1; + } + + return 0; } -void concat_test() +bool concat_test() { srslte::log_filter log1("RLC_AM_1"); srslte::log_filter log2("RLC_AM_2"); @@ -250,8 +267,13 @@ void concat_test() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -284,11 +306,17 @@ void concat_test() } // check statistics - assert(rlc1.get_num_tx_bytes() == rlc2.get_num_rx_bytes()); - assert(rlc2.get_num_tx_bytes() == rlc1.get_num_rx_bytes()); + if (rlc1.get_num_tx_bytes() != rlc2.get_num_rx_bytes()) { + return -1; + } + if (rlc2.get_num_tx_bytes() != rlc1.get_num_rx_bytes()) { + return -1; + } + + return 0; } -void segment_test() +bool segment_test() { srslte::log_filter log1("RLC_AM_1"); srslte::log_filter log2("RLC_AM_2"); @@ -319,8 +347,13 @@ void segment_test() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t 
sdu_bufs[NBUFS]; @@ -370,11 +403,17 @@ void segment_test() assert(tester.sdus[i]->msg[j] == j); } - assert(rlc1.get_num_tx_bytes() == rlc2.get_num_rx_bytes()); - assert(rlc2.get_num_tx_bytes() == rlc1.get_num_rx_bytes()); + if (rlc1.get_num_tx_bytes() != rlc2.get_num_rx_bytes()) { + return -1; + } + if (rlc2.get_num_tx_bytes() != rlc1.get_num_rx_bytes()) { + return -1; + } + + return 0; } -void retx_test() +bool retx_test() { srslte::log_filter log1("RLC_AM_1"); srslte::log_filter log2("RLC_AM_2"); @@ -405,8 +444,13 @@ void retx_test() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -436,8 +480,11 @@ void retx_test() rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -462,12 +509,14 @@ void retx_test() assert(tester.n_sdus == 5); for(int i=0; iN_bytes == 1); - assert(*(tester.sdus[i]->msg) == i); + if (tester.sdus[i]->N_bytes != 1) return -1; + if (*(tester.sdus[i]->msg) != i) return -1; } + + return 0; } -void resegment_test_1() +bool resegment_test_1() { // SDUs: | 10 | 10 | 10 | 10 | 10 | // PDUs: | 10 | 10 | 10 | 10 | 10 | @@ -502,8 +551,13 @@ void resegment_test_1() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -534,8 +588,11 @@ void resegment_test_1() rlc2.write_pdu(pdu_bufs[i].msg, 
pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -570,13 +627,15 @@ void resegment_test_1() assert(tester.n_sdus == 5); for(int i=0; iN_bytes == 10); + if (tester.sdus[i]->N_bytes != 10) return -1; for(int j=0;j<10;j++) - assert(tester.sdus[i]->msg[j] == j); + if (tester.sdus[i]->msg[j] != j) return -1; } + + return 0; } -void resegment_test_2() +bool resegment_test_2() { // SDUs: | 10 | 10 | 10 | 10 | 10 | @@ -612,8 +671,13 @@ void resegment_test_2() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -644,8 +708,11 @@ void resegment_test_2() rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -677,13 +744,15 @@ void resegment_test_2() assert(tester.n_sdus == 5); for(int i=0; iN_bytes == 10); + if (tester.sdus[i]->N_bytes != 10) return -1; for(int j=0;j<10;j++) - assert(tester.sdus[i]->msg[j] == j); + if (tester.sdus[i]->msg[j] != j) return -1; } + + return 0; } -void resegment_test_3() +bool resegment_test_3() { // SDUs: | 10 | 10 | 10 | 10 | 10 | @@ -719,8 +788,13 @@ void resegment_test_3() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t 
sdu_bufs[NBUFS]; @@ -751,8 +825,11 @@ void resegment_test_3() rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -780,15 +857,16 @@ void resegment_test_3() assert(tester.n_sdus == 5); for(int i=0; iN_bytes == 10); + if (tester.sdus[i]->N_bytes != 10) return -1; for(int j=0;j<10;j++) - assert(tester.sdus[i]->msg[j] == j); + if (tester.sdus[i]->msg[j] != j) return -1; } + + return 0; } -void resegment_test_4() +bool resegment_test_4() { - // SDUs: | 10 | 10 | 10 | 10 | 10 | // PDUs: | 5 | 5| 30 | 5 | 5| // Retx PDU segments: | 15 | 15 | @@ -822,8 +900,13 @@ void resegment_test_4() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -854,8 +937,11 @@ void resegment_test_4() rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -883,15 +969,16 @@ void resegment_test_4() assert(tester.n_sdus == 5); for(int i=0; iN_bytes == 10); + if (tester.sdus[i]->N_bytes != 10) return -1; for(int j=0;j<10;j++) - assert(tester.sdus[i]->msg[j] == j); + if (tester.sdus[i]->msg[j] != j) return -1; } + + return 0; } -void resegment_test_5() +bool resegment_test_5() { - // SDUs: | 10 | 10 | 10 | 10 | 10 | // PDUs: |2|3| 40 |3|2| // Retx PDU segments: | 20 | 20 | @@ -908,8 +995,6 @@ void resegment_test_5() rlc_am rlc1; rlc_am rlc2; - int len; - log1.set_level(srslte::LOG_LEVEL_DEBUG); 
log2.set_level(srslte::LOG_LEVEL_DEBUG); @@ -925,8 +1010,13 @@ void resegment_test_5() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 5 SDUs into RLC1 byte_buffer_t sdu_bufs[NBUFS]; @@ -957,8 +1047,11 @@ void resegment_test_5() rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -986,13 +1079,15 @@ void resegment_test_5() assert(tester.n_sdus == 5); for(int i=0; iN_bytes == 10); + if (tester.sdus[i]->N_bytes != 10) return -1; for(int j=0;j<10;j++) - assert(tester.sdus[i]->msg[j] == j); + if (tester.sdus[i]->msg[j] != j) return -1; } + + return 0; } -void resegment_test_6() +bool resegment_test_6() { // SDUs: |10|10|10| 54 | 54 | 54 | 54 | 54 | 54 | // PDUs: |10|10|10| 270 | 54 | @@ -1027,8 +1122,13 @@ void resegment_test_6() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push SDUs into RLC1 byte_buffer_t sdu_bufs[9]; @@ -1069,8 +1169,11 @@ void resegment_test_6() rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes); } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(4 == rlc2.get_buffer_state()); @@ -1111,14 +1214,17 @@ void resegment_test_6() } for(int i=3;i<9;i++) { - assert(tester.sdus[i]->N_bytes == 54); - for(int j=0;j<54;j++) - assert(tester.sdus[i]->msg[j] == j); + 
if(tester.sdus[i]->N_bytes != 54) return -1; + for(int j=0;j<54;j++) { + if (tester.sdus[i]->msg[j] != j) return -1; + } } + + return 0; } // Retransmission of PDU segments of the same size -void resegment_test_7() +bool resegment_test_7() { // SDUs: | 30 | 30 | // PDUs: | 13 | 13 | 11 | 13 | 10 | @@ -1165,8 +1271,13 @@ void resegment_test_7() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 2 SDUs into RLC1 byte_buffer_t sdu_bufs[N_SDU_BUFS]; @@ -1201,8 +1312,11 @@ void resegment_test_7() } } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(12 == rlc1.get_buffer_state()); @@ -1222,8 +1336,6 @@ void resegment_test_7() } } - usleep(10000); - // Read status PDU from RLC2 assert(rlc2.get_buffer_state()); byte_buffer_t status_buf; @@ -1237,7 +1349,6 @@ void resegment_test_7() assert(15 == rlc1.get_buffer_state()); - // second round of retx, forcing resegmentation byte_buffer_t retx2[4]; for (uint32_t i = 0; i < 4; i++) { @@ -1253,26 +1364,47 @@ void resegment_test_7() // check buffer states assert(0 == rlc1.get_buffer_state()); + + // Step timers until poll_retx timeout expires + cnt = 5; + while (cnt--) { + timers.step_all(); + } + + // Read status PDU from RLC2 + assert(rlc2.get_buffer_state()); + status_buf.N_bytes = rlc2.read_pdu(status_buf.msg, 10); // 10 bytes is enough to hold the status + + // Write status PDU to RLC1 + rlc1.write_pdu(status_buf.msg, status_buf.N_bytes); +#if HAVE_PCAP + pcap.write_ul_am_ccch(status_buf.msg, status_buf.N_bytes); +#endif + + // check status again + assert(0 == rlc1.get_buffer_state()); assert(0 == rlc2.get_buffer_state()); // Check number of SDUs and their content 
assert(tester.n_sdus == N_SDU_BUFS); for(int i=0; iN_bytes == sdu_size); + if (tester.sdus[i]->N_bytes != sdu_size) return -1; for(uint32_t j=0;jmsg[j] == i); + if (tester.sdus[i]->msg[j] != i) return -1; } } #if HAVE_PCAP pcap.close(); #endif + + return 0; } // Retransmission of PDU segments with different size -void resegment_test_8() +bool resegment_test_8() { // SDUs: | 30 | 30 | // PDUs: | 15 | 15 | 15 | 15 | 15 | @@ -1318,8 +1450,13 @@ void resegment_test_8() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); - rlc2.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } + + if (not rlc2.configure(&cnfg)) { + return -1; + } // Push 2 SDUs into RLC1 byte_buffer_t sdu_bufs[N_SDU_BUFS]; @@ -1354,8 +1491,11 @@ void resegment_test_8() } } - // Sleep to let reordering timeout expire - usleep(10000); + // Step timers until reordering timeout expires + int cnt = 5; + while (cnt--) { + timers.step_all(); + } assert(12 == rlc1.get_buffer_state()); @@ -1375,7 +1515,11 @@ void resegment_test_8() } } - usleep(20000); + // Step timers until reordering timeout expires + cnt = 7; + while (cnt--) { + timers.step_all(); + } // Read status PDU from RLC2 assert(rlc2.get_buffer_state()); @@ -1402,27 +1546,41 @@ void resegment_test_8() #endif } + // get BSR from RLC2 + status_buf.N_bytes = rlc2.read_pdu(status_buf.msg, 10); // 10 bytes is enough to hold the status + + // Write status PDU to RLC1 + rlc1.write_pdu(status_buf.msg, status_buf.N_bytes); +#if HAVE_PCAP + pcap.write_ul_am_ccch(status_buf.msg, status_buf.N_bytes); +#endif + // check buffer states - assert(0 == rlc1.get_buffer_state()); - assert(0 == rlc2.get_buffer_state()); + if (rlc1.get_buffer_state() != 0) { + return -1; + }; + if (rlc2.get_buffer_state() != 0) { + return -1; + }; // Check number of SDUs and their content assert(tester.n_sdus == N_SDU_BUFS); - for(int i=0; iN_bytes == sdu_size); + for(int i=0; iN_bytes 
!= sdu_size) return -1; for(uint32_t j=0;jmsg[j] == i); + if (tester.sdus[i]->msg[j] != i) return -1; } } #if HAVE_PCAP pcap.close(); #endif + + return 0; } -void reset_test() +bool reset_test() { srslte::log_filter log1("RLC_AM_1"); srslte::log_filter log2("RLC_AM_2"); @@ -1449,7 +1607,9 @@ void reset_test() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } // Push 1 SDU of size 10 into RLC1 byte_buffer_t sdu_buf; @@ -1473,10 +1633,14 @@ void reset_test() len = rlc1.read_pdu(pdu_bufs.msg, 100); pdu_bufs.N_bytes = len; - assert(0 == rlc1.get_buffer_state()); + if (0 != rlc1.get_buffer_state()) { + return -1; + } + + return 0; } -void stop_test() +bool stop_test() { srslte::log_filter log1("RLC_AM_1"); log1.set_level(srslte::LOG_LEVEL_DEBUG); @@ -1499,7 +1663,9 @@ void stop_test() cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4; cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5; - rlc1.configure(&cnfg); + if (not rlc1.configure(&cnfg)) { + return -1; + } // start thread reading ul_writer writer(&rlc1); @@ -1510,48 +1676,93 @@ void stop_test() // stop RLC1 rlc1.stop(); + + return 0; } -int main(int argc, char **argv) { - basic_test(); +int main(int argc, char **argv) +{ + if (basic_test()) { + printf("basic_test failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - concat_test(); + if (concat_test()) { + printf("concat_test failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - segment_test(); + if (segment_test()) { + printf("segment_test failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - retx_test(); + if (retx_test()) { + printf("retx_test failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_1(); + if (resegment_test_1()) { + printf("resegment_test_1 failed\n"); + exit(-1); + }; 
byte_buffer_pool::get_instance()->cleanup(); - resegment_test_2(); + if (resegment_test_2()) { + printf("resegment_test_2 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_3(); + if (resegment_test_3()) { + printf("resegment_test_3 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_4(); + if (resegment_test_4()) { + printf("resegment_test_4 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_5(); + if (resegment_test_5()) { + printf("resegment_test_5 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_6(); + if (resegment_test_6()) { + printf("resegment_test_6 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_7(); + if (resegment_test_7()) { + printf("resegment_test_7 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - resegment_test_8(); + if (resegment_test_8()) { + printf("resegment_test_8 failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - reset_test(); + if (reset_test()) { + printf("reset_test failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); - - stop_test(); + + if (stop_test()) { + printf("stop_test failed\n"); + exit(-1); + }; byte_buffer_pool::get_instance()->cleanup(); } diff --git a/lib/test/upper/rlc_stress_test.cc b/lib/test/upper/rlc_stress_test.cc index ec02a699a..631c2015f 100644 --- a/lib/test/upper/rlc_stress_test.cc +++ b/lib/test/upper/rlc_stress_test.cc @@ -25,7 +25,7 @@ */ #include -#include +#include #include #include "srslte/common/log_filter.h" #include "srslte/common/logger_stdout.h" @@ -34,10 +34,10 @@ #include "srslte/upper/rlc.h" #include #include -#include +#include #include -#define SDU_SIZE 1500 +#define LOG_HEX_LIMIT (-1) using namespace std; using namespace srsue; @@ -46,6 +46,7 @@ namespace bpo = boost::program_options; typedef struct { std::string mode; + 
uint32_t sdu_size; uint32_t test_duration_sec; float error_rate; uint32_t sdu_gen_delay_usec; @@ -55,6 +56,8 @@ typedef struct { bool single_tx; bool write_pcap; float opp_sdu_ratio; + bool zero_seed; + bool pedantic; } stress_test_args_t; void parse_args(stress_test_args_t *args, int argc, char *argv[]) { @@ -71,6 +74,7 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) { common.add_options() ("mode", bpo::value(&args->mode)->default_value("AM"), "Whether to test RLC acknowledged or unacknowledged mode (AM/UM)") ("duration", bpo::value(&args->test_duration_sec)->default_value(5), "Duration (sec)") + ("sdu_size", bpo::value(&args->sdu_size)->default_value(1500), "Size of SDUs") ("sdu_gen_delay", bpo::value(&args->sdu_gen_delay_usec)->default_value(0), "SDU generation delay (usec)") ("pdu_tx_delay", bpo::value(&args->pdu_tx_delay_usec)->default_value(0), "Delay in MAC for transfering PDU from tx'ing RLC to rx'ing RLC (usec)") ("error_rate", bpo::value(&args->error_rate)->default_value(0.1), "Rate at which RLC PDUs are dropped") @@ -78,7 +82,9 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) { ("reestablish", bpo::value(&args->reestablish)->default_value(false), "Mimic RLC reestablish during execution") ("loglevel", bpo::value(&args->log_level)->default_value(srslte::LOG_LEVEL_DEBUG), "Log level (1=Error,2=Warning,3=Info,4=Debug)") ("singletx", bpo::value(&args->single_tx)->default_value(false), "If set to true, only one node is generating data") - ("pcap", bpo::value(&args->write_pcap)->default_value(false), "Whether to write all RLC PDU to PCAP file"); + ("pcap", bpo::value(&args->write_pcap)->default_value(false), "Whether to write all RLC PDU to PCAP file") + ("zeroseed", bpo::value(&args->zero_seed)->default_value(false), "Whether to initialize random seed to zero") + ("pedantic", bpo::value(&args->pedantic)->default_value(true), "Whether to perform strict SDU size checking at receiver"); // these options are allowed on the 
command line bpo::options_description cmdline_options; @@ -90,7 +96,7 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) { bpo::notify(vm); // help option was given - print usage and exit - if (vm.count("help")) { + if (vm.count("help") > 0) { cout << "Usage: " << argv[0] << " [OPTIONS] config_file" << endl << endl; cout << common << endl << general << endl; exit(0); @@ -102,21 +108,23 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) { } } -class mac_reader - :public thread +class mac_dummy + :public srslte::mac_interface_timers + ,public thread { public: - mac_reader(rlc_interface_mac *rlc1_, rlc_interface_mac *rlc2_, float fail_rate_, float opp_sdu_ratio_, uint32_t pdu_tx_delay_usec_, rlc_pcap *pcap_, uint32_t lcid_, bool is_dl_ = true) + mac_dummy(rlc_interface_mac *rlc1_, rlc_interface_mac *rlc2_, stress_test_args_t args_, uint32_t lcid_, rlc_pcap* pcap_ = NULL) + :timers(8) + ,run_enable(true) + ,rlc1(rlc1_) + ,rlc2(rlc2_) + ,args(args_) + ,pcap(pcap_) + ,lcid(lcid_) + ,log("MAC ") { - rlc1 = rlc1_; - rlc2 = rlc2_; - fail_rate = fail_rate_; - opp_sdu_ratio = opp_sdu_ratio_; - run_enable = true; - pdu_tx_delay_usec = pdu_tx_delay_usec_; - pcap = pcap_; - is_dl = is_dl_; - lcid = lcid_; + log.set_level(static_cast(args.log_level)); + log.set_hex_limit(LOG_HEX_LIMIT); } void stop() @@ -125,99 +133,94 @@ public: wait_thread_finish(); } + srslte::timers::timer* timer_get(uint32_t timer_id) + { + return timers.get(timer_id); + } + uint32_t timer_get_unique_id() { + return timers.get_unique_id(); + } + void timer_release_id(uint32_t timer_id) { + timers.release_id(timer_id); + } + void step_timer() { + timers.step_all(); + } + private: - void run_thread() + void run_tti(rlc_interface_mac *tx_rlc, rlc_interface_mac *rx_rlc, bool is_dl) { - byte_buffer_t *pdu = byte_buffer_pool::get_instance()->allocate("mac_reader::run_thread"); + byte_buffer_t *pdu = byte_buffer_pool::get_instance()->allocate(__PRETTY_FUNCTION__); if (!pdu) { 
printf("Fatal Error: Could not allocate PDU in mac_reader::run_thread\n"); exit(-1); } - while(run_enable) { - // generate MAC opportunities of random size or with fixed ratio - float r = opp_sdu_ratio ? opp_sdu_ratio : (float)rand()/RAND_MAX; - int opp_size = r*SDU_SIZE; - uint32_t buf_state = rlc1->get_buffer_state(lcid); - if (buf_state) { - int read = rlc1->read_pdu(lcid, pdu->msg, opp_size); - if (pdu_tx_delay_usec) usleep(pdu_tx_delay_usec); - if(((float)rand()/RAND_MAX > fail_rate) && read>0) { - pdu->N_bytes = read; - rlc2->write_pdu(lcid, pdu->msg, pdu->N_bytes); - if (is_dl) { - pcap->write_dl_am_ccch(pdu->msg, pdu->N_bytes); - } else { - pcap->write_ul_am_ccch(pdu->msg, pdu->N_bytes); - } + float r = args.opp_sdu_ratio ? args.opp_sdu_ratio : static_cast(rand())/RAND_MAX; + int opp_size = r*args.sdu_size; + uint32_t buf_state = tx_rlc->get_buffer_state(lcid); + if (buf_state > 0) { + int read = tx_rlc->read_pdu(lcid, pdu->msg, opp_size); + pdu->N_bytes = read; + if (args.pdu_tx_delay_usec > 0) { + usleep(args.pdu_tx_delay_usec); + } + if(((float)rand()/RAND_MAX > args.error_rate) && read>0) { + rx_rlc->write_pdu(lcid, pdu->msg, pdu->N_bytes); + if (is_dl) { + pcap->write_dl_am_ccch(pdu->msg, pdu->N_bytes); + } else { + pcap->write_ul_am_ccch(pdu->msg, pdu->N_bytes); } + } else { + log.warning_hex(pdu->msg, pdu->N_bytes, "Dropping RLC PDU (%d B)\n", pdu->N_bytes); } } byte_buffer_pool::get_instance()->deallocate(pdu); } - rlc_interface_mac *rlc1; - rlc_interface_mac *rlc2; - float fail_rate; - float opp_sdu_ratio; - uint32_t pdu_tx_delay_usec; - rlc_pcap *pcap; - uint32_t lcid; - bool is_dl; - bool run_enable; -}; - -class mac_dummy - :public srslte::mac_interface_timers -{ -public: - mac_dummy(rlc_interface_mac *rlc1_, rlc_interface_mac *rlc2_, float fail_rate_, float opp_sdu_ratio_, int32_t pdu_tx_delay, uint32_t lcid, rlc_pcap* pcap = NULL) - :r1(rlc1_, rlc2_, fail_rate_, opp_sdu_ratio_, pdu_tx_delay, pcap, lcid, true) - ,r2(rlc2_, rlc1_, fail_rate_, 
opp_sdu_ratio_, pdu_tx_delay, pcap, lcid, false) + void run_thread() { - } + while (run_enable) { + // Downlink direction first (RLC1->RLC2) + run_tti(rlc1, rlc2, true); - void start() - { - r1.start(7); - r2.start(7); - } + // UL direction (RLC2->RLC1) + run_tti(rlc2, rlc1, false); - void stop() - { - r1.stop(); - r2.stop(); - } - - srslte::timers::timer* timer_get(uint32_t timer_id) - { - return &t; + // step timer + step_timer(); + } } - uint32_t timer_get_unique_id(){return 0;} - void timer_release_id(uint32_t id){} -private: - srslte::timers::timer t; - - mac_reader r1; - mac_reader r2; + rlc_interface_mac *rlc1; + rlc_interface_mac *rlc2; + srslte::timers timers; + bool run_enable; + stress_test_args_t args; + rlc_pcap *pcap; + uint32_t lcid; + srslte::log_filter log; }; - class rlc_tester :public pdcp_interface_rlc ,public rrc_interface_rlc ,public thread { public: - rlc_tester(rlc_interface_pdcp *rlc_, std::string name_, uint32_t sdu_gen_delay_usec_, uint32_t lcid_){ - rlc = rlc_; - run_enable = true; - rx_pdus = 0; - name = name_; - sdu_gen_delay_usec = sdu_gen_delay_usec_; - lcid = lcid_; + rlc_tester(rlc_interface_pdcp *rlc_, std::string name_, stress_test_args_t args_, uint32_t lcid_) + :log("Testr") + ,rlc(rlc_) + ,run_enable(true) + ,rx_pdus() + ,name(name_) + ,args(args_) + ,lcid(lcid_) + { + log.set_level(srslte::LOG_LEVEL_ERROR); + log.set_hex_limit(LOG_HEX_LIMIT); } void stop() @@ -230,9 +233,12 @@ public: void write_pdu(uint32_t rx_lcid, byte_buffer_t *sdu) { assert(rx_lcid == lcid); - if (sdu->N_bytes != SDU_SIZE) { - printf("Received PDU with size %d, expected %d. 
Exiting.\n", sdu->N_bytes, SDU_SIZE); - exit(-1); + if (sdu->N_bytes != args.sdu_size) { + log.error_hex(sdu->msg, sdu->N_bytes, "Received SDU with size %d, expected %d.\n", sdu->N_bytes, args.sdu_size); + // exit if in pedantic mode or SDU is not a multiple of the expected size + if (args.pedantic || sdu->N_bytes % args.sdu_size != 0) { + exit(-1); + } } byte_buffer_pool::get_instance()->deallocate(sdu); @@ -245,39 +251,41 @@ public: // RRC interface void max_retx_attempted(){} - std::string get_rb_name(uint32_t rx_lcid) { return std::string(""); } + std::string get_rb_name(uint32_t rx_lcid) { return std::string("DRB1"); } int get_nof_rx_pdus() { return rx_pdus; } private: - void run_thread() - { + void run_thread() { uint8_t sn = 0; while(run_enable) { byte_buffer_t *pdu = byte_buffer_pool::get_instance()->allocate("rlc_tester::run_thread"); - if (!pdu) { + if (pdu == NULL) { printf("Error: Could not allocate PDU in rlc_tester::run_thread\n\n\n"); // backoff for a bit usleep(1000); continue; } - for (uint32_t i = 0; i < SDU_SIZE; i++) { + for (uint32_t i = 0; i < args.sdu_size; i++) { pdu->msg[i] = sn; } sn++; - pdu->N_bytes = SDU_SIZE; + pdu->N_bytes = args.sdu_size; rlc->write_sdu(lcid, pdu); - if (sdu_gen_delay_usec) usleep(sdu_gen_delay_usec); + if (args.sdu_gen_delay_usec > 0) { + usleep(args.sdu_gen_delay_usec); + } } } bool run_enable; - long rx_pdus; + uint64_t rx_pdus; uint32_t lcid; + srslte::log_filter log; std::string name; - uint32_t sdu_gen_delay_usec; + stress_test_args_t args; rlc_interface_pdcp *rlc; }; @@ -286,10 +294,10 @@ void stress_test(stress_test_args_t args) { srslte::log_filter log1("RLC_1"); srslte::log_filter log2("RLC_2"); - log1.set_level((LOG_LEVEL_ENUM)args.log_level); - log2.set_level((LOG_LEVEL_ENUM)args.log_level); - log1.set_hex_limit(-1); - log2.set_hex_limit(-1); + log1.set_level(static_cast(args.log_level)); + log2.set_level(static_cast(args.log_level)); + log1.set_hex_limit(LOG_HEX_LIMIT); + 
log2.set_hex_limit(LOG_HEX_LIMIT); rlc_pcap pcap; uint32_t lcid = 1; @@ -327,9 +335,9 @@ void stress_test(stress_test_args_t args) rlc rlc1; rlc rlc2; - rlc_tester tester1(&rlc1, "tester1", args.sdu_gen_delay_usec, lcid); - rlc_tester tester2(&rlc2, "tester2", args.sdu_gen_delay_usec, lcid); - mac_dummy mac(&rlc1, &rlc2, args.error_rate, args.opp_sdu_ratio, args.pdu_tx_delay_usec, lcid, &pcap); + rlc_tester tester1(&rlc1, "tester1", args, lcid); + rlc_tester tester2(&rlc2, "tester2", args, lcid); + mac_dummy mac(&rlc1, &rlc2, args, lcid, &pcap); ue_interface ue; rlc1.init(&tester1, &tester1, &ue, &log1, &mac, 0); @@ -347,6 +355,10 @@ void stress_test(stress_test_args_t args) } mac.start(); + if (args.test_duration_sec < 1) { + args.test_duration_sec = 1; + } + for (uint32_t i = 0; i < args.test_duration_sec; i++) { // if enabled, mimic reestablishment every second if (args.reestablish) { @@ -356,41 +368,55 @@ void stress_test(stress_test_args_t args) usleep(1e6); } + printf("Test finished, tearing down ..\n"); + // Stop RLC instances first to release blocking writers rlc1.stop(); rlc2.stop(); + printf("RLC entities stopped.\n"); + + // Stop upper layer writers tester1.stop(); tester2.stop(); + + printf("Writers stopped.\n"); + mac.stop(); if (args.write_pcap) { pcap.close(); } - rlc_metrics_t metrics; + rlc_metrics_t metrics = {}; rlc1.get_metrics(metrics); - printf("RLC1 received %d SDUs in %ds (%.2f PDU/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n", + printf("RLC1 received %d SDUs in %ds (%.2f/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n", tester1.get_nof_rx_pdus(), args.test_duration_sec, - (float)tester1.get_nof_rx_pdus()/args.test_duration_sec, - metrics.dl_tput_mbps, - metrics.ul_tput_mbps); + static_cast(tester1.get_nof_rx_pdus()/args.test_duration_sec), + metrics.dl_tput_mbps[lcid], + metrics.ul_tput_mbps[lcid]); rlc2.get_metrics(metrics); - printf("RLC2 received %d SDUs in %ds (%.2f PDU/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n", + printf("RLC2 
received %d SDUs in %ds (%.2f/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n", tester2.get_nof_rx_pdus(), args.test_duration_sec, - (float)tester2.get_nof_rx_pdus()/args.test_duration_sec, - metrics.dl_tput_mbps, - metrics.ul_tput_mbps); + static_cast(tester2.get_nof_rx_pdus()/args.test_duration_sec), + metrics.dl_tput_mbps[lcid], + metrics.ul_tput_mbps[lcid]); } int main(int argc, char **argv) { - stress_test_args_t args; + stress_test_args_t args = {}; parse_args(&args, argc, argv); + if (args.zero_seed) { + srand(0); + } else { + srand(time(NULL)); + } + stress_test(args); byte_buffer_pool::get_instance()->cleanup(); diff --git a/srsenb/enb.conf.example b/srsenb/enb.conf.example index 0eb65bd92..53e43e799 100644 --- a/srsenb/enb.conf.example +++ b/srsenb/enb.conf.example @@ -141,18 +141,20 @@ nof_ctrl_symbols = 3 ##################################################################### # Expert configuration options # -# pdsch_max_its: Maximum number of turbo decoder iterations (Default 4) +# pusch_max_its: Maximum number of turbo decoder iterations (Default 4) +# pusch_8bit_decoder: Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental) # nof_phy_threads: Selects the number of PHY threads (maximum 4, minimum 1, default 2) # metrics_period_secs: Sets the period at which metrics are requested from the UE. # pregenerate_signals: Pregenerate uplink signals after attach. Improves CPU performance. # tx_amplitude: Transmit amplitude factor (set 0-1 to reduce PAPR) # link_failure_nof_err: Number of PUSCH failures after which a radio-link failure is triggered. 
# a link failure is when SNR<0 and CRC=KO -# max_prach_offset_us: Maximum allowed RACH offset (in us) +# max_prach_offset_us: Maximum allowed RACH offset (in us) # ##################################################################### [expert] -#pdsch_max_its = 4 +#pusch_max_its = 8 # These are half iterations +#pusch_8bit_decoder = false #nof_phy_threads = 2 #pregenerate_signals = false #tx_amplitude = 0.6 diff --git a/srsenb/hdr/phy/phch_common.h b/srsenb/hdr/phy/phch_common.h index 03cff5eaf..47e1add77 100644 --- a/srsenb/hdr/phy/phch_common.h +++ b/srsenb/hdr/phy/phch_common.h @@ -28,6 +28,7 @@ #define SRSENB_PHCH_COMMON_H #include +#include #include "srslte/interfaces/enb_interfaces.h" #include "srslte/interfaces/enb_metrics_interface.h" #include "srslte/common/gen_mch_tables.h" @@ -36,11 +37,13 @@ #include "srslte/common/thread_pool.h" #include "srslte/radio/radio.h" #include + namespace srsenb { typedef struct { float max_prach_offset_us; int pusch_max_its; + bool pusch_8bit_decoder; float tx_amplitude; int nof_phy_threads; std::string equalizer_mode; @@ -72,29 +75,16 @@ class phch_common { public: - - phch_common(uint32_t max_mutex_) : tx_mutex(max_mutex_) { - nof_mutex = 0; - max_mutex = max_mutex_; - params.max_prach_offset_us = 20; - radio = NULL; - mac = NULL; - is_first_tx = false; - is_first_of_burst = false; - pdsch_p_b = 0; - nof_workers = 0; - bzero(&pusch_cfg, sizeof(pusch_cfg)); - bzero(&hopping_cfg, sizeof(hopping_cfg)); - bzero(&pucch_cfg, sizeof(pucch_cfg)); - bzero(&ul_grants, sizeof(ul_grants)); - } - + + phch_common(uint32_t nof_workers); + ~phch_common(); + + void set_nof_workers(uint32_t nof_workers); + bool init(srslte_cell_t *cell, srslte::radio *radio_handler, mac_interface_phy *mac); void reset(); void stop(); - void set_nof_mutex(uint32_t nof_mutex); - void worker_end(uint32_t tx_mutex_cnt, cf_t *buffer[SRSLTE_MAX_PORTS], uint32_t nof_samples, srslte_timestamp_t tx_time); // Common objects @@ -147,13 +137,12 @@ public: private: - 
std::vector tx_mutex; + std::vector tx_sem; bool is_first_tx; bool is_first_of_burst; uint32_t nof_workers; - uint32_t nof_mutex; - uint32_t max_mutex; + uint32_t max_workers; pthread_mutex_t user_mutex; diff --git a/srsenb/hdr/phy/phch_worker.h b/srsenb/hdr/phy/phch_worker.h index 776ba0a1b..24e27a181 100644 --- a/srsenb/hdr/phy/phch_worker.h +++ b/srsenb/hdr/phy/phch_worker.h @@ -46,7 +46,7 @@ public: void reset(); cf_t *get_buffer_rx(uint32_t antenna_idx); - void set_time(uint32_t tti, uint32_t tx_mutex_cnt, srslte_timestamp_t tx_time); + void set_time(uint32_t tti, uint32_t tx_worker_cnt, srslte_timestamp_t tx_time); int add_rnti(uint16_t rnti); void rem_rnti(uint16_t rnti); @@ -93,8 +93,9 @@ private: cf_t *signal_buffer_rx[SRSLTE_MAX_PORTS]; cf_t *signal_buffer_tx[SRSLTE_MAX_PORTS]; uint32_t tti_rx, tti_tx_dl, tti_tx_ul; - uint32_t sf_rx, sf_tx, tx_mutex_cnt; + uint32_t sf_rx, sf_tx; uint32_t t_rx, t_tx_dl, t_tx_ul; + uint32_t tx_worker_cnt; srslte_enb_dl_t enb_dl; srslte_enb_ul_t enb_ul; srslte_softbuffer_tx_t temp_mbsfn_softbuffer; diff --git a/srsenb/hdr/phy/prach_worker.h b/srsenb/hdr/phy/prach_worker.h index 8ca70aa60..c1c4c978c 100644 --- a/srsenb/hdr/phy/prach_worker.h +++ b/srsenb/hdr/phy/prach_worker.h @@ -30,25 +30,24 @@ #include "srslte/interfaces/enb_interfaces.h" #include "srslte/common/log.h" #include "srslte/common/threads.h" +#include "srslte/common/block_queue.h" +#include "srslte/common/buffer_pool.h" namespace srsenb { - + class prach_worker : thread { public: - prach_worker() : initiated(false), prach_nof_det(0), max_prach_offset_us(0), pending_tti(0), processed_tti(0), + prach_worker() : initiated(false), prach_nof_det(0), max_prach_offset_us(0), buffer_pool(8), running(false), nof_sf(0), sf_cnt(0) { log_h = NULL; mac = NULL; - signal_buffer_rx = NULL; bzero(&prach, sizeof(srslte_prach_t)); bzero(&prach_indices, sizeof(prach_indices)); bzero(&prach_offsets, sizeof(prach_offsets)); bzero(&prach_p2avg, sizeof(prach_p2avg)); bzero(&cell, 
sizeof(cell)); bzero(&prach_cfg, sizeof(prach_cfg)); - bzero(&mutex, sizeof(mutex)); - bzero(&cvar, sizeof(cvar)); } int init(srslte_cell_t *cell, srslte_prach_cfg_t *prach_cfg, mac_interface_phy *mac, srslte::log *log_h, int priority); @@ -57,10 +56,7 @@ public: void stop(); private: - void run_thread(); - int run_tti(uint32_t tti); - - uint32_t prach_nof_det; + uint32_t prach_nof_det; uint32_t prach_indices[165]; float prach_offsets[165]; float prach_p2avg[165]; @@ -69,20 +65,32 @@ private: srslte_prach_cfg_t prach_cfg; srslte_prach_t prach; - pthread_mutex_t mutex; - pthread_cond_t cvar; + const static int sf_buffer_sz = 128*1024; + class sf_buffer { + public: + sf_buffer() { nof_samples = 0; tti = 0; } + void reset() { nof_samples = 0; tti = 0; } + cf_t samples[sf_buffer_sz]; + uint32_t nof_samples; + uint32_t tti; + char debug_name[SRSLTE_BUFFER_POOL_LOG_NAME_LEN]; + }; + srslte::buffer_pool buffer_pool; + srslte::block_queue pending_buffers; + sf_buffer* current_buffer; - cf_t *signal_buffer_rx; - srslte::log* log_h; mac_interface_phy *mac; float max_prach_offset_us; bool initiated; - uint32_t pending_tti; - int processed_tti; bool running; uint32_t nof_sf; uint32_t sf_cnt; + + void run_thread(); + int run_tti(sf_buffer *b); + + }; } #endif // SRSENB_PRACH_WORKER_H diff --git a/srsenb/hdr/phy/txrx.h b/srsenb/hdr/phy/txrx.h index 69a119900..8833f2b6a 100644 --- a/srsenb/hdr/phy/txrx.h +++ b/srsenb/hdr/phy/txrx.h @@ -50,8 +50,6 @@ public: uint32_t prio); void stop(); - const static int MUTEX_X_WORKER = 4; - private: void run_thread(); @@ -61,12 +59,12 @@ private: srslte::thread_pool *workers_pool; prach_worker *prach; phch_common *worker_com; - - uint32_t tx_mutex_cnt; - uint32_t nof_tx_mutex; - + // Main system TTI counter - uint32_t tti; + uint32_t tti; + + uint32_t tx_worker_cnt; + uint32_t nof_workers; bool running; }; diff --git a/srsenb/hdr/upper/common_enb.h b/srsenb/hdr/upper/common_enb.h index 2e0b28917..2458cbc19 100644 --- 
a/srsenb/hdr/upper/common_enb.h +++ b/srsenb/hdr/upper/common_enb.h @@ -36,7 +36,6 @@ namespace srsenb { #define ENB_METRICS_MAX_USERS 64 - #define SRSENB_RRC_MAX_N_PLMN_IDENTITIES 6 #define SRSENB_N_SRB 3 @@ -74,111 +73,6 @@ static const char rb_id_text[RB_ID_N_ITEMS][20] = { "SRB0", #define SRSENB_MAX_BUFFER_SIZE_BITS 102048 #define SRSENB_MAX_BUFFER_SIZE_BYTES 12756 #define SRSENB_BUFFER_HEADER_OFFSET 1024 - -/****************************************************************************** - * Convert PLMN to BCD-coded MCC and MNC. - * Digits are represented by 4-bit nibbles. Unused nibbles are filled with 0xF. - * MNC 001 represented as 0xF001 - * MNC 01 represented as 0xFF01 - * PLMN encoded as per TS 36.413 sec 9.2.3.8 - *****************************************************************************/ -inline void s1ap_plmn_to_mccmnc(uint32_t plmn, uint16_t *mcc, uint16_t *mnc) -{ - uint8_t nibbles[6]; - nibbles[0] = (plmn & 0xF00000) >> 20; - nibbles[1] = (plmn & 0x0F0000) >> 16; - nibbles[2] = (plmn & 0x00F000) >> 12; - nibbles[3] = (plmn & 0x000F00) >> 8; - nibbles[4] = (plmn & 0x0000F0) >> 4; - nibbles[5] = (plmn & 0x00000F); - - *mcc = 0xF000; - *mnc = 0xF000; - *mcc |= nibbles[1] << 8; // MCC digit 1 - *mcc |= nibbles[0] << 4; // MCC digit 2 - *mcc |= nibbles[3]; // MCC digit 3 - - if(nibbles[2] == 0xF) { - // 2-digit MNC - *mnc |= 0x0F00; // MNC digit 1 - *mnc |= nibbles[5] << 4; // MNC digit 2 - *mnc |= nibbles[4]; // MNC digit 3 - } else { - // 3-digit MNC - *mnc |= nibbles[5] << 8; // MNC digit 1 - *mnc |= nibbles[4] << 4; // MNC digit 2 - *mnc |= nibbles[2] ; // MNC digit 3 - } -} - -/****************************************************************************** - * Convert BCD-coded MCC and MNC to PLMN. - * Digits are represented by 4-bit nibbles. Unused nibbles are filled with 0xF. 
- * MNC 001 represented as 0xF001 - * MNC 01 represented as 0xFF01 - * PLMN encoded as per TS 36.413 sec 9.2.3.8 - *****************************************************************************/ -inline void s1ap_mccmnc_to_plmn(uint16_t mcc, uint16_t mnc, uint32_t *plmn) -{ - uint8_t nibbles[6]; - nibbles[1] = (mcc & 0x0F00) >> 8; // MCC digit 1 - nibbles[0] = (mcc & 0x00F0) >> 4; // MCC digit 2 - nibbles[3] = (mcc & 0x000F); // MCC digit 3 - - if((mnc & 0xFF00) == 0xFF00) { - // 2-digit MNC - nibbles[2] = 0x0F; // MNC digit 1 - nibbles[5] = (mnc & 0x00F0) >> 4; // MNC digit 2 - nibbles[4] = (mnc & 0x000F); // MNC digit 3 - } else { - // 3-digit MNC - nibbles[5] = (mnc & 0x0F00) >> 8; // MNC digit 1 - nibbles[4] = (mnc & 0x00F0) >> 4; // MNC digit 2 - nibbles[2] = (mnc & 0x000F); // MNC digit 3 - } - - *plmn = 0x000000; - *plmn |= nibbles[0] << 20; - *plmn |= nibbles[1] << 16; - *plmn |= nibbles[2] << 12; - *plmn |= nibbles[3] << 8; - *plmn |= nibbles[4] << 4; - *plmn |= nibbles[5]; -} - -/****************************************************************************** - * Safe conversions between byte buffers and integer types. - * Note: these don't perform endian conversion - use e.g. 
htonl/ntohl if required - *****************************************************************************/ - -inline void uint8_to_uint32(uint8_t *buf, uint32_t *i) -{ - *i = (uint32_t)buf[0] << 24 | - (uint32_t)buf[1] << 16 | - (uint32_t)buf[2] << 8 | - (uint32_t)buf[3]; -} - -inline void uint32_to_uint8(uint32_t i, uint8_t *buf) -{ - buf[0] = (i >> 24) & 0xFF; - buf[1] = (i >> 16) & 0xFF; - buf[2] = (i >> 8) & 0xFF; - buf[3] = i & 0xFF; -} - -inline void uint8_to_uint16(uint8_t *buf, uint16_t *i) -{ - *i = (uint32_t)buf[0] << 8 | - (uint32_t)buf[1]; -} - -inline void uint16_to_uint8(uint16_t i, uint8_t *buf) -{ - buf[0] = (i >> 8) & 0xFF; - buf[1] = i & 0xFF; -} - } // namespace srsenb #endif // SRSENB_COMMON_ENB_H diff --git a/srsenb/hdr/upper/rrc.h b/srsenb/hdr/upper/rrc.h index 26a60e971..c640df197 100644 --- a/srsenb/hdr/upper/rrc.h +++ b/srsenb/hdr/upper/rrc.h @@ -296,6 +296,7 @@ public: int cqi_sched_sf_idx; int cqi_sched_prb_idx; int get_drbid_config(LIBLTE_RRC_DRB_TO_ADD_MOD_STRUCT *drb, int drbid); + srslte::byte_buffer_t erab_info; }; @@ -333,8 +334,7 @@ private: srslte::byte_buffer_pool *pool; srslte::bit_buffer_t bit_buf; srslte::bit_buffer_t bit_buf_paging; - srslte::byte_buffer_t erab_info; - + phy_interface_rrc *phy; mac_interface_rrc *mac; rlc_interface_rrc *rlc; diff --git a/srsenb/src/enb_cfg_parser.cc b/srsenb/src/enb_cfg_parser.cc index b4a4e67fb..0e4771e9f 100644 --- a/srsenb/src/enb_cfg_parser.cc +++ b/srsenb/src/enb_cfg_parser.cc @@ -210,38 +210,38 @@ int enb::parse_sib2(std::string filename, LIBLTE_RRC_SYS_INFO_BLOCK_TYPE_2_STRUC ("time_alignment_timer", &data->time_alignment_timer, liblte_rrc_time_alignment_timer_text, LIBLTE_RRC_TIME_ALIGNMENT_TIMER_N_ITEMS) ); - - - sib2.add_field( + + parser::section mbsfnSubframeConfigList("mbsfnSubframeConfigList"); + sib2.add_subsection(&mbsfnSubframeConfigList); + + bool mbsfn_present=false; + mbsfnSubframeConfigList.set_optional(&mbsfn_present); + + if (mbsfn_present) { + 
data->mbsfn_subfr_cnfg_list_size = 1; + } + + mbsfnSubframeConfigList.add_field( new parser::field - ("mbsfnSubframeConfigListLength", &data->mbsfn_subfr_cnfg_list_size) + ("subframeAllocation", &data->mbsfn_subfr_cnfg_list[0].subfr_alloc) + ); + + mbsfnSubframeConfigList.add_field( + new parser::field + ("radioframeAllocationOffset", &data->mbsfn_subfr_cnfg_list[0].radio_fr_alloc_offset) + ); + + mbsfnSubframeConfigList.add_field( + new parser::field_enum_str + ("subframeAllocationNumFrames", &data->mbsfn_subfr_cnfg_list[0].subfr_alloc_num_frames, + liblte_rrc_subframe_allocation_num_frames_text,LIBLTE_RRC_SUBFRAME_ALLOCATION_NUM_FRAMES_N_ITEMS) + ); + + mbsfnSubframeConfigList.add_field( + new parser::field_enum_str + ("radioframeAllocationPeriod", &data->mbsfn_subfr_cnfg_list[0].radio_fr_alloc_period, + liblte_rrc_radio_frame_allocation_period_text, LIBLTE_RRC_RADIO_FRAME_ALLOCATION_PERIOD_N_ITEMS) ); - - - parser::section mbsfnSubframeConfigList("mbsfnSubframeConfigList"); - sib2.add_subsection(&mbsfnSubframeConfigList); - - mbsfnSubframeConfigList.add_field( - new parser::field - ("subframeAllocation", &data->mbsfn_subfr_cnfg_list[0].subfr_alloc) - ); - - mbsfnSubframeConfigList.add_field( - new parser::field - ("radioframeAllocationOffset", &data->mbsfn_subfr_cnfg_list[0].radio_fr_alloc_offset) - ); - - mbsfnSubframeConfigList.add_field( - new parser::field_enum_str - ("subframeAllocationNumFrames", &data->mbsfn_subfr_cnfg_list[0].subfr_alloc_num_frames, - liblte_rrc_subframe_allocation_num_frames_text,LIBLTE_RRC_SUBFRAME_ALLOCATION_NUM_FRAMES_N_ITEMS) - ); - - mbsfnSubframeConfigList.add_field( - new parser::field_enum_str - ("radioframeAllocationPeriod", &data->mbsfn_subfr_cnfg_list[0].radio_fr_alloc_period, - liblte_rrc_radio_frame_allocation_period_text, LIBLTE_RRC_RADIO_FRAME_ALLOCATION_PERIOD_N_ITEMS) - ); parser::section freqinfo("freqInfo"); sib2.add_subsection(&freqinfo); diff --git a/srsenb/src/mac/mac.cc b/srsenb/src/mac/mac.cc index 
fb1a71c4a..3673ce1b0 100644 --- a/srsenb/src/mac/mac.cc +++ b/srsenb/src/mac/mac.cc @@ -489,7 +489,7 @@ int mac::rach_detected(uint32_t tti, uint32_t preamble_idx, uint32_t time_adv) // Save RA info pending_rars[ra_id].preamble_idx = preamble_idx; - pending_rars[ra_id].ta_cmd = 2*time_adv; + pending_rars[ra_id].ta_cmd = time_adv; pending_rars[ra_id].temp_crnti = last_rnti; // Add new user to the scheduler so that it can RX/TX SRB0 diff --git a/srsenb/src/main.cc b/srsenb/src/main.cc index 9c15ef25c..941a6fff9 100644 --- a/srsenb/src/main.cc +++ b/srsenb/src/main.cc @@ -157,10 +157,14 @@ void parse_args(all_args_t *args, int argc, char* argv[]) { "Pregenerate uplink signals after attach. Improves CPU performance.") ("expert.pusch_max_its", - bpo::value(&args->expert.phy.pusch_max_its)->default_value(4), + bpo::value(&args->expert.phy.pusch_max_its)->default_value(8), "Maximum number of turbo decoder iterations") - ("expert.tx_amplitude", + ("expert.pusch_8bit_decoder", + bpo::value(&args->expert.phy.pusch_8bit_decoder)->default_value(false), + "Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)") + + ("expert.tx_amplitude", bpo::value(&args->expert.phy.tx_amplitude)->default_value(0.6), "Transmit amplitude factor") diff --git a/srsenb/src/phy/phch_common.cc b/srsenb/src/phy/phch_common.cc index 536b0a358..29c0254a3 100644 --- a/srsenb/src/phy/phch_common.cc +++ b/srsenb/src/phy/phch_common.cc @@ -41,9 +41,35 @@ using namespace std; namespace srsenb { -void phch_common::set_nof_mutex(uint32_t nof_mutex_) { - nof_mutex = nof_mutex_; - assert(nof_mutex <= max_mutex); +phch_common::phch_common(uint32_t max_workers) : tx_sem(max_workers) +{ + this->nof_workers = nof_workers; + params.max_prach_offset_us = 20; + radio = NULL; + mac = NULL; + is_first_tx = false; + is_first_of_burst = false; + pdsch_p_b = 0; + this->max_workers = max_workers; + bzero(&pusch_cfg, sizeof(pusch_cfg)); + bzero(&hopping_cfg, sizeof(hopping_cfg)); + 
bzero(&pucch_cfg, sizeof(pucch_cfg)); + bzero(&ul_grants, sizeof(ul_grants)); + + for (uint32_t i=0;inof_workers = nof_workers; } void phch_common::reset() { @@ -61,35 +87,42 @@ bool phch_common::init(srslte_cell_t *cell_, srslte::radio* radio_h_, mac_interf is_first_of_burst = true; is_first_tx = true; - for (uint32_t i=0;iset_tti(tx_mutex_cnt); + // Wait for the green light to transmit in the current TTI + sem_wait(&tx_sem[tti%nof_workers]); + + radio->set_tti(tti); radio->tx((void **) buffer, nof_samples, tx_time); - - // Trigger next transmission - pthread_mutex_unlock(&tx_mutex[(tx_mutex_cnt+1)%nof_mutex]); + + // Allow next TTI to transmit + sem_post(&tx_sem[(tti+1)%nof_workers]); // Trigger MAC clock mac->tti_clock(); diff --git a/srsenb/src/phy/phch_worker.cc b/srsenb/src/phy/phch_worker.cc index 36a260b6c..103cc6961 100644 --- a/srsenb/src/phy/phch_worker.cc +++ b/srsenb/src/phy/phch_worker.cc @@ -153,7 +153,11 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_) srslte_enb_dl_set_amp(&enb_dl, phy->params.tx_amplitude); Info("Worker %d configured cell %d PRB\n", get_id(), phy->cell.nof_prb); - + + if (phy->params.pusch_8bit_decoder) { + enb_ul.pusch.llr_is_8bit = true; + enb_ul.pusch.ul_sch.llr_is_8bit = true; + } initiated = true; running = true; @@ -202,7 +206,7 @@ cf_t* phch_worker::get_buffer_rx(uint32_t antenna_idx) return signal_buffer_rx[antenna_idx]; } -void phch_worker::set_time(uint32_t tti_, uint32_t tx_mutex_cnt_, srslte_timestamp_t tx_time_) +void phch_worker::set_time(uint32_t tti_, uint32_t tx_worker_cnt_, srslte_timestamp_t tx_time_) { tti_rx = tti_; tti_tx_dl = TTI_TX(tti_rx); @@ -215,7 +219,7 @@ void phch_worker::set_time(uint32_t tti_, uint32_t tx_mutex_cnt_, srslte_timesta t_rx = TTIMOD(tti_rx); t_tx_ul = TTIMOD(tti_tx_ul); - tx_mutex_cnt = tx_mutex_cnt_; + tx_worker_cnt = tx_worker_cnt_; memcpy(&tx_time, &tx_time_, sizeof(srslte_timestamp_t)); } @@ -479,7 +483,7 @@ void phch_worker::work_imp() pthread_mutex_unlock(&mutex); 
Debug("Sending to radio\n"); - phy->worker_end(tx_mutex_cnt, signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); + phy->worker_end(tx_worker_cnt, signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); is_worker_running = false; @@ -512,9 +516,6 @@ unlock: int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) { - srslte_uci_data_t uci_data; - ZERO_OBJECT(uci_data); - uint32_t wideband_cqi_value = 0, wideband_pmi = 0; bool wideband_pmi_present = false; @@ -522,6 +523,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) for (uint32_t i=0;iwarning("PRACH thread did not finish processing TTI=%d\n", pending_tti); + if (sf_cnt == 0) { + current_buffer = buffer_pool.allocate(); + if (!current_buffer) { + log_h->warning("PRACH skipping tti=%d due to lack of available buffers\n", tti_rx); + return 0; } - pthread_mutex_lock(&mutex); - if (tti_rx+1 > nof_sf) { - pending_tti = tti_rx+1-nof_sf; - } else { - pending_tti = 10240+(tti_rx+1-nof_sf); + } + if (!current_buffer) { + log_h->error("PRACH: Expected available current_buffer\n"); + return -1; + } + if (current_buffer->nof_samples+SRSLTE_SF_LEN_PRB(cell.nof_prb) < sf_buffer_sz) { + memcpy(¤t_buffer->samples[sf_cnt*SRSLTE_SF_LEN_PRB(cell.nof_prb)], buffer_rx, sizeof(cf_t)*SRSLTE_SF_LEN_PRB(cell.nof_prb)); + current_buffer->nof_samples += SRSLTE_SF_LEN_PRB(cell.nof_prb); + if (sf_cnt == 0) { + current_buffer->tti = tti_rx; } - pthread_cond_signal(&cvar); - pthread_mutex_unlock(&mutex); + } else { + log_h->error("PRACH: Not enough space in current_buffer\n"); + return -1; + } + sf_cnt++; + if (sf_cnt == nof_sf) { + sf_cnt = 0; + pending_buffers.push(current_buffer); } } return 0; } -int prach_worker::run_tti(uint32_t tti_rx) +int prach_worker::run_tti(sf_buffer *b) { - if (srslte_prach_tti_opportunity(&prach, tti_rx, -1)) + if (srslte_prach_tti_opportunity(&prach, b->tti, -1)) { // Detect possible PRACHs if (srslte_prach_detect_offset(&prach, 
prach_cfg.freq_offset, - &signal_buffer_rx[prach.N_cp], + &b->samples[prach.N_cp], nof_sf*SRSLTE_SF_LEN_PRB(cell.nof_prb)-prach.N_cp, prach_indices, prach_offsets, @@ -135,7 +128,7 @@ int prach_worker::run_tti(uint32_t tti_rx) i, prach_nof_det, prach_indices[i], prach_offsets[i]*1e6, prach_p2avg[i], max_prach_offset_us); if (prach_offsets[i]*1e6 < max_prach_offset_us) { - mac->rach_detected(tti_rx, prach_indices[i], (uint32_t) (prach_offsets[i]*1e6)); + mac->rach_detected(b->tti, prach_indices[i], (uint32_t) (prach_offsets[i]*1e6)); } } } @@ -147,18 +140,15 @@ void prach_worker::run_thread() { running = true; while(running) { - pthread_mutex_lock(&mutex); - while(processed_tti == (int) pending_tti) { - pthread_cond_wait(&cvar, &mutex); - } - pthread_mutex_unlock(&mutex); - log_h->debug("Processing pending_tti=%d\n", pending_tti); - if (running) { - if (run_tti(pending_tti)) { - running = false; + sf_buffer* b = pending_buffers.wait_pop(); + if (running && b) { + int ret = run_tti(b); + b->reset(); + buffer_pool.deallocate(b); + if (ret) { + running = false; + } } - processed_tti = pending_tti; - } } } diff --git a/srsenb/src/phy/txrx.cc b/srsenb/src/phy/txrx.cc index 7f6503b06..da92ad921 100644 --- a/srsenb/src/phy/txrx.cc +++ b/srsenb/src/phy/txrx.cc @@ -42,7 +42,7 @@ using namespace std; namespace srsenb { -txrx::txrx() : tx_mutex_cnt(0), nof_tx_mutex(0), tti(0) { +txrx::txrx() : tx_worker_cnt(0), nof_workers(0), tti(0) { running = false; radio_h = NULL; log_h = NULL; @@ -58,11 +58,11 @@ bool txrx::init(srslte::radio* radio_h_, srslte::thread_pool* workers_pool_, phc workers_pool = workers_pool_; worker_com = worker_com_; prach = prach_; - tx_mutex_cnt = 0; + tx_worker_cnt = 0; running = true; - nof_tx_mutex = MUTEX_X_WORKER*workers_pool->get_nof_workers(); - worker_com->set_nof_mutex(nof_tx_mutex); + nof_workers = workers_pool->get_nof_workers(); + worker_com->set_nof_workers(nof_workers); start(prio_); return true; @@ -126,12 +126,12 @@ void txrx::run_thread() 
srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3); Debug("Settting TTI=%d, tx_mutex=%d, tx_time=%ld:%f to worker %d\n", - tti, tx_mutex_cnt, + tti, tx_worker_cnt, tx_time.full_secs, tx_time.frac_secs, worker->get_id()); - worker->set_time(tti, tx_mutex_cnt, tx_time); - tx_mutex_cnt = (tx_mutex_cnt+1)%nof_tx_mutex; + worker->set_time(tti, tx_worker_cnt, tx_time); + tx_worker_cnt = (tx_worker_cnt+1)%nof_workers; // Trigger phy worker execution workers_pool->start_worker(worker); diff --git a/srsenb/src/upper/rrc.cc b/srsenb/src/upper/rrc.cc index 2e481ce6e..269e5dcaf 100644 --- a/srsenb/src/upper/rrc.cc +++ b/srsenb/src/upper/rrc.cc @@ -29,10 +29,12 @@ #include "srsenb/hdr/upper/rrc.h" #include "srslte/srslte.h" #include "srslte/asn1/liblte_mme.h" - +#include "srslte/common/int_helpers.h" using srslte::byte_buffer_t; using srslte::bit_buffer_t; +using srslte::uint32_to_uint8; +using srslte::uint8_to_uint32; namespace srsenb { @@ -1225,8 +1227,9 @@ void rrc::ue::setup_erab(uint8_t id, LIBLTE_S1AP_E_RABLEVELQOSPARAMETERS_STRUCT parent->gtpu->add_bearer(rnti, lcid, addr_, erabs[id].teid_out, &(erabs[id].teid_in)); if(nas_pdu) { - memcpy(parent->erab_info.msg, nas_pdu->buffer, nas_pdu->n_octets); - parent->erab_info.N_bytes = nas_pdu->n_octets; + memcpy(erab_info.buffer, nas_pdu->buffer, nas_pdu->n_octets); + erab_info.N_bytes = nas_pdu->n_octets; + parent->rrc_log->info_hex(erab_info.buffer, erab_info.N_bytes, "setup_erab nas_pdu -> erab_info rnti 0x%x", rnti); } } @@ -1665,9 +1668,11 @@ void rrc::ue::send_connection_reconf(srslte::byte_buffer_t *pdu) // Add NAS Attach accept conn_reconf->N_ded_info_nas = 1; - conn_reconf->ded_info_nas_list[0].N_bytes = parent->erab_info.N_bytes; - memcpy(conn_reconf->ded_info_nas_list[0].msg, parent->erab_info.msg, parent->erab_info.N_bytes); - + + parent->rrc_log->info_hex(erab_info.buffer, erab_info.N_bytes, "connection_reconf erab_info -> nas_info rnti 0x%x\n", rnti); + conn_reconf->ded_info_nas_list[0].N_bytes = 
erab_info.N_bytes; + memcpy(conn_reconf->ded_info_nas_list[0].msg, erab_info.buffer, erab_info.N_bytes); + // Reuse same PDU pdu->reset(); @@ -1723,8 +1728,9 @@ void rrc::ue::send_connection_reconf_new_bearer(LIBLTE_S1AP_E_RABTOBESETUPLISTBE // DRB has already been configured in GTPU through bearer setup // Add NAS message - conn_reconf->ded_info_nas_list[conn_reconf->N_ded_info_nas].N_bytes = parent->erab_info.N_bytes; - memcpy(conn_reconf->ded_info_nas_list[conn_reconf->N_ded_info_nas].msg, parent->erab_info.msg, parent->erab_info.N_bytes); + parent->rrc_log->info_hex(erab_info.buffer, erab_info.N_bytes, "reconf_new_bearer erab_info -> nas_info rnti 0x%x\n", rnti); + conn_reconf->ded_info_nas_list[conn_reconf->N_ded_info_nas].N_bytes = erab_info.N_bytes; + memcpy(conn_reconf->ded_info_nas_list[conn_reconf->N_ded_info_nas].msg, erab_info.buffer, erab_info.N_bytes); conn_reconf->N_ded_info_nas++; } diff --git a/srsenb/src/upper/s1ap.cc b/srsenb/src/upper/s1ap.cc index 1e2b25987..e2faf83f0 100644 --- a/srsenb/src/upper/s1ap.cc +++ b/srsenb/src/upper/s1ap.cc @@ -26,6 +26,8 @@ #include "srsenb/hdr/upper/s1ap.h" #include "srsenb/hdr/upper/common_enb.h" +#include "srslte/common/bcd_helpers.h" +#include "srslte/common/int_helpers.h" #include #include @@ -36,6 +38,9 @@ #include #include //for inet_ntop() +using srslte::s1ap_mccmnc_to_plmn; +using srslte::uint32_to_uint8; + namespace srsenb{ bool s1ap::init(s1ap_args_t args_, rrc_interface_s1ap *rrc_, srslte::log *s1ap_log_) diff --git a/srsenb/test/upper/plmn_test.cc b/srsenb/test/upper/plmn_test.cc index c8e9bdd69..448931129 100644 --- a/srsenb/test/upper/plmn_test.cc +++ b/srsenb/test/upper/plmn_test.cc @@ -10,6 +10,7 @@ #include #include "srsenb/hdr/upper/common_enb.h" #include "srslte/asn1/liblte_rrc.h" +#include "srslte/common/bcd_helpers.h" void rrc_plmn_test() { @@ -55,17 +56,17 @@ void s1ap_plmn_test() uint32_t plmn; // 2-digit MNC test - srsenb::s1ap_mccmnc_to_plmn(mcc, mnc, &plmn); + 
srslte::s1ap_mccmnc_to_plmn(mcc, mnc, &plmn); assert(plmn == 0x21F354); - srsenb::s1ap_plmn_to_mccmnc(plmn, &mcc, &mnc); + srslte::s1ap_plmn_to_mccmnc(plmn, &mcc, &mnc); assert(mcc == 0xF123); assert(mnc == 0xFF45); // 3-digit MNC test mnc = 0xF456; - srsenb::s1ap_mccmnc_to_plmn(mcc, mnc, &plmn); - assert(plmn == 0x216354); - srsenb::s1ap_plmn_to_mccmnc(plmn, &mcc, &mnc); + srslte::s1ap_mccmnc_to_plmn(mcc, mnc, &plmn); + assert(plmn == 0x214365); + srslte::s1ap_plmn_to_mccmnc(plmn, &mcc, &mnc); assert(mcc == 0xF123); assert(mnc == 0xF456); } diff --git a/srsepc/hdr/hss/hss.h b/srsepc/hdr/hss/hss.h index 922c7cd85..7d7e2e83b 100644 --- a/srsepc/hdr/hss/hss.h +++ b/srsepc/hdr/hss/hss.h @@ -42,6 +42,11 @@ #include #include +#define LTE_FDD_ENB_IND_HE_N_BITS 5 +#define LTE_FDD_ENB_IND_HE_MASK 0x1FUL +#define LTE_FDD_ENB_IND_HE_MAX_VALUE 31 +#define LTE_FDD_ENB_SEQ_HE_MAX_VALUE 0x07FFFFFFFFFFUL + namespace srsepc{ typedef struct{ @@ -106,6 +111,7 @@ private: void get_uint_vec_from_hex_str(const std::string &key_str, uint8_t *key, uint len); void increment_ue_sqn(uint64_t imsi); + void increment_seq_after_resync(uint64_t imsi); void increment_sqn(uint8_t *sqn, uint8_t *next_sqn); void set_sqn(uint64_t imsi, uint8_t *sqn); diff --git a/srsepc/src/hss/hss.cc b/srsepc/src/hss/hss.cc index 844a3a92b..4d12f8619 100644 --- a/srsepc/src/hss/hss.cc +++ b/srsepc/src/hss/hss.cc @@ -532,7 +532,7 @@ hss::resync_sqn(uint64_t imsi, uint8_t *auts) ret = resync_sqn_milenage(imsi, auts); break; } - increment_ue_sqn(imsi); + increment_seq_after_resync(imsi); return ret; } @@ -594,6 +594,10 @@ hss::resync_sqn_milenage(uint64_t imsi, uint8_t *auts) uint8_t mac_s_tmp[8]; + for(int i=0; i<2; i++){ + amf[i] = 0; + } + security_milenage_f1_star(k, opc, last_rand, sqn_ms, amf, mac_s_tmp); m_hss_log->debug_hex(mac_s_tmp, 8, "MAC calc : "); @@ -621,21 +625,86 @@ hss::increment_ue_sqn(uint64_t imsi) void hss::increment_sqn(uint8_t *sqn, uint8_t *next_sqn) { - // Awkward 48 bit sqn and doing 
arithmetic - uint64_t tmp_sqn = 0; - uint8_t *p = (uint8_t *)&tmp_sqn; + // The following SQN incrementation function is implemented according to 3GPP TS 33.102 version 11.5.1 Annex C + + uint64_t seq; + uint64_t ind; + uint64_t sqn64; + + sqn64 =0; + + for(int i=0; i<6; i++) + { + sqn64 |= (uint64_t)sqn[i] << (5-i)*8; + } + + seq = sqn64 >> LTE_FDD_ENB_IND_HE_N_BITS; + ind = sqn64 & LTE_FDD_ENB_IND_HE_MASK; + + uint64_t nextseq; + uint64_t nextind; + uint64_t nextsqn; + + nextseq = (seq + 1) % LTE_FDD_ENB_SEQ_HE_MAX_VALUE; + nextind = (ind + 1) % LTE_FDD_ENB_IND_HE_MAX_VALUE; + nextsqn = (nextseq << LTE_FDD_ENB_IND_HE_N_BITS) | nextind; + + for(int i=0; i<6; i++) + { + next_sqn[i] = (nextsqn >> (5-i)*8) & 0xFF; + } + + return; + +} + + +void +hss::increment_seq_after_resync(uint64_t imsi) +{ + +// This function only increment the SEQ part of the SQN for resynchronization purpose - for(int i = 0; i < 6; i++) { - p[5-i] = sqn[i]; + hss_ue_ctx_t *ue_ctx = NULL; + bool ret = get_ue_ctx(imsi, &ue_ctx); + if(ret == false) + { + return; } - tmp_sqn++; - for(int i = 0; i < 6; i++){ - next_sqn[i] = p[5-i]; + uint8_t *sqn = ue_ctx->sqn; + + uint64_t seq; + uint64_t ind; + uint64_t sqn64; + + sqn64 =0; + + for(int i=0; i<6; i++) + { + sqn64 |= (uint64_t)sqn[i] << (5-i)*8; } + + seq = sqn64 >> LTE_FDD_ENB_IND_HE_N_BITS; + ind = sqn64 & LTE_FDD_ENB_IND_HE_MASK; + + uint64_t nextseq; + uint64_t nextsqn; + + nextseq = (seq + 1) % LTE_FDD_ENB_SEQ_HE_MAX_VALUE; + nextsqn = (nextseq << LTE_FDD_ENB_IND_HE_N_BITS) | ind; + + for(int i=0; i<6; i++) + { + sqn[i] = (nextsqn >> (5-i)*8) & 0xFF; + } + return; + } + + void hss::set_sqn(uint64_t imsi, uint8_t *sqn) { diff --git a/srsepc/src/mme/s1ap_ctx_mngmt_proc.cc b/srsepc/src/mme/s1ap_ctx_mngmt_proc.cc index ad06285f6..1cea56cae 100644 --- a/srsepc/src/mme/s1ap_ctx_mngmt_proc.cc +++ b/srsepc/src/mme/s1ap_ctx_mngmt_proc.cc @@ -24,11 +24,11 @@ * */ -//#include "srslte/upper/s1ap_common.h" #include "srslte/common/bcd_helpers.h" #include 
"srsepc/hdr/mme/s1ap.h" #include "srsepc/hdr/mme/s1ap_ctx_mngmt_proc.h" #include "srslte/common/liblte_security.h" +#include "srslte/common/int_helpers.h" namespace srsepc{ @@ -127,8 +127,8 @@ s1ap_ctx_mngmt_proc::send_initial_context_setup_request(nas *nas_ctx, uint16_t e uint8_t *tmp_ptr = erab_ctx_req->transportLayerAddress.buffer; liblte_value_2_bits(sgw_s1u_ip, &tmp_ptr, 32); - uint32_t sgw_s1u_teid = esm_ctx->sgw_s1u_fteid.teid; - memcpy(erab_ctx_req->gTP_TEID.buffer, &sgw_s1u_teid, sizeof(uint32_t)); + uint32_t sgw_s1u_teid = erab_ctx->sgw_s1u_fteid.teid; + srslte::uint32_to_uint8(sgw_s1u_teid,erab_ctx_req->gTP_TEID.buffer); //Set UE security capabilities and k_enb bzero(in_ctxt_req->UESecurityCapabilities.encryptionAlgorithms.buffer,sizeof(uint8_t)*16); diff --git a/srsue/hdr/mac/mac_metrics.h b/srsue/hdr/mac/mac_metrics.h index a201d2a7f..56d20dcdc 100644 --- a/srsue/hdr/mac/mac_metrics.h +++ b/srsue/hdr/mac/mac_metrics.h @@ -39,6 +39,8 @@ struct mac_metrics_t int rx_errors; int rx_brate; int ul_buffer; + float dl_retx_avg; + float ul_retx_avg; }; } // namespace srsue diff --git a/srsue/hdr/phy/phch_common.h b/srsue/hdr/phy/phch_common.h index 90379c854..5db0dd2cb 100644 --- a/srsue/hdr/phy/phch_common.h +++ b/srsue/hdr/phy/phch_common.h @@ -33,6 +33,7 @@ #include #include #include +#include #include "srslte/srslte.h" #include "srslte/interfaces/ue_interfaces.h" #include "srslte/radio/radio.h" @@ -116,7 +117,8 @@ typedef struct { uint8_t last_ri; uint8_t last_pmi; - phch_common(uint32_t max_mutex = 3); + phch_common(uint32_t max_workers); + ~phch_common(); void init(phy_interface_rrc::phy_cfg_t *config, phy_args_t *args, srslte::log *_log, @@ -144,8 +146,7 @@ typedef struct { void worker_end(uint32_t tti, bool tx_enable, cf_t *buffer, uint32_t nof_samples, srslte_timestamp_t tx_time); - void set_nof_mutex(uint32_t nof_mutex); - + void set_nof_workers(uint32_t nof_workers); bool sr_enabled; int sr_last_tx_tti; @@ -179,7 +180,9 @@ typedef struct { - 
std::vector tx_mutex; + std::vector tx_sem; + uint32_t nof_workers; + uint32_t max_workers; bool is_first_of_burst; srslte::radio *radio_h; @@ -208,10 +211,6 @@ typedef struct { bool is_first_tx; - uint32_t nof_workers; - uint32_t nof_mutex; - uint32_t max_mutex; - srslte_cell_t cell; dl_metrics_t dl_metrics; diff --git a/srsue/hdr/phy/phch_recv.h b/srsue/hdr/phy/phch_recv.h index f58bc64a1..6e9e62a94 100644 --- a/srsue/hdr/phy/phch_recv.h +++ b/srsue/hdr/phy/phch_recv.h @@ -83,7 +83,6 @@ public: void force_freq(float dl_freq, float ul_freq); // Other functions - const static int MUTEX_X_WORKER = 4; double set_rx_gain(double gain); int radio_recv_fnc(cf_t *data[SRSLTE_MAX_PORTS], uint32_t nsamples, srslte_timestamp_t *rx_time); int scell_recv_fnc(cf_t *data[SRSLTE_MAX_PORTS], uint32_t nsamples, srslte_timestamp_t *rx_time); @@ -438,9 +437,9 @@ private: float time_adv_sec, next_time_adv_sec; uint32_t tti; bool do_agc; - - uint32_t nof_tx_mutex; - uint32_t tx_mutex_cnt; + + uint32_t tx_worker_cnt; + uint32_t nof_workers; float ul_dl_factor; int current_earfcn; diff --git a/srsue/hdr/phy/phch_worker.h b/srsue/hdr/phy/phch_worker.h index 244657975..085c9d1ce 100644 --- a/srsue/hdr/phy/phch_worker.h +++ b/srsue/hdr/phy/phch_worker.h @@ -52,7 +52,7 @@ public: /* Functions used by main PHY thread */ cf_t* get_buffer(uint32_t antenna_idx); - void set_tti(uint32_t tti, uint32_t tx_tti); + void set_tti(uint32_t tti, uint32_t tx_worker_cnt); void set_tx_time(srslte_timestamp_t tx_time, uint32_t next_offset); void set_prach(cf_t *prach_ptr, float prach_power); void set_cfo(float cfo); diff --git a/srsue/hdr/upper/nas.h b/srsue/hdr/upper/nas.h index 70ccbea35..e83edfbf8 100644 --- a/srsue/hdr/upper/nas.h +++ b/srsue/hdr/upper/nas.h @@ -130,6 +130,7 @@ private: bool have_guti; bool have_ctxt; nas_sec_ctxt ctxt; + bool auth_request; uint32_t ip_addr; uint8_t eps_bearer_id; @@ -180,7 +181,7 @@ private: void gen_service_request(byte_buffer_t *msg); // Senders - void 
send_identity_response(); + void send_identity_response(uint32_t lcid, uint8 id_type); void send_service_request(); void send_esm_information_response(const uint8 proc_transaction_id); void send_authentication_response(const uint8_t* res, const size_t res_len, const uint8_t sec_hdr_type); diff --git a/srsue/src/mac/mac.cc b/srsue/src/mac/mac.cc index ffa0ca1a3..964e394e9 100644 --- a/srsue/src/mac/mac.cc +++ b/srsue/src/mac/mac.cc @@ -83,6 +83,7 @@ bool mac::init(phy_interface_mac *phy, rlc_interface_mac *rlc, rrc_interface_mac void mac::stop() { srslte_softbuffer_rx_free(&pch_softbuffer); + srslte_softbuffer_rx_free(&mch_softbuffer); pdu_process_thread.stop(); stop_thread(); @@ -476,6 +477,8 @@ void mac::get_metrics(mac_metrics_t &m) ul_harq.get_average_retx()); metrics.ul_buffer = (int) bsr_procedure.get_buffer_state(); + metrics.dl_retx_avg = dl_harq.get_average_retx(); + metrics.ul_retx_avg = ul_harq.get_average_retx(); m = metrics; bzero(&metrics, sizeof(mac_metrics_t)); } diff --git a/srsue/src/mac/proc_ra.cc b/srsue/src/mac/proc_ra.cc index 5b5b56ee7..11e53195c 100644 --- a/srsue/src/mac/proc_ra.cc +++ b/srsue/src/mac/proc_ra.cc @@ -237,7 +237,8 @@ void ra_proc::step_resource_selection() { } if (sel_group == RA_GROUP_A) { if (nof_groupA_preambles) { - sel_preamble = preambleTransmissionCounter%nof_groupA_preambles; + // randomly choose preamble from [0 nof_groupA_preambles) + sel_preamble = rand() % nof_groupA_preambles; } else { rError("Selected group preamble A but nof_groupA_preambles=0\n"); state = RA_PROBLEM; @@ -245,7 +246,8 @@ void ra_proc::step_resource_selection() { } } else { if (nof_groupB_preambles) { - sel_preamble = nof_groupA_preambles + rand()%nof_groupB_preambles; + // randomly choose preamble from [nof_groupA_preambles nof_groupB_preambles) + sel_preamble = nof_groupA_preambles + rand() % nof_groupB_preambles; } else { rError("Selected group preamble B but nof_groupA_preambles=0\n"); state = RA_PROBLEM; @@ -254,7 +256,7 @@ void 
ra_proc::step_resource_selection() { } sel_maskIndex = 0; } - + rDebug("Selected preambleIndex=%d maskIndex=%d GroupA=%d, GroupB=%d\n", sel_preamble, sel_maskIndex,nof_groupA_preambles, nof_groupB_preambles); state = PREAMBLE_TRANSMISSION; diff --git a/srsue/src/main.cc b/srsue/src/main.cc index cc1638169..8204808b9 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -214,7 +214,7 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { "Sets the noise estimation algorithm. (Default refs)") ("expert.pdsch_max_its", - bpo::value(&args->expert.phy.pdsch_max_its)->default_value(4), + bpo::value(&args->expert.phy.pdsch_max_its)->default_value(8), "Maximum number of turbo decoder iterations") ("expert.attach_enable_64qam", @@ -307,7 +307,11 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { bpo::value(&args->expert.phy.pdsch_csi_enabled)->default_value(true), "Stores the Channel State Information and uses it for weightening the softbits. It is only used in TM1.") - ("rf_calibration.tx_corr_dc_gain", bpo::value(&args->rf_cal.tx_corr_dc_gain)->default_value(0.0), + ("expert.pdsch_8bit_decoder", + bpo::value(&args->expert.phy.pdsch_8bit_decoder)->default_value(false), + "Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)") + + ("rf_calibration.tx_corr_dc_gain", bpo::value(&args->rf_cal.tx_corr_dc_gain)->default_value(0.0), "TX DC offset gain correction") ("rf_calibration.tx_corr_dc_phase", bpo::value(&args->rf_cal.tx_corr_dc_phase)->default_value(0.0), "TX DC offset phase correction") diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index d09338c4c..0cc3c6e93 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -39,15 +39,14 @@ namespace srsue { cf_t zeros[50000]; -phch_common::phch_common(uint32_t max_mutex_) : tx_mutex(max_mutex_) +phch_common::phch_common(uint32_t max_workers) : tx_sem(max_workers) { config = NULL; args = NULL; log_h = NULL; radio_h = NULL; - 
mac = NULL; - max_mutex = max_mutex_; - nof_mutex = 0; + mac = NULL; + this->max_workers = max_workers; rx_gain_offset = 0; last_ri = 0; last_pmi = 0; @@ -65,17 +64,28 @@ phch_common::phch_common(uint32_t max_mutex_) : tx_mutex(max_mutex_) bzero(zeros, 50000*sizeof(cf_t)); - // FIXME: This is an ugly fix to avoid the TX filters to empty - /* - for (int i=0;i<50000;i++) { - zeros[i] = 0.01*cexpf(((float) i/50000)*0.1*_Complex_I); - }*/ + for (uint32_t i=0;inof_workers = nof_workers; +} void phch_common::init(phy_interface_rrc::phy_cfg_t *_config, phy_args_t *_args, srslte::log *_log, srslte::radio *_radio, rrc_interface_phy *_rrc, mac_interface_phy *_mac) { @@ -87,15 +97,6 @@ void phch_common::init(phy_interface_rrc::phy_cfg_t *_config, phy_args_t *_args, args = _args; is_first_tx = true; sr_last_tx_tti = -1; - - for (uint32_t i=0;iset_tti(tti); + // Wait for the green light to transmit in the current TTI + sem_wait(&tx_sem[tti%nof_workers]); + + radio_h->set_tti(tti); if (tx_enable) { radio_h->tx_single(buffer, nof_samples, tx_time); is_first_of_burst = false; @@ -263,8 +271,9 @@ void phch_common::worker_end(uint32_t tti, bool tx_enable, } } } - // Trigger next transmission - pthread_mutex_unlock(&tx_mutex[(tti+1)%nof_mutex]); + + // Allow next TTI to transmit + sem_post(&tx_sem[(tti+1)%nof_workers]); } diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index c7b63cd4f..3cac3509c 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -53,6 +53,7 @@ phch_recv::phch_recv() { ul_freq = -1; bzero(&cell, sizeof(srslte_cell_t)); bzero(&metrics, sizeof(sync_metrics_t)); + cellsearch_earfcn_index = 0; running = false; worker_com = NULL; } @@ -81,8 +82,8 @@ void phch_recv::init(srslte::radio_multi *_radio_handler, mac_interface_phy *_ma return; } - nof_tx_mutex = MUTEX_X_WORKER * workers_pool->get_nof_workers(); - worker_com->set_nof_mutex(nof_tx_mutex); + nof_workers = workers_pool->get_nof_workers(); + 
worker_com->set_nof_workers(nof_workers); // Initialize cell searcher search_p.init(sf_buffer, log_h, nof_rx_antennas, this); @@ -128,7 +129,7 @@ void phch_recv::reset() radio_overflow_return = false; in_sync_cnt = 0; out_of_sync_cnt = 0; - tx_mutex_cnt = 0; + tx_worker_cnt = 0; time_adv_sec = 0; next_offset = 0; srate_mode = SRATE_NONE; @@ -454,13 +455,13 @@ void phch_recv::run_thread() worker->set_prach(prach_ptr?&prach_ptr[prach_sf_cnt*SRSLTE_SF_LEN_PRB(cell.nof_prb)]:NULL, prach_power); worker->set_cfo(get_tx_cfo()); - worker->set_tti(tti, tx_mutex_cnt); + worker->set_tti(tti, tx_worker_cnt); worker->set_tx_time(tx_time, next_offset); next_offset = 0; if (next_time_adv_sec != time_adv_sec) { time_adv_sec = next_time_adv_sec; } - tx_mutex_cnt = (tx_mutex_cnt+1) % nof_tx_mutex; + tx_worker_cnt = (tx_worker_cnt+1) % nof_workers; // Advance/reset prach subframe pointer if (prach_ptr) { diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index 7c1f8329a..ee2dc132b 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -61,6 +61,7 @@ phch_worker::phch_worker() : tr_exec(10240) chest_loop = NULL; bzero(signal_buffer, sizeof(cf_t*)*SRSLTE_MAX_PORTS); + ZERO_OBJECT(cell); mem_initiated = false; cell_initiated = false; @@ -139,6 +140,10 @@ bool phch_worker::init(uint32_t max_prb, srslte::log *log_h, srslte::log *log_ph return false; } + if (phy->args->pdsch_8bit_decoder) { + ue_dl.pdsch.llr_is_8bit = true; + ue_dl.pdsch.dl_sch.llr_is_8bit = true; + } srslte_chest_dl_set_rsrp_neighbour(&ue_dl.chest, true); srslte_chest_dl_average_subframe(&ue_dl.chest, phy->args->average_subframe_enabled); @@ -189,10 +194,10 @@ cf_t* phch_worker::get_buffer(uint32_t antenna_idx) return signal_buffer[antenna_idx]; } -void phch_worker::set_tti(uint32_t tti_, uint32_t tx_tti_) +void phch_worker::set_tti(uint32_t tti_, uint32_t tx_worker_cnt) { - tti = tti_; - tx_tti = tx_tti_; + tti = tti_; + tx_tti = tx_worker_cnt; log_h->step(tti); if 
(log_phy_lib_h) { log_phy_lib_h->step(tti); diff --git a/srsue/src/phy/phy.cc b/srsue/src/phy/phy.cc index 47354156d..50bec5f09 100644 --- a/srsue/src/phy/phy.cc +++ b/srsue/src/phy/phy.cc @@ -52,7 +52,7 @@ namespace srsue { phy::phy() : workers_pool(MAX_WORKERS), workers(MAX_WORKERS), - workers_common(phch_recv::MUTEX_X_WORKER*MAX_WORKERS),nof_coworkers(0) + workers_common(MAX_WORKERS),nof_coworkers(0) { } diff --git a/srsue/src/phy/prach.cc b/srsue/src/phy/prach.cc index 05aa56a7f..390ff8f9d 100644 --- a/srsue/src/phy/prach.cc +++ b/srsue/src/phy/prach.cc @@ -191,8 +191,8 @@ cf_t *prach::generate(float cfo, uint32_t *nof_sf, float *target_power) { *target_power = target_power_dbm; } - Info("PRACH: Transmitted preamble=%d, CFO=%.2f KHz, nof_sf=%d, target_power=%.1f dBm\n", - preamble_idx, cfo*15, nsf, target_power_dbm); + Info("PRACH: Transmitted preamble=%d, tti=%d, CFO=%.2f KHz, nof_sf=%d, target_power=%.1f dBm\n", + preamble_idx, transmitted_tti, cfo*15, nsf, target_power_dbm); preamble_idx = -1; return signal_buffer; diff --git a/srsue/src/ue.cc b/srsue/src/ue.cc index 56e8b6c44..daccfca8b 100644 --- a/srsue/src/ue.cc +++ b/srsue/src/ue.cc @@ -181,8 +181,10 @@ bool ue::init(all_args_t *args_) { // Set RF options if (args->rf.time_adv_nsamples.compare("auto")) { - radio.set_tx_adv(atoi(args->rf.time_adv_nsamples.c_str())); - } + int t = atoi(args->rf.time_adv_nsamples.c_str()); + radio.set_tx_adv(abs(t)); + radio.set_tx_adv_neg(t<0); + } if (args->rf.burst_preamble.compare("auto")) { radio.set_burst_preamble(atof(args->rf.burst_preamble.c_str())); } diff --git a/srsue/src/upper/nas.cc b/srsue/src/upper/nas.cc index 0109be17f..197dd4367 100644 --- a/srsue/src/upper/nas.cc +++ b/srsue/src/upper/nas.cc @@ -46,7 +46,7 @@ namespace srsue { ********************************************************************/ nas::nas() - : state(EMM_STATE_DEREGISTERED), have_guti(false), have_ctxt(false), ip_addr(0), eps_bearer_id(0) + : state(EMM_STATE_DEREGISTERED), 
have_guti(false), have_ctxt(false), auth_request(false), ip_addr(0), eps_bearer_id(0) { ctxt.rx_count = 0; ctxt.tx_count = 0; @@ -322,9 +322,14 @@ void nas::write_pdu(uint32_t lcid, byte_buffer_t *pdu) { case LIBLTE_MME_SECURITY_HDR_TYPE_INTEGRITY: break; case LIBLTE_MME_SECURITY_HDR_TYPE_INTEGRITY_AND_CIPHERED: - mac_valid = integrity_check(pdu); - cipher_decrypt(pdu); - break; + if((mac_valid = integrity_check(pdu))) { + cipher_decrypt(pdu); + break; + } else { + nas_log->error("Not handling NAS message with integrity check error\n"); + pool->deallocate(pdu); + return; + } case LIBLTE_MME_SECURITY_HDR_TYPE_INTEGRITY_AND_CIPHERED_WITH_NEW_EPS_SECURITY_CONTEXT: break; default: @@ -781,6 +786,7 @@ void nas::parse_authentication_request(uint32_t lcid, byte_buffer_t *pdu, const nas_log->info("Network authentication successful\n"); send_authentication_response(res, res_len, sec_hdr_type); nas_log->info_hex(ctxt.k_asme, 32, "Generated k_asme:\n"); + auth_request = true; } else if (auth_result == AUTH_SYNCH_FAILURE) { nas_log->error("Network authentication synchronization failure.\n"); send_authentication_failure(LIBLTE_MME_EMM_CAUSE_SYNCH_FAILURE, res); @@ -805,36 +811,19 @@ void nas::parse_identity_request(uint32_t lcid, byte_buffer_t *pdu) { ZERO_OBJECT(id_resp); liblte_mme_unpack_identity_request_msg((LIBLTE_BYTE_MSG_STRUCT *) pdu, &id_req); - nas_log->info("Received Identity Request. 
ID type: %d\n", id_req.id_type); - switch(id_req.id_type) { - case LIBLTE_MME_MOBILE_ID_TYPE_IMSI: - id_resp.mobile_id.type_of_id = LIBLTE_MME_MOBILE_ID_TYPE_IMSI; - usim->get_imsi_vec(id_resp.mobile_id.imsi, 15); - break; - case LIBLTE_MME_MOBILE_ID_TYPE_IMEI: - id_resp.mobile_id.type_of_id = LIBLTE_MME_MOBILE_ID_TYPE_IMEI; - usim->get_imei_vec(id_resp.mobile_id.imei, 15); - break; - default: - nas_log->error("Unhandled ID type: %d\n", id_req.id_type); - pool->deallocate(pdu); - return; - } + // Deallocate PDU after parsing + pool->deallocate(pdu); - pdu->reset(); - liblte_mme_pack_identity_response_msg(&id_resp, (LIBLTE_BYTE_MSG_STRUCT *) pdu); + ctxt.rx_count++; - if(pcap != NULL) { - pcap->write_nas(pdu->msg, pdu->N_bytes); - } + nas_log->info("Received Identity Request. ID type: %d\n", id_req.id_type); - rrc->write_sdu(lcid, pdu); + send_identity_response(lcid, id_req.id_type); } void nas::parse_security_mode_command(uint32_t lcid, byte_buffer_t *pdu) { - if (!pdu) { nas_log->error("Invalid PDU\n"); return; @@ -883,9 +872,12 @@ void nas::parse_security_mode_command(uint32_t lcid, byte_buffer_t *pdu) return; } - // Reset counters (as per 24.301 5.4.3.2) - ctxt.rx_count = 0; - ctxt.tx_count = 0; + // Reset counters (as per 24.301 5.4.3.2), only needed for initial security mode command + if (auth_request) { + ctxt.rx_count = 0; + ctxt.tx_count = 0; + auth_request = false; + } ctxt.cipher_algo = (CIPHERING_ALGORITHM_ID_ENUM) sec_mode_cmd.selected_nas_sec_algs.type_of_eea; ctxt.integ_algo = (INTEGRITY_ALGORITHM_ID_ENUM) sec_mode_cmd.selected_nas_sec_algs.type_of_eia; @@ -1176,7 +1168,8 @@ void nas::send_detach_request(bool switch_off) return; } - LIBLTE_MME_DETACH_REQUEST_MSG_STRUCT detach_request = {}; + LIBLTE_MME_DETACH_REQUEST_MSG_STRUCT detach_request; + bzero(&detach_request, sizeof(detach_request)); if (switch_off) { detach_request.detach_type.switch_off = 1; detach_request.detach_type.type_of_detach = LIBLTE_MME_SO_FLAG_SWITCH_OFF; @@ -1328,7 +1321,40 @@ 
void nas::send_authentication_failure(const uint8_t cause, const uint8_t* auth_f } -void nas::send_identity_response() {} +void nas::send_identity_response(uint32_t lcid, uint8 id_type) +{ + LIBLTE_MME_ID_RESPONSE_MSG_STRUCT id_resp; + ZERO_OBJECT(id_resp); + + switch(id_type) { + case LIBLTE_MME_MOBILE_ID_TYPE_IMSI: + id_resp.mobile_id.type_of_id = LIBLTE_MME_MOBILE_ID_TYPE_IMSI; + usim->get_imsi_vec(id_resp.mobile_id.imsi, 15); + break; + case LIBLTE_MME_MOBILE_ID_TYPE_IMEI: + id_resp.mobile_id.type_of_id = LIBLTE_MME_MOBILE_ID_TYPE_IMEI; + usim->get_imei_vec(id_resp.mobile_id.imei, 15); + break; + default: + nas_log->error("Unhandled ID type: %d\n", id_type); + return; + } + + byte_buffer_t *pdu = pool_allocate_blocking; + if (!pdu) { + nas_log->error("Fatal Error: Couldn't allocate PDU in send_identity_response().\n"); + return; + } + + liblte_mme_pack_identity_response_msg(&id_resp, (LIBLTE_BYTE_MSG_STRUCT *) pdu); + + if(pcap != NULL) { + pcap->write_nas(pdu->msg, pdu->N_bytes); + } + + rrc->write_sdu(lcid, pdu); + ctxt.tx_count++; +} void nas::send_service_request() { byte_buffer_t *msg = pool_allocate_blocking; diff --git a/srsue/src/upper/pcsc_usim.cc b/srsue/src/upper/pcsc_usim.cc index 0d423c73c..0375a9fe7 100644 --- a/srsue/src/upper/pcsc_usim.cc +++ b/srsue/src/upper/pcsc_usim.cc @@ -64,7 +64,7 @@ int pcsc_usim::init(usim_args_t *args, srslte::log *log_) log->error("Error reading IMSI from SIM.\n"); return ret; } - imsi_str = tmp; + imsi_str.assign(tmp, tmp_len); // Check extracted IMSI and convert if(15 == imsi_str.length()) { diff --git a/srsue/src/upper/rrc.cc b/srsue/src/upper/rrc.cc index e4ffe6a82..080de4b27 100644 --- a/srsue/src/upper/rrc.cc +++ b/srsue/src/upper/rrc.cc @@ -729,10 +729,10 @@ uint32_t rrc::sib_start_tti(uint32_t tti, uint32_t period, uint32_t offset, uint */ bool rrc::si_acquire(uint32_t sib_index) { - uint32_t tti; + uint32_t tti = 0; uint32_t si_win_start=0, si_win_len=0; - uint16_t period; - uint32_t sched_index; + uint16_t 
period = 0; + uint32_t sched_index = 0; uint32_t x, sf, offset; uint32_t last_win_start = 0; @@ -1942,12 +1942,31 @@ void rrc::write_sdu(uint32_t lcid, byte_buffer_t *sdu) { void rrc::write_pdu(uint32_t lcid, byte_buffer_t *pdu) { rrc_log->info_hex(pdu->msg, pdu->N_bytes, "RX %s PDU", get_rb_name(lcid).c_str()); + // If the message contains a ConnectionSetup, acknowledge the transmission to avoid blocking of paging procedure + if (lcid == 0) { + // FIXME: We unpack and process this message twice to check if it's ConnectionSetup + srslte_bit_unpack_vector(pdu->msg, bit_buf.msg, pdu->N_bytes * 8); + bit_buf.N_bits = pdu->N_bytes * 8; + bzero(&dl_ccch_msg, sizeof(LIBLTE_RRC_DL_CCCH_MSG_STRUCT)); + liblte_rrc_unpack_dl_ccch_msg((LIBLTE_BIT_MSG_STRUCT *) &bit_buf, &dl_ccch_msg); + if (dl_ccch_msg.msg_type == LIBLTE_RRC_DL_CCCH_MSG_TYPE_RRC_CON_SETUP) { + // Must enter CONNECT before stopping T300 + state = RRC_STATE_CONNECTED; + + mac_timers->timer_get(t300)->stop(); + mac_timers->timer_get(t302)->stop(); + rrc_log->console("RRC Connected\n"); + + } + } + // add PDU to command queue cmd_msg_t msg; msg.pdu = pdu; msg.command = cmd_msg_t::PDU; msg.lcid = lcid; cmd_q.push(msg); + } void rrc::process_pdu(uint32_t lcid, byte_buffer_t *pdu) @@ -2561,12 +2580,6 @@ void rrc::handle_con_setup(LIBLTE_RRC_CONNECTION_SETUP_STRUCT *setup) { // Apply the Radio Resource configuration apply_rr_config_dedicated(&setup->rr_cnfg); - // Must enter CONNECT before stopping T300 - state = RRC_STATE_CONNECTED; - - rrc_log->console("RRC Connected\n"); - mac_timers->timer_get(t300)->stop(); - mac_timers->timer_get(t302)->stop(); nas->set_barring(nas_interface_rrc::BARRING_NONE); if (dedicatedInfoNAS) { diff --git a/srsue/ue.conf.example b/srsue/ue.conf.example index 5822ab0c9..4073ad96f 100644 --- a/srsue/ue.conf.example +++ b/srsue/ue.conf.example @@ -204,6 +204,8 @@ enable = false # pdsch_csi_enabled: Stores the Channel State Information and uses it for weightening the softbits. 
It is only # used in TM1. It is True by default. # +# pdsch_8bit_decoder: Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental) +# ##################################################################### [expert] #ip_netmask = 255.255.255.0 @@ -215,7 +217,7 @@ enable = false #cqi_fixed = 10 #snr_ema_coeff = 0.1 #snr_estim_alg = refs -#pdsch_max_its = 4 +#pdsch_max_its = 8 # These are half iterations #attach_enable_64qam = false #nof_phy_threads = 2 #equalizer_mode = mmse @@ -234,6 +236,7 @@ enable = false #metrics_period_secs = 1 #metrics_csv_filename = /tmp/ue_metrics.csv #pdsch_csi_enabled = true +#pdsch_8bit_decoder = false # CFO related values #cfo_is_doppler = false