master
yagoda 6 years ago
commit 824dafddc6

@ -75,7 +75,8 @@ option(ENABLE_HARDSIM "Enable support for SIM cards" ON)
option(BUILD_STATIC "Attempt to statically link external deps" OFF)
option(RPATH "Enable RPATH" OFF)
option(ENABLE_ASAN "Enable gcc address sanitizer" OFF)
option(ENABLE_ASAN "Enable gcc/clang address sanitizer" OFF)
option(ENABLE_MSAN "Enable clang memory sanitizer" OFF)
option(USE_LTE_RATES "Use standard LTE sampling rates" OFF)
@ -279,8 +280,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb -DBUILD_TYPE_RELWITHDEBINFO")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -DBUILD_TYPE_RELWITHDEBINFO")
else(${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -DBUILD_TYPE_RELEASE")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DBUILD_TYPE_RELEASE")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fno-trapping-math -fno-math-errno -DBUILD_TYPE_RELEASE")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fno-trapping-math -fno-math-errno -DBUILD_TYPE_RELEASE")
endif(${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
endif(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
@ -315,7 +316,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
endif(HAVE_SSE)
endif(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=native -DIS_ARM -DHAVE_NEON")
message(STATUS "have ARM")
@ -332,10 +332,21 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
if(NOT WIN32)
ADD_CXX_COMPILER_FLAG_IF_AVAILABLE(-fvisibility=hidden HAVE_VISIBILITY_HIDDEN)
endif(NOT WIN32)
if (ENABLE_ASAN AND ENABLE_MSAN)
message(FATAL_ERROR "ASAN and MSAN cannot be enabled at the same time.")
endif (ENABLE_ASAN AND ENABLE_MSAN)
if (ENABLE_ASAN)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
endif (ENABLE_ASAN)
# MemorySanitizer flags are only added for clang. This block sits inside the
# GNU/Clang compiler check, so the else() branch is reached when the option
# is requested with a non-clang compiler; warn instead of silently building
# without the sanitizer.
if (ENABLE_MSAN)
  if (CMAKE_C_COMPILER_ID MATCHES "Clang")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=memory -fno-omit-frame-pointer -fPIE -pie")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=memory -fno-omit-frame-pointer -fPIE -pie")
  else ()
    message(WARNING "ENABLE_MSAN is ON but the C compiler is '${CMAKE_C_COMPILER_ID}'; the memory sanitizer requires clang, so no MSAN flags were added.")
  endif ()
endif ()
endif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
@ -346,6 +357,23 @@ endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
message(STATUS "CMAKE_C_FLAGS is ${CMAKE_C_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS is ${CMAKE_CXX_FLAGS}")
########################################################################
# clang-tidy check
########################################################################
# Look for a clang-tidy executable. When one is found, DO_CLANG_TIDY is set to
# a two-element list: the executable path followed by the -checks selection.
# When none is found, DO_CLANG_TIDY is left untouched.
find_program(CLANG_TIDY_BIN NAMES "clang-tidy" DOC "Path to clang-tidy executable")

if(CLANG_TIDY_BIN)
  message(STATUS "clang-tidy found: ${CLANG_TIDY_BIN}")
  set(DO_CLANG_TIDY
      "${CLANG_TIDY_BIN}"
      "-checks=*,-clang-analyzer-alpha.*,-modernize-*,-cppcoreguidelines-pro-type-vararg,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-cppcoreguidelines-pro-bounds-constant-array-index")
else()
  message(STATUS "clang-tidy not found.")
endif()
########################################################################
# Create uninstall targets
########################################################################

@ -14,7 +14,7 @@ if (ENABLE_SSE)
#
# Check compiler for SSE4_1 intrinsics
#
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
set(CMAKE_REQUIRED_FLAGS "-msse4.1")
check_c_source_runs("
#include <emmintrin.h>
@ -38,7 +38,7 @@ if (ENABLE_SSE)
#
# Check compiler for AVX intrinsics
#
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
set(CMAKE_REQUIRED_FLAGS "-mavx")
check_c_source_runs("
#include <immintrin.h>
@ -72,7 +72,7 @@ if (ENABLE_SSE)
#
# Check compiler for AVX2 intrinsics
#
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
set(CMAKE_REQUIRED_FLAGS "-mavx2")
check_c_source_runs("
#include <immintrin.h>
@ -106,7 +106,7 @@ if (ENABLE_SSE)
#
# Check compiler for FMA intrinsics
#
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
set(CMAKE_REQUIRED_FLAGS "-mfma")
check_c_source_runs("
#include <immintrin.h>
@ -141,7 +141,7 @@ if (ENABLE_SSE)
#
# Check compiler for AVX512F intrinsics
#
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
set(CMAKE_REQUIRED_FLAGS "-mavx512f")
check_c_source_runs("
#include <immintrin.h>

@ -150,9 +150,9 @@ inline void s1ap_plmn_to_mccmnc(uint32_t plmn, uint16_t *mcc, uint16_t *mnc)
*mnc |= nibbles[4]; // MNC digit 3
} else {
// 3-digit MNC
*mnc |= nibbles[5] << 8; // MNC digit 1
*mnc |= nibbles[4] << 4; // MNC digit 2
*mnc |= nibbles[2] ; // MNC digit 3
*mnc |= nibbles[2] << 8; // MNC digit 1
*mnc |= nibbles[5] << 4; // MNC digit 2
*mnc |= nibbles[4] ; // MNC digit 3
}
}
@ -177,9 +177,9 @@ inline void s1ap_mccmnc_to_plmn(uint16_t mcc, uint16_t mnc, uint32_t *plmn)
nibbles[4] = (mnc & 0x000F); // MNC digit 3
} else {
// 3-digit MNC
nibbles[5] = (mnc & 0x0F00) >> 8; // MNC digit 1
nibbles[4] = (mnc & 0x00F0) >> 4; // MNC digit 2
nibbles[2] = (mnc & 0x000F); // MNC digit 3
nibbles[2] = (mnc & 0x0F00) >> 8; // MNC digit 1
nibbles[5] = (mnc & 0x00F0) >> 4; // MNC digit 2
nibbles[4] = (mnc & 0x000F); // MNC digit 3
}
*plmn = 0x000000;

@ -122,7 +122,7 @@ public:
}
void clear() { // remove all items
myobj item;
myobj *item = NULL;
while (try_pop(item));
}

@ -59,10 +59,10 @@
#define ASYNC_DL_SCHED (HARQ_DELAY_MS <= 4)
// Cat 3 UE - Max number of DL-SCH transport block bits received within a TTI
// Cat 4 UE - Max number of DL-SCH transport block bits received within a TTI
// 3GPP 36.306 Table 4.1.1
#define SRSLTE_MAX_BUFFER_SIZE_BITS 102048
#define SRSLTE_MAX_BUFFER_SIZE_BYTES 12756
#define SRSLTE_MAX_BUFFER_SIZE_BITS 150752
#define SRSLTE_MAX_BUFFER_SIZE_BYTES (SRSLTE_MAX_BUFFER_SIZE_BITS/8)
#define SRSLTE_BUFFER_HEADER_OFFSET 1020
#define SRSLTE_BUFFER_POOL_LOG_ENABLED
@ -122,7 +122,9 @@ public:
byte_buffer_t():N_bytes(0)
{
bzero(buffer, SRSLTE_MAX_BUFFER_SIZE_BYTES);
#ifdef ENABLE_TIMESTAMP
timestamp_is_set = false;
#endif
msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET];
next = NULL;
#ifdef SRSLTE_BUFFER_POOL_LOG_ENABLED
@ -132,6 +134,9 @@ public:
byte_buffer_t(const byte_buffer_t& buf)
{
bzero(buffer, SRSLTE_MAX_BUFFER_SIZE_BYTES);
msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET];
next = NULL;
// copy actual contents
N_bytes = buf.N_bytes;
memcpy(msg, buf.msg, N_bytes);
}
@ -141,6 +146,8 @@ public:
if (&buf == this)
return *this;
bzero(buffer, SRSLTE_MAX_BUFFER_SIZE_BYTES);
msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET];
next = NULL;
N_bytes = buf.N_bytes;
memcpy(msg, buf.msg, N_bytes);
return *this;
@ -149,7 +156,9 @@ public:
{
msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET];
N_bytes = 0;
#ifdef ENABLE_TIMESTAMP
timestamp_is_set = false;
#endif
}
uint32_t get_headroom()
{
@ -183,8 +192,10 @@ public:
private:
#ifdef ENABLE_TIMESTAMP
struct timeval timestamp[3];
bool timestamp_is_set;
#endif
byte_buffer_t *next;
};
@ -199,6 +210,9 @@ struct bit_buffer_t{
bit_buffer_t():N_bits(0)
{
msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET];
#ifdef ENABLE_TIMESTAMP
timestamp_is_set = false;
#endif
}
bit_buffer_t(const bit_buffer_t& buf){
N_bits = buf.N_bits;
@ -216,7 +230,9 @@ struct bit_buffer_t{
{
msg = &buffer[SRSLTE_BUFFER_HEADER_OFFSET];
N_bits = 0;
#ifdef ENABLE_TIMESTAMP
timestamp_is_set = false;
#endif
}
uint32_t get_headroom()
{
@ -242,9 +258,10 @@ struct bit_buffer_t{
}
private:
#ifdef ENABLE_TIMESTAMP
struct timeval timestamp[3];
bool timestamp_is_set;
#endif
};
} // namespace srslte

@ -0,0 +1,66 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsUE library.
*
* srsUE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsUE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#ifndef SRSLTE_INT_HELPERS_H
#define SRSLTE_INT_HELPERS_H
#include <stdint.h> // fixed-width integer types used below; keeps this header self-contained

namespace srslte {

/******************************************************************************
 * Safe conversions between byte buffers and integer types.
 *
 * Every helper uses a fixed layout in the buffer: most-significant byte first
 * (buf[0] holds the top 8 bits). The buffer is accessed one byte at a time.
 * Note: these don't perform endian conversion on the integer value itself -
 * use e.g. htonl/ntohl if required.
 *****************************************************************************/

/* Reads 4 bytes from buf and stores them in *i, buf[0] being the MSB. */
inline void uint8_to_uint32(const uint8_t *buf, uint32_t *i)
{
  *i = ((uint32_t)buf[0] << 24) |
       ((uint32_t)buf[1] << 16) |
       ((uint32_t)buf[2] <<  8) |
        (uint32_t)buf[3];
}

/* Writes the 4 bytes of i into buf, MSB first. */
inline void uint32_to_uint8(uint32_t i, uint8_t *buf)
{
  buf[0] = (uint8_t)((i >> 24) & 0xFF);
  buf[1] = (uint8_t)((i >> 16) & 0xFF);
  buf[2] = (uint8_t)((i >>  8) & 0xFF);
  buf[3] = (uint8_t)( i        & 0xFF);
}

/* Reads 2 bytes from buf and stores them in *i, buf[0] being the MSB. */
inline void uint8_to_uint16(const uint8_t *buf, uint16_t *i)
{
  // Combine in a wider unsigned type, then narrow explicitly to 16 bits.
  *i = (uint16_t)(((uint32_t)buf[0] << 8) | (uint32_t)buf[1]);
}

/* Writes the 2 bytes of i into buf, MSB first. */
inline void uint16_to_uint8(uint16_t i, uint8_t *buf)
{
  buf[0] = (uint8_t)((i >> 8) & 0xFF);
  buf[1] = (uint8_t)( i       & 0xFF);
}

} // namespace srslte
#endif // SRSLTE_INT_HELPERS_H

@ -54,6 +54,7 @@ public:
log_filter();
log_filter(std::string layer);
log_filter(std::string layer, logger *logger_, bool tti=false);
~log_filter();
void init(std::string layer, logger *logger_, bool tti=false);

@ -52,6 +52,14 @@
#endif
// Useful macros for templates
#define CONCAT(a, b) a##b
#define CONCAT2(a, b) CONCAT(a,b)
#define STRING2(x) #x
#define STRING(x) STRING2(x)
// Common error codes
#define SRSLTE_SUCCESS 0
#define SRSLTE_ERROR -1

@ -240,6 +240,7 @@ public:
virtual void reset() = 0;
virtual void write_sdu(uint32_t lcid, srslte::byte_buffer_t *sdu, bool blocking = true) = 0;
virtual void add_bearer(uint32_t lcid, srslte::srslte_pdcp_config_t cnfg = srslte::srslte_pdcp_config_t()) = 0;
virtual void change_lcid(uint32_t old_lcid, uint32_t new_lcid) = 0;
virtual void config_security(uint32_t lcid,
uint8_t *k_enc_,
uint8_t *k_int_,
@ -273,10 +274,12 @@ class rlc_interface_rrc
public:
virtual void reset() = 0;
virtual void reestablish() = 0;
virtual void reestablish(uint32_t lcid) = 0;
virtual void add_bearer(uint32_t lcid) = 0;
virtual void add_bearer(uint32_t lcid, srslte::srslte_rlc_config_t cnfg) = 0;
virtual void add_bearer_mrb(uint32_t lcid) = 0;
virtual void del_bearer(uint32_t lcid) = 0;
virtual void change_lcid(uint32_t old_lcid, uint32_t new_lcid) = 0;
};
// RLC interface for PDCP
@ -545,6 +548,7 @@ typedef struct {
bool sic_pss_enabled;
float rx_gain_offset;
bool pdsch_csi_enabled;
bool pdsch_8bit_decoder;
uint32_t intra_freq_meas_len_ms;
uint32_t intra_freq_meas_period_ms;
} phy_args_t;

@ -44,6 +44,7 @@ typedef struct SRSLTE_API {
uint8_t *c_bytes;
float *c_float;
short *c_short;
int8_t *c_char;
uint32_t cur_len;
uint32_t max_len;
} srslte_sequence_t;

@ -36,6 +36,7 @@
#define SRSLTE_RM_TURBO_H
#include "srslte/config.h"
#include "srslte/phy/fec/turbodecoder.h"
#ifndef SRSLTE_RX_NULL
#define SRSLTE_RX_NULL 10000
@ -47,7 +48,6 @@
#include "srslte/config.h"
SRSLTE_API int srslte_rm_turbo_tx(uint8_t *w_buff,
uint32_t buff_len,
uint8_t *input,
@ -84,5 +84,17 @@ SRSLTE_API int srslte_rm_turbo_rx_lut(int16_t *input,
uint32_t cb_idx,
uint32_t rv_idx);
SRSLTE_API int srslte_rm_turbo_rx_lut_(int16_t *input,
int16_t *output,
uint32_t in_len,
uint32_t cb_idx,
uint32_t rv_idx,
bool enable_input_tdec);
SRSLTE_API int srslte_rm_turbo_rx_lut_8bit(int8_t *input,
int8_t *output,
uint32_t in_len,
uint32_t cb_idx,
uint32_t rv_idx);
#endif // SRSLTE_RM_TURBO_H

@ -47,8 +47,9 @@ typedef struct SRSLTE_API {
SRSLTE_API int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h,
uint32_t long_cb);
SRSLTE_API int srslte_tc_interl_UMTS_gen(srslte_tc_interl_t *h,
uint32_t long_cb);
SRSLTE_API int srslte_tc_interl_LTE_gen_interl(srslte_tc_interl_t *h,
uint32_t long_cb,
uint32_t interl_win);
SRSLTE_API int srslte_tc_interl_init(srslte_tc_interl_t *h,
uint32_t max_long_cb);

@ -70,10 +70,12 @@ SRSLTE_API int srslte_tcod_encode(srslte_tcod_t *h,
uint32_t long_cb);
SRSLTE_API int srslte_tcod_encode_lut(srslte_tcod_t *h,
srslte_crc_t *crc,
srslte_crc_t *crc_tb,
srslte_crc_t *crc_cb,
uint8_t *input,
uint8_t *parity,
uint32_t cblen_idx);
uint32_t cblen_idx,
bool last_cb);
SRSLTE_API void srslte_tcod_gentable();

@ -47,46 +47,81 @@
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
#include "srslte/phy/fec/turbodecoder_gen.h"
#include "srslte/phy/fec/turbodecoder_simd.h"
// Expect the input to be aligned for sub-block window processing.
#define SRSLTE_TDEC_EXPECT_INPUT_SB 1
// Include interfaces for 8 and 16 bit decoder implementations
#define LLR_IS_8BIT
#include "srslte/phy/fec/turbodecoder_impl.h"
#undef LLR_IS_8BIT
#define LLR_IS_16BIT
#include "srslte/phy/fec/turbodecoder_impl.h"
#undef LLR_IS_16BIT
#define SRSLTE_TDEC_NOF_AUTO_MODES_8 2
#define SRSLTE_TDEC_NOF_AUTO_MODES_16 3
typedef enum {SRSLTE_TDEC_8, SRSLTE_TDEC_16} srslte_tdec_llr_type_t;
typedef struct SRSLTE_API {
float *input_conv;
union {
srslte_tdec_simd_t tdec_simd;
srslte_tdec_gen_t tdec_gen;
};
uint32_t max_long_cb;
void *dec8_hdlr[SRSLTE_TDEC_NOF_AUTO_MODES_8];
void *dec16_hdlr[SRSLTE_TDEC_NOF_AUTO_MODES_16];
srslte_tdec_8bit_impl_t *dec8[SRSLTE_TDEC_NOF_AUTO_MODES_8];
srslte_tdec_16bit_impl_t *dec16[SRSLTE_TDEC_NOF_AUTO_MODES_16];
int nof_blocks8[SRSLTE_TDEC_NOF_AUTO_MODES_8];
int nof_blocks16[SRSLTE_TDEC_NOF_AUTO_MODES_16];
// Declare as void types as can be int8 or int16
void *app1;
void *app2;
void *ext1;
void *ext2;
void *syst0;
void *parity0;
void *parity1;
void *input_conv;
bool force_not_sb;
srslte_tdec_impl_type_t dec_type;
srslte_tdec_llr_type_t current_llr_type;
uint32_t current_dec;
uint32_t current_long_cb;
uint32_t current_inter_idx;
int current_cbidx;
srslte_tc_interl_t interleaver[4][SRSLTE_NOF_TC_CB_SIZES];
int n_iter;
} srslte_tdec_t;
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
uint32_t max_long_cb);
SRSLTE_API int srslte_tdec_init_manual(srslte_tdec_t * h,
uint32_t max_long_cb,
srslte_tdec_impl_type_t dec_type);
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h,
SRSLTE_API void srslte_tdec_force_not_sb(srslte_tdec_t *h);
SRSLTE_API int srslte_tdec_new_cb(srslte_tdec_t * h,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_reset_cb(srslte_tdec_t * h,
uint32_t cb_idx);
SRSLTE_API int srslte_tdec_get_nof_iterations(srslte_tdec_t * h);
SRSLTE_API int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h,
uint32_t cb_idx);
SRSLTE_API uint32_t srslte_tdec_autoimp_get_subblocks(uint32_t long_cb);
SRSLTE_API uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t * h);
SRSLTE_API uint32_t srslte_tdec_autoimp_get_subblocks_8bit(uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
int16_t* input,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
uint8_t *output);
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
int16_t * input,
@ -94,27 +129,15 @@ SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
uint32_t nof_iterations,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h,
int16_t* input[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h,
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h,
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration_8bit(srslte_tdec_t * h,
int8_t* input,
uint8_t *output);
SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h,
SRSLTE_API int srslte_tdec_run_all_8bit(srslte_tdec_t * h,
int8_t * input,
uint8_t *output,
uint32_t cb_idx,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h,
int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations,
uint32_t long_cb);
#endif // SRSLTE_TURBODECODER_H

@ -47,53 +47,16 @@
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef struct SRSLTE_API {
int max_long_cb;
float *beta;
} srslte_map_gen_vl_t;
typedef struct SRSLTE_API {
int max_long_cb;
srslte_map_gen_vl_t dec;
float *llr1;
float *llr2;
float *w;
float *syst;
float *parity;
int current_cbidx;
uint32_t current_cb_len;
uint32_t n_iter;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
} srslte_tdec_gen_t;
SRSLTE_API int srslte_tdec_gen_init(srslte_tdec_gen_t * h,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_gen_free(srslte_tdec_gen_t * h);
SRSLTE_API int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h,
float * input,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_gen_decision(srslte_tdec_gen_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h,
float * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);
uint32_t max_long_cb;
int16_t *beta;
} tdec_gen_t;
int tdec_gen_init(void **h, uint32_t max_long_cb);
void tdec_gen_free(void *h);
void tdec_gen_dec(void *h, int16_t * input, int16_t *app, int16_t * parity, int16_t *output, uint32_t long_cb);
void tdec_gen_extract_input(int16_t *input, int16_t *syst, int16_t *parity0, int16_t *parity1, int16_t *app2, uint32_t long_cb);
void tdec_gen_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb);
#endif // SRSLTE_TURBODECODER_GEN_H

@ -0,0 +1,68 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#ifndef SRSLTE_TURBODECODER_IMPL_H
#define SRSLTE_TURBODECODER_IMPL_H
#include "srslte/config.h"
/* Interface for internal decoder implementation */
/* Selects one internal turbo decoder implementation.
 * Values are consecutive starting at 0, so SRSLTE_TDEC_NOF_IMP (last entry)
 * equals the number of selectable values listed before it (7).
 * NOTE(review): the per-value meanings below are inferred from the names
 * only - confirm against the decoder sources. */
typedef enum SRSLTE_API {
/* presumably: let the decoder pick an implementation automatically */
SRSLTE_TDEC_AUTO = 0,
/* presumably: generic (non-SIMD) implementation */
SRSLTE_TDEC_GENERIC,
/* presumably: 16-bit SSE implementation */
SRSLTE_TDEC_SSE,
/* presumably: 16-bit SSE, windowed (sub-block) implementation */
SRSLTE_TDEC_SSE_WINDOW,
/* presumably: 16-bit AVX, windowed (sub-block) implementation */
SRSLTE_TDEC_AVX_WINDOW,
/* presumably: 8-bit SSE, windowed (sub-block) implementation */
SRSLTE_TDEC_SSE8_WINDOW,
/* presumably: 8-bit AVX, windowed (sub-block) implementation */
SRSLTE_TDEC_AVX8_WINDOW,
/* Number of entries above; not a selectable implementation */
SRSLTE_TDEC_NOF_IMP
} srslte_tdec_impl_type_t;
#endif
/* Everything from here on is outside the include guard, so the struct below
 * is emitted once per inclusion. The includer must define exactly one of
 * LLR_IS_8BIT / LLR_IS_16BIT before including this file; that choice sets the
 * LLR sample type (llr_t) and the name of the generated struct (type_name).
 * Including the file with neither macro defined is a hard error. */
#ifdef LLR_IS_8BIT
#define llr_t int8_t
#define type_name srslte_tdec_8bit_impl_t
#else
#ifdef LLR_IS_16BIT
#define llr_t int16_t
#define type_name srslte_tdec_16bit_impl_t
#else
#error "Unsupported LLR mode"
#endif
#endif
/* Table of function pointers implemented by one decoder back-end for the
 * selected LLR width. */
typedef struct SRSLTE_API {
/* Creates the back-end state and returns it through *h. */
int (*tdec_init)(void **h, uint32_t max_long_cb);
/* Releases a state previously produced by tdec_init. */
void (*tdec_free)(void *h);
/* Runs one constituent decoder pass over long_cb samples. */
void (*tdec_dec)(void *h, llr_t * input, llr_t *app, llr_t * parity, llr_t *output, uint32_t long_cb);
/* Splits the received input into the separate working buffers.
 * NOTE(review): the call site in run_tdec_iteration passes
 * (input, syst, app2, parity0, parity1, long_cb), i.e. app2 is the third
 * argument, not the fifth as the parameter names here suggest. All are
 * llr_t*, so the compiler cannot catch it - confirm against the
 * implementations which order is the real one. */
void (*tdec_extract_input)(llr_t *input, llr_t *syst, llr_t *parity0, llr_t *parity1, llr_t *app2, uint32_t long_cb);
/* Produces hard decisions from app1 into output. */
void (*tdec_decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb);
} type_name;
/* Remove the per-width helper macros so the file can be included again with
 * the other LLR width. */
#undef llr_t
#undef type_name

@ -0,0 +1,158 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include "srslte/config.h"
#define MAKE_CALL(a) CONCAT2(a,type_name)
#define MAKE_VEC(a) CONCAT2(a,vec_suffix)
#define PRINT CONCAT2(srslte_vec_fprint,print_suffix)
#ifdef LLR_IS_8BIT
#define llr_t int8_t
#define type_name _8bit
#define vec_suffix _bbb
#define print_suffix _bs
#define decptr h->dec8[h->current_dec]
#define dechdlr h->dec8_hdlr[h->current_dec]
#define input_is_interleaved 1
#else
#ifdef LLR_IS_16BIT
#define llr_t int16_t
#define vec_suffix _sss
#define print_suffix _s
#define decptr h->dec16[h->current_dec]
#define dechdlr h->dec16_hdlr[h->current_dec]
#define input_is_interleaved (h->current_dec > 0)
#define type_name _16bit
#else
#warning "Unsupported LLR mode"
#endif
#endif
#define debug_enabled_iter 0
#define debug_len 20
#define debug_vec(a) if (debug_enabled_iter) {printf("%s it=%d: ", STRING(a), n_iter);PRINT(stdout, a, debug_len);}
/* Copies the tail samples of a sub-block formatted input into positions
 * long_cb .. long_cb+2 of the four destination buffers.
 *
 * The tail area starts at input[3*(long_cb+32)]. Its first 6 samples are
 * (syst, parity0) pairs and the next 6 samples are (app2, parity1) pairs.
 *
 * The loop counter is unsigned so it matches long_cb and no signed/unsigned
 * comparison is involved.
 */
static void MAKE_CALL(extract_input_tail_sb)(llr_t *input, llr_t *syst, llr_t *app2, llr_t *parity0, llr_t *parity1, uint32_t long_cb)
{
  const uint32_t tail_base = 3 * (long_cb + 32);

  for (uint32_t k = 0; k < 3; k++) {
    syst[long_cb + k]    = input[tail_base + 2 * k];
    parity0[long_cb + k] = input[tail_base + 2 * k + 1];
    app2[long_cb + k]    = input[tail_base + 6 + 2 * k];
    parity1[long_cb + k] = input[tail_base + 6 + 2 * k + 1];
  }
}
/* Runs one step of the turbo decoder and increments h->n_iter.
 *
 * Each call executes only one of the two constituent decoders, chosen by the
 * parity of h->n_iter on entry:
 *   - even: MAP decoder #1 on (syst, parity0), result in ext1
 *   - odd : MAP decoder #2 on (app2, parity1), result in ext2, followed by
 *           the mapping of ext2 into app1 for the next decoder #1 pass
 * On the very first call (n_iter == 0) the received input is split into the
 * working buffers first.
 *
 * If no code block has been selected (h->current_cbidx < 0) an error is
 * printed to stderr and nothing else happens.
 */
void MAKE_CALL(run_tdec_iteration)(srslte_tdec_t * h, llr_t * input)
{
  if (h->current_cbidx < 0) {
    fprintf(stderr, "Error CB index not set (call srslte_tdec_new_cb() first)\n");
    return;
  }

  uint16_t *inter   = h->interleaver[h->current_inter_idx][h->current_cbidx].forward;
  uint16_t *deinter = h->interleaver[h->current_inter_idx][h->current_cbidx].reverse;

  /* Working buffers are stored as void* in the handle; view them as llr_t */
  llr_t *syst    = (llr_t*) h->syst0;
  llr_t *parity0 = (llr_t*) h->parity0;
  llr_t *parity1 = (llr_t*) h->parity1;

  llr_t *app1 = (llr_t*) h->app1;
  llr_t *app2 = (llr_t*) h->app2;
  llr_t *ext1 = (llr_t*) h->ext1;
  llr_t *ext2 = (llr_t*) h->ext2;

  uint32_t long_cb = h->current_long_cb;
  uint32_t n_iter  = h->n_iter;

  if (SRSLTE_TDEC_EXPECT_INPUT_SB && !h->force_not_sb && input_is_interleaved) {
    /* Input is already laid out in sub-blocks: use it in place instead of
     * copying into the handle's syst/parity buffers. */
    syst = input;
    // align to 32 bytes (warning: must be same alignment as in rm_turbo.c)
    parity0 = &input[long_cb+32];
    parity1 = &input[2*(long_cb+32)];
    if (n_iter == 0) {
      MAKE_CALL(extract_input_tail_sb)(input, syst, app2, parity0, parity1, long_cb);
    }
  } else {
    if (n_iter == 0) {
      /* NOTE(review): the tdec_extract_input function-pointer declaration
       * names its parameters (input, syst, parity0, parity1, app2, long_cb),
       * while app2 is passed third here. All are llr_t*, so this compiles
       * either way - confirm the implementations expect this order. */
      decptr->tdec_extract_input(input, syst, app2, parity0, parity1, long_cb);
    }
  }

  if ((n_iter%2) == 0) {
    // Add apriori information to decoder 1
    if (n_iter) {
      MAKE_VEC(srslte_vec_sub)(app1, ext1, app1, long_cb);
    }

    // Run MAP DEC #1 (no apriori input on the very first pass)
    decptr->tdec_dec(dechdlr, syst, n_iter ? app1 : NULL, parity0, ext1, long_cb);
  }

  // Interleave extrinsic output of DEC1 to form apriori info for decoder 2
  if (n_iter%2) {
    // Convert aposteriori information into extrinsic information
    if (n_iter > 1) {
      MAKE_VEC(srslte_vec_sub)(ext1, app1, ext1, long_cb);
    }

    MAKE_VEC(srslte_vec_lut)(ext1, deinter, app2, long_cb);

    // Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
    decptr->tdec_dec(dechdlr, app2, NULL, parity1, ext2, long_cb);

    // Deinterleaved extrinsic bits become apriori info for decoder 1
    MAKE_VEC(srslte_vec_lut)(ext2, inter, app1, long_cb);
  }

  /* Optional tracing (no-op unless debug_enabled_iter is non-zero) */
  if (h->n_iter == 0) {
    debug_vec(syst);
    debug_vec(parity0);
    debug_vec(parity1);
  }
  debug_vec(ext1);
  debug_vec(ext2);
  debug_vec(app1);
  debug_vec(app2);

  h->n_iter++;
}
/* Drop the per-width helper macros so this template can be included again
 * for the other LLR width. */
/* The tracing switch defined above is debug_enabled_iter, so that is the
 * name that has to be removed here. */
#undef debug_enabled_iter
/* Kept from the original cleanup list; this file defines no macro of this
 * name. NOTE(review): remove once confirmed that no includer relies on it. */
#undef debug_enabled
#undef debug_len
#undef debug_vec
#undef llr_t
#undef vec_suffix
#undef print_suffix
#undef decptr
#undef dechdlr
#undef type_name
#undef input_is_interleaved

@ -1,122 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder_simd.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef SRSLTE_TURBODECODER_SIMD_H
#define SRSLTE_TURBODECODER_SIMD_H
#include "srslte/config.h"
#include "srslte/phy/fec/tc_interl.h"
#include "srslte/phy/fec/cbsegm.h"
// Define maximum number of CB decoded in parallel (2 for AVX2)
#define SRSLTE_TDEC_MAX_NPAR 2
#define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
/* State of one MAP constituent decoder as embedded in srslte_tdec_simd_t
 * (member "dec").
 * NOTE(review): buffer roles below are inferred from the field names -
 * confirm against the implementation. */
typedef struct SRSLTE_API {
/* maximum code block length this state was dimensioned for */
uint32_t max_long_cb;
/* maximum number of code blocks handled in parallel */
uint32_t max_par_cb;
/* presumably: storage for the forward (alpha) state metrics */
int16_t *alpha;
/* presumably: storage for the branch metrics */
int16_t *branch;
} map_gen_t;
/* Handle of the SIMD turbo decoder. Every per-code-block member is an array
 * of SRSLTE_TDEC_MAX_NPAR entries, one per code block decoded in parallel.
 * NOTE(review): buffer roles are inferred from the field names - confirm
 * against the implementation. */
typedef struct SRSLTE_API {
/* maximum code block length the handle was initialised for */
uint32_t max_long_cb;
/* maximum number of code blocks decoded in parallel */
uint32_t max_par_cb;
/* embedded MAP decoder state */
map_gen_t dec;
/* presumably: a-priori / a-posteriori buffers for decoder 1 and 2 */
int16_t *app1[SRSLTE_TDEC_MAX_NPAR];
int16_t *app2[SRSLTE_TDEC_MAX_NPAR];
/* presumably: extrinsic outputs of decoder 1 and 2 */
int16_t *ext1[SRSLTE_TDEC_MAX_NPAR];
int16_t *ext2[SRSLTE_TDEC_MAX_NPAR];
/* presumably: systematic and the two parity sample streams */
int16_t *syst[SRSLTE_TDEC_MAX_NPAR];
int16_t *parity0[SRSLTE_TDEC_MAX_NPAR];
int16_t *parity1[SRSLTE_TDEC_MAX_NPAR];
/* presumably: bit mask of the parallel code blocks still being decoded */
int cb_mask;
/* index into interleaver[] for the current code block size */
int current_cbidx;
/* one interleaver table per supported code block size */
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
/* iteration counter per parallel code block */
int n_iter[SRSLTE_TDEC_MAX_NPAR];
} srslte_tdec_simd_t;
/* Public API of the SIMD turbo decoder.
 * Array parameters hold SRSLTE_TDEC_MAX_NPAR entries, one per code block
 * decoded in parallel.
 * NOTE(review): return-value conventions are not visible from this header -
 * confirm against the implementation. */

/* Initialises the handle for up to max_par_cb parallel code blocks of at
 * most max_long_cb length each. */
SRSLTE_API int srslte_tdec_simd_init(srslte_tdec_simd_t * h,
                                     uint32_t max_par_cb,
                                     uint32_t max_long_cb);

/* Releases the resources held by the handle. */
SRSLTE_API void srslte_tdec_simd_free(srslte_tdec_simd_t * h);

/* Prepares the handle for decoding code blocks of length long_cb. */
SRSLTE_API int srslte_tdec_simd_reset(srslte_tdec_simd_t * h,
                                      uint32_t long_cb);

/* Returns the iteration count of the parallel code block cb_idx.
 * (A stray, duplicated SRSLTE_API token preceding this declaration was
 * removed.) */
SRSLTE_API int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h,
                                                      uint32_t cb_idx);

/* Resets the state of the parallel code block cb_idx. */
SRSLTE_API int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h,
                                         uint32_t cb_idx);

/* Runs one decoder iteration on the given inputs. */
SRSLTE_API void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h,
                                           int16_t * input[SRSLTE_TDEC_MAX_NPAR],
                                           uint32_t long_cb);

/* Writes the hard decisions into the output buffers. */
SRSLTE_API void srslte_tdec_simd_decision(srslte_tdec_simd_t * h,
                                          uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
                                          uint32_t long_cb);

/* As srslte_tdec_simd_decision; presumably with packed (byte) output. */
SRSLTE_API void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h,
                                               uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
                                               uint32_t long_cb);

/* Byte decision for the single parallel code block cbidx. */
SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h,
                                                  uint8_t *output,
                                                  uint32_t cbidx,
                                                  uint32_t long_cb);

/* Convenience entry point taking inputs, outputs and an iteration count. */
SRSLTE_API int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h,
                                        int16_t * input[SRSLTE_TDEC_MAX_NPAR],
                                        uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
                                        uint32_t nof_iterations,
                                        uint32_t long_cb);
#endif // SRSLTE_TURBODECODER_SIMD_H

@ -1,119 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder_simd_inter.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef SRSLTE_TURBODECODER_SIMD_INTER_H
#define SRSLTE_TURBODECODER_SIMD_INTER_H
/** This is a SIMD inter-frame parallel turbo decoder. It parallelizes 8 code-blocks using SSE.
* This implementation is currently not functional and is not used by the rest of the code.
*/
#include "srslte/config.h"
#include "srslte/phy/fec/tc_interl.h"
#include "srslte/phy/fec/cbsegm.h"
/* Maximum number of code blocks processed in parallel: 16 when LV_HAVE_AVX2
 * evaluates to non-zero, 8 otherwise. */
#if LV_HAVE_AVX2
#define SRSLTE_TDEC_MAX_NPAR 16
#else
#define SRSLTE_TDEC_MAX_NPAR 8
#endif
/* Handle of the inter-frame parallel SIMD turbo decoder.
 * NOTE(review): buffer roles are inferred from the field names - confirm
 * against the implementation. */
typedef struct SRSLTE_API {
/* maximum code block length the handle was initialised for */
int max_long_cb;
/* presumably: systematic / parity samples for constituent decoder 1 */
int16_t *syst0;
int16_t *parity0;
/* presumably: systematic / parity samples for constituent decoder 2 */
int16_t *syst1;
int16_t *parity1;
/* presumably: LLR outputs of the two constituent decoders */
int16_t *llr1;
int16_t *llr2;
/* presumably: extrinsic information exchanged between the decoders */
int16_t *w;
/* presumably: storage for the forward (alpha) state metrics */
int16_t *alpha;
/* maximum number of code blocks decoded in parallel */
uint32_t max_par_cb;
/* index into interleaver[] for the current code block size */
int current_cbidx;
/* length of the code blocks currently being decoded */
uint32_t current_long_cb;
/* one interleaver table per supported code block size */
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
/* iteration counter per parallel code block */
int n_iter[SRSLTE_TDEC_MAX_NPAR];
} srslte_tdec_simd_inter_t;
/* Public API of the inter-frame parallel SIMD turbo decoder.
 * Array parameters hold SRSLTE_TDEC_MAX_NPAR entries; nof_cb gives the number
 * of code blocks passed in a call.
 * NOTE(review): return-value conventions are not visible from this header -
 * confirm against the implementation. */
/* Initialises the handle for up to max_par_cb parallel code blocks. */
SRSLTE_API int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h,
uint32_t max_par_cb,
uint32_t max_long_cb);
/* Releases the resources held by the handle. */
SRSLTE_API void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h);
/* Prepares the handle for code blocks of length long_cb. */
SRSLTE_API int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h,
uint32_t long_cb);
/* Returns the iteration count of the parallel code block cb_idx. */
SRSLTE_API int srslte_tdec_simd_inter_get_nof_iterations_cb(srslte_tdec_simd_inter_t * h,
uint32_t cb_idx);
/* Resets the state of the parallel code block cb_idx. */
SRSLTE_API int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h,
uint32_t cb_idx);
/* Runs one decoder iteration on nof_cb inputs. */
SRSLTE_API void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h,
int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_cb,
uint32_t long_cb);
/* Writes the hard decisions for nof_cb code blocks. */
SRSLTE_API void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h,
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_cb,
uint32_t long_cb);
/* As the decision call above; presumably with packed (byte) output. */
SRSLTE_API void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h,
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_cb,
uint32_t long_cb);
/* Byte decision for the single parallel code block cbidx. */
SRSLTE_API void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h,
uint8_t *output,
uint32_t cbidx,
uint32_t long_cb);
/* Convenience entry point taking inputs, outputs and an iteration count. */
SRSLTE_API int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
int16_t *input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations,
uint32_t nof_cb,
uint32_t long_cb);
#endif // SRSLTE_TURBODECODER_SIMD_INTER_H

@ -24,78 +24,22 @@
*
*/
/**********************************************************************************************
* File: turbodecoder.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef SRSLTE_TURBODECODER_SSE_
#define SRSLTE_TURBODECODER_SSE_
#ifndef SRSLTE_TURBODECODER_SSE_H
#define SRSLTE_TURBODECODER_SSE_H
#include "srslte/config.h"
#include "srslte/phy/fec/tc_interl.h"
#include "srslte/phy/fec/cbsegm.h"
#define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef struct SRSLTE_API {
int max_long_cb;
uint32_t max_long_cb;
int16_t *alpha;
int16_t *branch;
} map_gen_t;
typedef struct SRSLTE_API {
int max_long_cb;
map_gen_t dec;
int16_t *app1;
int16_t *app2;
int16_t *ext1;
int16_t *ext2;
int16_t *syst;
int16_t *parity0;
int16_t *parity1;
int current_cbidx;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
int n_iter;
} srslte_tdec_sse_t;
SRSLTE_API int srslte_tdec_sse_init(srslte_tdec_sse_t * h,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_sse_free(srslte_tdec_sse_t * h);
SRSLTE_API int srslte_tdec_sse_reset(srslte_tdec_sse_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_sse_iteration(srslte_tdec_sse_t * h,
int16_t * input,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_sse_decision(srslte_tdec_sse_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_sse_decision_byte(srslte_tdec_sse_t * h,
uint8_t *output,
uint32_t long_cb);
} tdec_sse_t;
SRSLTE_API int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h,
int16_t * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);
int tdec_sse_init(void **h, uint32_t max_long_cb);
void tdec_sse_free(void *h);
void tdec_sse_dec(void *h, int16_t * input, int16_t *app, int16_t * parity,
int16_t *output, uint32_t long_cb);
void tdec_sse_extract_input(int16_t *input, int16_t *syst, int16_t *parity0, int16_t *parity1, int16_t *app2, uint32_t long_cb);
void tdec_sse_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb);
#endif // SRSLTE_TURBODECODER_SSE_
#endif // SRSLTE_TURBODECODER_SSE_H

@ -0,0 +1,752 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include "srslte/config.h"
/* Name generators for the selected implementation.
 * MAKE_FUNC(a) pastes tdec_win, WINIMP, '_' and a into one identifier;
 * MAKE_TYPE pastes tdec_win_, WINIMP and _t. WINIMP is set by whichever
 * WINIMP_IS_* branch is active; CONCAT2 is not defined in this file and is
 * expected to come from the including translation unit. */
#define MAKE_FUNC(a) CONCAT2(CONCAT2(tdec_win,WINIMP),CONCAT2(_,a))
#define MAKE_TYPE CONCAT2(CONCAT2(tdec_win_,WINIMP),_t)
/* Configuration: 128-bit SSE registers holding 16-bit LLRs. */
#ifdef WINIMP_IS_SSE16
#ifndef LV_HAVE_SSE
#error "Selected SSE window decoder but instruction set not supported"
#endif
#include <nmmintrin.h>
#define WINIMP sse16
/* Number of llr_t lanes per SIMD register (8 x int16_t in one __m128i). */
#define nof_blocks 8
#define llr_t int16_t
#define simd_type_t __m128i
#define simd_load _mm_load_si128
#define simd_store _mm_store_si128
/* Saturating 16-bit add/sub. */
#define simd_add _mm_adds_epi16
#define simd_sub _mm_subs_epi16
#define simd_max _mm_max_epi16
#define simd_set1 _mm_set1_epi16
#define simd_insert _mm_insert_epi16
#define simd_shuffle _mm_shuffle_epi8
/* Byte-shuffle masks: move_right gives lane i the value of lane i+1 (top lane
 * duplicated); move_left gives lane i the value of lane i-1 (lane 0 duplicated). */
#define move_right _mm_set_epi8(15,14,15,14,13,12,11,10,9,8,7,6,5,4,3,2)
#define move_left _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0)
/* Arithmetic right shift applied to the decoder output when divide_output is defined. */
#define simd_rb_shift _mm_srai_epi16
/* State metrics are normalized every normalize_period trellis steps. */
#define normalize_period 2
/* Length of the estimation pass used to initialise sub-block boundary states. */
#define win_overlap_len 40
/* Output is shifted right by this many bits. */
#define divide_output 1
/* Magnitude used for "unknown/impossible state" metrics (-INF). */
#define INF 10000
#else
/* Configuration: 256-bit AVX2 registers holding 16-bit LLRs. */
#ifdef WINIMP_IS_AVX16
#ifndef LV_HAVE_AVX2
#error "Selected AVX2 window decoder but instruction set not supported"
#endif
#include <immintrin.h>
#define WINIMP avx16
/* Number of llr_t lanes per SIMD register (16 x int16_t in one __m256i). */
#define nof_blocks 16
#define llr_t int16_t
#define simd_type_t __m256i
#define simd_load _mm256_load_si256
#define simd_store _mm256_store_si256
/* Saturating 16-bit add/sub. */
#define simd_add _mm256_adds_epi16
#define simd_sub _mm256_subs_epi16
#define simd_max _mm256_max_epi16
#define simd_set1 _mm256_set1_epi16
#define simd_insert _mm256_insert_epi16
#define simd_shuffle _mm256_shuffle_epi8
/* Byte-shuffle masks for a one-lane shift. The beta/alpha routines patch the
 * element that crosses the 128-bit boundary by hand after shuffling. */
#define move_right _mm256_set_epi8(31,30,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)
#define move_left _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0)
/* NOTE(review): unlike the sse16 branch, this branch defines neither
 * simd_rb_shift nor divide_output, so the output is not shifted -- confirm this
 * difference is intended. */
#define normalize_period 2
#define win_overlap_len 40
#define INF 10000
#else
/* Configuration: 128-bit SSE registers holding 8-bit LLRs. */
#ifdef WINIMP_IS_SSE8
#ifndef LV_HAVE_SSE
#error "Selected SSE window decoder but instruction set not supported"
#endif
#include <nmmintrin.h>
#define WINIMP sse8
/* Number of llr_t lanes per SIMD register (16 x int8_t in one __m128i). */
#define nof_blocks 16
#define llr_t int8_t
#define simd_type_t __m128i
#define simd_load _mm_load_si128
#define simd_store _mm_store_si128
/* Saturating 8-bit add/sub. */
#define simd_add _mm_adds_epi8
#define simd_sub _mm_subs_epi8
#define simd_max _mm_max_epi8
#define simd_set1 _mm_set1_epi8
#define simd_insert _mm_insert_epi8
#define simd_shuffle _mm_shuffle_epi8
/* Byte-shuffle masks: move_right gives byte i the value of byte i+1 (top byte
 * duplicated); move_left gives byte i the value of byte i-1 (byte 0 duplicated). */
#define move_right _mm_set_epi8(15,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)
#define move_left _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0)
/* Per-byte arithmetic right shift helper defined in this branch. */
#define simd_rb_shift simd_rb_shift_128
/* Selects the "subtract the maximum metric" variant in normalize(). */
#define normalize_max
/* Normalize on every trellis step (except k == 0). */
#define normalize_period 1
#define win_overlap_len 40
/* Selects the clamped scalar addition in sadd(). */
#define use_saturated_add
/* Output is shifted right by this many bits. */
#define divide_output 1
/* With 8-bit metrics the "unknown state" value -INF is 0. */
#define INF 0
/* Arithmetic right shift by l bits applied to each signed byte of v.
 * SSE offers no 8-bit shift, so the even and odd bytes of every 16-bit word
 * are shifted separately with 16-bit shifts and then recombined. */
inline static simd_type_t simd_rb_shift_128(simd_type_t v, const int l) {
  /* 0xFF in the even (low) byte of each 16-bit word */
  const __m128i even_byte_sel = _mm_set1_epi32(0x00FF00FF);
  /* odd bytes: a plain 16-bit arithmetic shift leaves the right value in the high byte */
  __m128i odd_bytes = _mm_srai_epi16(v, l);
  /* even bytes: lift into the high byte to pick up the sign, then shift back down */
  __m128i even_bytes = _mm_srai_epi16(_mm_slli_epi16(v, 8), l + 8);
  return _mm_blendv_epi8(odd_bytes, even_bytes, even_byte_sel);
}
#else
/* Configuration: 256-bit AVX2 registers holding 8-bit LLRs. */
#ifdef WINIMP_IS_AVX8
#ifndef LV_HAVE_AVX2
#error "Selected AVX2 window decoder but instruction set not supported"
#endif
#include <immintrin.h>
#define WINIMP avx8
/* Number of llr_t lanes per SIMD register (32 x int8_t in one __m256i). */
#define nof_blocks 32
#define llr_t int8_t
#define simd_type_t __m256i
#define simd_load _mm256_load_si256
#define simd_store _mm256_store_si256
/* Saturating 8-bit add/sub. */
#define simd_add _mm256_adds_epi8
#define simd_sub _mm256_subs_epi8
#define simd_max _mm256_max_epi8
#define simd_set1 _mm256_set1_epi8
#define simd_insert _mm256_insert_epi8
#define simd_shuffle _mm256_shuffle_epi8
/* Byte-shuffle masks for a one-byte shift. The beta/alpha routines patch the
 * element that crosses the 128-bit boundary by hand after shuffling. */
#define move_right _mm256_set_epi8(31,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)
#define move_left _mm256_set_epi8(30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0)
/* Per-byte arithmetic right shift helper defined in this branch. */
#define simd_rb_shift simd_rb_shift_256
/* With 8-bit metrics the "unknown state" value -INF is 0. */
#define INF 0
/* Selects the "subtract the maximum metric" variant in normalize(). */
#define normalize_max
/* Normalize on every trellis step (except k == 0). */
#define normalize_period 1
#define win_overlap_len 40
/* Selects the clamped scalar addition in sadd(). */
#define use_saturated_add
/* Output is shifted right by this many bits. */
#define divide_output 1
/* Arithmetic right shift by l bits applied to each signed byte of v.
 * AVX2 offers no 8-bit shift, so the even and odd bytes of every 16-bit word
 * are shifted separately with 16-bit shifts and then recombined. */
inline static simd_type_t simd_rb_shift_256(simd_type_t v, const int l) {
  /* 0xFF in the even (low) byte of each 16-bit word */
  const __m256i even_byte_sel = _mm256_set1_epi32(0x00FF00FF);
  /* odd bytes: a plain 16-bit arithmetic shift leaves the right value in the high byte */
  __m256i odd_bytes = _mm256_srai_epi16(v, l);
  /* even bytes: lift into the high byte to pick up the sign, then shift back down */
  __m256i even_bytes = _mm256_srai_epi16(_mm256_slli_epi16(v, 8), l + 8);
  return _mm256_blendv_epi8(odd_bytes, even_bytes, even_byte_sel);
}
#else
#error "Unknown WINIMP value"
#endif
#endif
#endif
#endif
/* Per-implementation decoder state; the type name is generated by MAKE_TYPE. */
typedef struct SRSLTE_API {
/* Maximum code-block length given to init(). */
uint32_t max_long_cb;
/* Backward (beta) state metrics: written by beta(), read back by alpha(). */
llr_t *beta;
} MAKE_TYPE;
/* Sub-block length: code-block length divided by the number of SIMD lanes.
 * Expands using whatever variable named long_cb is in scope at the point of use. */
#define long_sb (long_cb/nof_blocks)
/* Set to 1 to compile in the printf-based tracing below. */
#define debug_enabled_win 0
#if debug_enabled_win
/* Trace helpers. They expand in place and rely on the caller's local names
 * (k, x, y, out, old, beta_save, loop_len, long_cb); d selects the SIMD lane. */
#define debug_state(d) printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=[", d*long_sb+k+1, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d), MAKE_FUNC(get_simd)(out,d)); \
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d)); \
printf("], beta=["); \
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(beta_save[j], d));printf("\n");
#define debug_state_pre(d) printf("pre-window k=%5d, in=%5d, pa=%3d, alpha=[", (d+1)*long_sb-loop_len+k+1, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d)); \
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d)); \
printf("]\n");
#define debug_state_beta(d) printf("k=%5d, in=%5d, pa=%3d, beta=[", d*long_sb+k, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d)); \
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d));\
printf("\n");
/* Returns lane pos of SIMD register x, reading it through an llr_t pointer. */
static llr_t MAKE_FUNC(get_simd)(simd_type_t x, uint32_t pos) {
llr_t *s = (llr_t*) &x;
return s[pos];
}
#else
/* Tracing disabled: the helpers expand to nothing. */
#define debug_state(a)
#define debug_state_pre(a)
#define debug_state_beta(a)
#endif
/*
static void MAKE_FUNC(print_simd)(simd_type_t x) {
llr_t *s = (llr_t*) &x;
printf("[");
for (int i=0;i<nof_blocks;i++) {
printf("%4d, ", s[i]);
}
printf("]\n");
}*/
/* Scalar addition matching the SIMD simd_add of the selected implementation.
 * Without use_saturated_add it is a plain x+y. With use_saturated_add (the
 * 8-bit implementations) the sum is computed in 16 bits and clamped to the
 * int8_t range [-128, 127] on BOTH sides, like the saturating SIMD add. */
inline static llr_t MAKE_FUNC(sadd)(llr_t x, llr_t y) {
#ifndef use_saturated_add
  return x+y;
#else
  int16_t z = (int16_t) x+y;
  if (z > 127) {
    return 127;
  }
  if (z < -128) {
    /* the lower bound was previously unchecked, so negative overflow wrapped */
    return -128;
  }
  return (int8_t) z;
#endif
}
/* Re-centres the 8 state metrics in old[] to keep them from saturating.
 * Nothing is done at k == 0 or when k is not a multiple of normalize_period.
 * With normalize_max the lane-wise maximum over all states is subtracted from
 * every state; otherwise state 0 is subtracted from states 1..7 and state 0 is
 * set to zero. */
inline static void MAKE_FUNC(normalize)(uint32_t k, simd_type_t old[8]) {
  if (k == 0 || (k % normalize_period) != 0) {
    return;
  }
#ifdef normalize_max
  simd_type_t peak = old[0];
  for (int s = 1; s < 8; s++) {
    peak = simd_max(peak, old[s]);
  }
  for (int s = 0; s < 8; s++) {
    old[s] = simd_sub(old[s], peak);
  }
#else
  const simd_type_t ref = old[0];
  for (int s = 1; s < 8; s++) {
    old[s] = simd_sub(old[s], ref);
  }
  old[0] = simd_set1(0);
#endif
}
/* Scalar backward recursion over the three tail positions
 * (k = long_cb+2 down to long_cb). It starts from state 0 = 0 and all other
 * states = -INF, and leaves the resulting 8 state metrics in old[]. */
static void MAKE_FUNC(beta_trellis)(llr_t *input, llr_t *parity, uint32_t long_cb, llr_t old[8])
{
  llr_t cand_a[8], cand_b[8];
  llr_t x, y, xy;

  /* Known terminating state: no SIMD needed for this short run */
  for (int i = 0; i < 8; i++) {
    old[i] = (i == 0) ? 0 : -INF;
  }

  for (int k=long_cb+2;k >= long_cb; k--) {
    x  = input[k];
    y  = parity[k];
    xy = MAKE_FUNC(sadd)(x, y);

    /* first candidate metric for each state */
    cand_a[0] = MAKE_FUNC(sadd)(old[4],xy);
    cand_a[1] = old[4];
    cand_a[2] = MAKE_FUNC(sadd)(old[5], y);
    cand_a[3] = MAKE_FUNC(sadd)(old[5], x);
    cand_a[4] = MAKE_FUNC(sadd)(old[6], x);
    cand_a[5] = MAKE_FUNC(sadd)(old[6], y);
    cand_a[6] = old[7];
    cand_a[7] = MAKE_FUNC(sadd)(old[7], xy);

    /* second candidate metric for each state */
    cand_b[0] = old[0];
    cand_b[1] = MAKE_FUNC(sadd)(old[0], xy);
    cand_b[2] = MAKE_FUNC(sadd)(old[1], x);
    cand_b[3] = MAKE_FUNC(sadd)(old[1], y);
    cand_b[4] = MAKE_FUNC(sadd)(old[2], y);
    cand_b[5] = MAKE_FUNC(sadd)(old[2], x);
    cand_b[6] = MAKE_FUNC(sadd)(old[3], xy);
    cand_b[7] = old[3];

#if debug_enabled_win
    printf("trellis: k=%d, in=%d, pa=%d, beta: ", k, x, y); for (int i=0;i<8;i++) {printf("%d,", old[i]);} printf("\n");
#endif

    /* keep the larger candidate */
    for (int i = 0; i < 8; i++) {
      old[i] = (cand_a[i] > cand_b[i]) ? cand_a[i] : cand_b[i];
    }
  }
}
/* Computes the backward (beta) state metrics and stores them in s->beta.
 *
 * Two passes are run over the sub-block-interleaved input:
 *   pass 0 (estimation): starting from all-unknown states, walks the first
 *          win_overlap_len steps of every sub-block backwards to estimate the
 *          metrics at each sub-block start;
 *   pass 1 (final): shifts those estimates one lane so that each sub-block
 *          starts from its successor's estimate, takes the last sub-block's
 *          start from the tail trellis, then walks the whole sub-block
 *          backwards, storing 8 metric vectors per step.
 *
 * The final pass is identified by the pass index. The estimation length is
 * clamped to the sub-block length so that short code-blocks
 * (long_sb <= win_overlap_len) neither read outside the sub-block nor get
 * mistaken for the final pass.
 *
 * input, app, parity: interleaved buffers of nof_blocks lanes per step; app
 *                     may be NULL, in which case it is not added to input.
 * long_cb:            code-block length.
 */
static void MAKE_FUNC(beta)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb)
{
  simd_type_t m_b[8], new[8], old[8];
  simd_type_t x, y, xy, ap;
  simd_type_t *inputPtr;
  simd_type_t *appPtr;
  simd_type_t *parityPtr;
  simd_type_t *betaPtr = (simd_type_t*) s->beta;
  uint32_t loop_len;

  for (int j=0;j<2;j++) {
    const int final_pass = (j == 1);

    // First run L states to find initial state for all sub-blocks after first
    if (final_pass) {
      loop_len = long_sb;
    } else {
      loop_len = (win_overlap_len < long_sb) ? win_overlap_len : long_sb;
    }

    // When passing through all window pick estimated initial states (known state for sb=0)
    if (final_pass) {
      // shuffle across 128-bit boundary manually
#ifdef WINIMP_IS_AVX16
      llr_t tmp[8];
      for (int i = 0; i < 8; i++) {
        tmp[i] = _mm256_extract_epi16(old[i], 8);
      }
#endif
#ifdef WINIMP_IS_AVX8
      llr_t tmp[8];
      for (int i = 0; i < 8; i++) {
        tmp[i] = _mm256_extract_epi8(old[i], 16);
      }
#endif
      for (int i = 0; i < 8; i++) {
        old[i] = simd_shuffle(old[i], move_right);
      }

      // last sub-block state is calculated from the trellis
      llr_t trellis_old[8];
      MAKE_FUNC(beta_trellis)(input, parity, long_cb, trellis_old);
      for (int i = 0; i < 8; i++) {
        old[i] = simd_insert(old[i], trellis_old[i], nof_blocks-1);
      }
#ifdef WINIMP_IS_AVX16
      for (int i = 0; i < 8; i++) {
        old[i] = _mm256_insert_epi16(old[i], tmp[i], 7);
      }
#endif
#ifdef WINIMP_IS_AVX8
      for (int i = 0; i < 8; i++) {
        old[i] = _mm256_insert_epi8(old[i], tmp[i], 15);
      }
#endif

      inputPtr  = (simd_type_t*) &input[long_cb-nof_blocks];
      appPtr    = (simd_type_t*) &app[long_cb-nof_blocks];
      parityPtr = (simd_type_t*) &parity[long_cb-nof_blocks];

      // metrics at the end of every sub-block
      for (int i = 0; i < 8; i++) {
        simd_store(&betaPtr[8*long_sb + i], old[i]);
      }
    } else {
      // when estimating states, just set all to unknown
      for (int i = 0; i < 8; i++) {
        old[i] = simd_set1(-INF);
      }
      inputPtr  = (simd_type_t*) &input[nof_blocks*(loop_len-1)];
      appPtr    = (simd_type_t*) &app[nof_blocks*(loop_len-1)];
      parityPtr = (simd_type_t*) &parity[nof_blocks*(loop_len-1)];
    }

    for (int k = loop_len - 1; k >= 0; k--) {
      x = simd_load(inputPtr--);
      y = simd_load(parityPtr--);
      if (app) {
        ap = simd_load(appPtr--);
        x  = simd_add(ap, x);
      }
      xy = simd_add(x, y);

      m_b[0] = simd_add(old[4], xy);
      m_b[1] = old[4];
      m_b[2] = simd_add(old[5], y);
      m_b[3] = simd_add(old[5], x);
      m_b[4] = simd_add(old[6], x);
      m_b[5] = simd_add(old[6], y);
      m_b[6] = old[7];
      m_b[7] = simd_add(old[7], xy);

      new[0] = old[0];
      new[1] = simd_add(old[0], xy);
      new[2] = simd_add(old[1], x);
      new[3] = simd_add(old[1], y);
      new[4] = simd_add(old[2], y);
      new[5] = simd_add(old[2], x);
      new[6] = simd_add(old[3], xy);
      new[7] = old[3];

      // Calculate maximum metric
      for (int i = 0; i < 8; i++) {
        old[i] = simd_max(m_b[i], new[i]);
      }

      // Store metric only when doing the final pass
      if (final_pass) {
        for (int i = 0; i < 8; i++) {
          simd_store(&betaPtr[8*k + i], old[i]);
        }
      }

      debug_state_beta(0);

      // normalize
      MAKE_FUNC(normalize)(k, old);
    }
  }
}
/* Computes the forward (alpha) state metrics and, combined with the beta
 * metrics stored in s->beta, the decoder output.
 *
 * Two passes are run over the sub-block-interleaved input:
 *   pass 0 (estimation): starting from all-unknown states, walks the last
 *          win_overlap_len steps of every sub-block forwards to estimate the
 *          metrics at each sub-block end;
 *   pass 1 (final): shifts those estimates one lane so that each sub-block
 *          starts from its predecessor's estimate, forces the known start
 *          state for the first sub-block, then walks the whole sub-block,
 *          writing one output vector per step.
 *
 * The final pass is identified by the pass index, so beta is read and output
 * is written exactly once even when long_sb == win_overlap_len. The estimation
 * length is clamped to the sub-block length so that long_sb - loop_len cannot
 * underflow for short code-blocks.
 *
 * input, app, parity: interleaved buffers of nof_blocks lanes per step; app
 *                     may be NULL, in which case it is not added to input.
 * output:             receives long_sb SIMD vectors.
 * long_cb:            code-block length.
 */
static void MAKE_FUNC(alpha)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, llr_t * output, uint32_t long_cb)
{
  simd_type_t m_b[8], new[8], old[8], max1[8], max0[8];
  simd_type_t x, y, xy, ap;
  simd_type_t m1, m0;
  simd_type_t *inputPtr;
  simd_type_t *appPtr;
  simd_type_t *parityPtr;
  simd_type_t *betaPtr   = (simd_type_t*) s->beta;
  simd_type_t *outputPtr = (simd_type_t*) output;
#if debug_enabled_win
  simd_type_t beta_save[8];
#endif

  // Skip state 0
  betaPtr+=8;

  uint32_t loop_len;

  for (int j=0;j<2;j++) {
    const int final_pass = (j == 1);

    // First run L states to find initial state for all sub-blocks after first
    if (final_pass) {
      loop_len = long_sb;
    } else {
      loop_len = (win_overlap_len < long_sb) ? win_overlap_len : long_sb;
    }

    // When passing through all window pick estimated initial states (known state for sb=0)
    if (final_pass) {
      // shuffle across 128-bit boundary manually
#ifdef WINIMP_IS_AVX16
      llr_t tmp[8];
      for (int i=0;i<8;i++) {
        tmp[i] = _mm256_extract_epi16(old[i], 7);
      }
#endif
#ifdef WINIMP_IS_AVX8
      llr_t tmp[8];
      for (int i=0;i<8;i++) {
        tmp[i] = _mm256_extract_epi8(old[i], 15);
      }
#endif
      for (int i = 0; i < 8; i++) {
        old[i] = simd_shuffle(old[i], move_left);
      }
#ifdef WINIMP_IS_AVX16
      for (int i=0;i<8;i++) {
        old[i] = _mm256_insert_epi16(old[i], tmp[i], 8);
      }
#endif
#ifdef WINIMP_IS_AVX8
      for (int i=0;i<8;i++) {
        old[i] = _mm256_insert_epi8(old[i], tmp[i], 16);
      }
#endif
      // 1st sub-block state is known
      old[0] = simd_insert(old[0], 0, 0);
      for (int i = 1; i < 8; i++) {
        old[i] = simd_insert(old[i], -INF, 0);
      }
    } else {
      // when estimating states, just set all to unknown
      for (int i = 0; i < 8; i++) {
        old[i] = simd_set1(-INF);
      }
    }

    inputPtr  = (simd_type_t*) &input[nof_blocks*(long_sb-loop_len)];
    appPtr    = (simd_type_t*) &app[nof_blocks*(long_sb-loop_len)];
    parityPtr = (simd_type_t*) &parity[nof_blocks*(long_sb-loop_len)];

    for (int k = 0; k < loop_len; k++) {
      x = simd_load(inputPtr++);
      y = simd_load(parityPtr++);
      if (app) {
        ap = simd_load(appPtr++);
        x  = simd_add(ap, x);
      }
      xy = simd_add(x,y);

      m_b[0] = old[0];
      m_b[1] = simd_add(old[3], y);
      m_b[2] = simd_add(old[4], y);
      m_b[3] = old[7];
      m_b[4] = old[1];
      m_b[5] = simd_add(old[2], y);
      m_b[6] = simd_add(old[5], y);
      m_b[7] = old[6];

      new[0] = simd_add(old[1], xy);
      new[1] = simd_add(old[2], x);
      new[2] = simd_add(old[5], x);
      new[3] = simd_add(old[6], xy);
      new[4] = simd_add(old[0], xy);
      new[5] = simd_add(old[3], x);
      new[6] = simd_add(old[4], x);
      new[7] = simd_add(old[7], xy);

      // Load beta and compute output only when passing through all window
      if (final_pass) {
        simd_type_t beta;
        for (int i = 0; i < 8; i++) {
          beta    = simd_load(betaPtr++);
          max0[i] = simd_add(beta, m_b[i]);
          max1[i] = simd_add(beta, new[i]);
#if debug_enabled_win
          beta_save[i] = beta;
#endif
        }

        m1 = simd_max(max1[0], max1[1]);
        m0 = simd_max(max0[0], max0[1]);
        for (int i = 2; i < 8; i++) {
          m1 = simd_max(m1, max1[i]);
          m0 = simd_max(m0, max0[i]);
        }

        simd_type_t out = simd_sub(m1, m0);

        // Divide output when using 8-bit arithmetic
#ifdef divide_output
        out = simd_rb_shift(out, divide_output);
#endif
        simd_store(outputPtr++, out);

        debug_state(0);
      }

      for (int i = 0; i < 8; i++) {
        old[i] = simd_max(m_b[i], new[i]);
      }

      // normalize
      MAKE_FUNC(normalize)(k, old);

      if (!final_pass) {
        debug_state_pre(0);
      }
    }
  }
}
/* Allocates a decoder instance and its beta-metric buffer.
 *
 * hh:          receives the new handle; set to NULL when allocation fails.
 * max_long_cb: maximum code-block length the instance must support.
 *
 * Returns nof_blocks (the number of SIMD lanes of this implementation) on
 * success, or -1 when either allocation fails. Nothing is leaked on failure. */
int MAKE_FUNC(init)(void **hh, uint32_t max_long_cb)
{
  *hh = calloc(1, sizeof(MAKE_TYPE));
  if (!*hh) {
    /* previously the NULL result was dereferenced below */
    perror("calloc");
    return -1;
  }

  MAKE_TYPE *h = (MAKE_TYPE*) *hh;

  h->beta = srslte_vec_malloc(sizeof(llr_t) * 8 * max_long_cb * nof_blocks);
  if (!h->beta) {
    perror("srslte_vec_malloc");
    /* release the half-built handle so that the caller cannot leak it */
    free(h);
    *hh = NULL;
    return -1;
  }

  h->max_long_cb = max_long_cb;
  return nof_blocks;
}
/* Releases a handle created by init(): first the beta buffer, then the handle
 * itself. A NULL handle is ignored. */
void MAKE_FUNC(free)(void *hh)
{
  MAKE_TYPE *state = (MAKE_TYPE*) hh;
  if (!state) {
    return;
  }
  /* free(NULL) is a no-op, so no separate check on the buffer is needed */
  free(state->beta);
  free(state);
}
/* Runs one constituent decoder: the backward (beta) recursion first, then the
 * forward (alpha) recursion, which consumes the stored beta metrics and writes
 * the result to output. */
void MAKE_FUNC(dec)(void *hh, llr_t *input, llr_t *app, llr_t *parity, llr_t *output, uint32_t long_cb)
{
  MAKE_TYPE *state = (MAKE_TYPE*) hh;

  MAKE_FUNC(beta)(state, input, app, parity, long_cb);
  MAKE_FUNC(alpha)(state, input, app, parity, output, long_cb);

#if debug_enabled_win
  printf("running win decoder: %s\n", STRING(WINIMP));
#endif
}
/* Fills lanes st .. st+7 of SIMD register reg. Lane n receives
 * input[3*(i + n*long_sb) + off], i.e. element i of sub-block n taken from a
 * stream with three values per position (off selects which of the three).
 * Relies on the caller's local names i, input and long_cb (through long_sb). */
#define INSERT8_INPUT(reg, st, off) reg = simd_insert(reg, input[3*(i+(st+0)*long_sb)+off], st+0);\
reg = simd_insert(reg, input[3*(i+(st+1)*long_sb)+off], st+1);\
reg = simd_insert(reg, input[3*(i+(st+2)*long_sb)+off], st+2);\
reg = simd_insert(reg, input[3*(i+(st+3)*long_sb)+off], st+3);\
reg = simd_insert(reg, input[3*(i+(st+4)*long_sb)+off], st+4);\
reg = simd_insert(reg, input[3*(i+(st+5)*long_sb)+off], st+5);\
reg = simd_insert(reg, input[3*(i+(st+6)*long_sb)+off], st+6);\
reg = simd_insert(reg, input[3*(i+(st+7)*long_sb)+off], st+7);
/* Splits the rate-1/3 input stream into systematic / parity_0 / parity_1
 * buffers laid out for the windowed decoder.
 *
 * For every step i of a sub-block, one SIMD vector is written per output
 * buffer, lane n holding the value of sub-block n. The three tail positions
 * (indices long_cb .. long_cb+2) are copied in scalar form: systematic and
 * parity_0 from the first 6 tail values, app2 and parity_1 from the next 6.
 *
 * The SIMD work registers are zero-initialised. Every lane is overwritten
 * before the store, so the output is unchanged, but the first simd_insert no
 * longer reads an indeterminate value. */
void MAKE_FUNC(extract_input)(llr_t *input, llr_t *systematic, llr_t *app2, llr_t *parity_0, llr_t *parity_1, uint32_t long_cb)
{
  simd_type_t *systPtr    = (simd_type_t*) systematic;
  simd_type_t *parity0Ptr = (simd_type_t*) parity_0;
  simd_type_t *parity1Ptr = (simd_type_t*) parity_1;
  simd_type_t syst    = simd_set1(0);
  simd_type_t parity0 = simd_set1(0);
  simd_type_t parity1 = simd_set1(0);

  for (int i=0;i<long_sb;i++) {
    INSERT8_INPUT(syst, 0, 0);
    INSERT8_INPUT(parity0, 0, 1);
    INSERT8_INPUT(parity1, 0, 2);
#if nof_blocks >= 16
    INSERT8_INPUT(syst, 8, 0);
    INSERT8_INPUT(parity0, 8, 1);
    INSERT8_INPUT(parity1, 8, 2);
#endif
#if nof_blocks >= 32
    INSERT8_INPUT(syst, 16, 0);
    INSERT8_INPUT(parity0, 16, 1);
    INSERT8_INPUT(parity1, 16, 2);
    INSERT8_INPUT(syst, 24, 0);
    INSERT8_INPUT(parity0, 24, 1);
    INSERT8_INPUT(parity1, 24, 2);
#endif
    simd_store(systPtr++, syst);
    simd_store(parity0Ptr++, parity0);
    simd_store(parity1Ptr++, parity1);
  }

  /* tail bits */
  for (int i = long_cb; i < long_cb + 3; i++) {
    systematic[i] = input[3*long_cb + 2*(i - long_cb)];
    parity_0[i]   = input[3*long_cb + 2*(i - long_cb) + 1];
    app2[i]       = input[3*long_cb + 6 + 2*(i - long_cb)];
    parity_1[i]   = input[3*long_cb + 6 + 2*(i - long_cb) + 1];
  }
}
/* Index mapping (x % (long_cb/win))*win + x/(long_cb/win); uses the caller's long_cb. */
#define deinter(x,win) ((x%(long_cb/win))*(win)+x/(long_cb/win))
/* After every b-th inserted bit, advance the read index k by b*nof_blocks and
 * wrap it back with k -= (long_cb-1) once it reaches long_cb. */
#define reset_cnt(a,b) if(!((a+1)%b)) { \
    k+=b*nof_blocks; \
    if (k >= long_cb) { \
      k -= (long_cb-1);\
    }\
  }
/* Put app1[k+(a%b)*nof_blocks] into 16-bit lane 7-a of ap, then update k.
 * The definition must NOT end in a backslash: a trailing continuation here
 * would splice the following #define line into this macro's body. */
#define insert_bit(a,b) ap = _mm_insert_epi16(ap, app1[k+(a%b)*nof_blocks], 7-a); \
    reset_cnt(a,b);
/* Produce long_cb/8 output bytes: gather 8 values into ap, pack them to bytes
 * and take the "greater than zero" mask as the 8 hard-decision bits. */
#define decide_for(b) for (uint32_t i = 0; i < long_cb/8; i++) { \
    insert_bit(0,b);\
    insert_bit(1,b);\
    insert_bit(2,b);\
    insert_bit(3,b);\
    insert_bit(4,b);\
    insert_bit(5,b);\
    insert_bit(6,b);\
    insert_bit(7,b);\
    output[i] = (uint8_t) _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_packs_epi16(ap,zeros),zeros));\
  }
/* Hard decision, packed 8 bits per output byte (long_cb/8 bytes are written).
 * Values are read from app1 in its sub-block-interleaved layout; the group
 * size used while walking it is the largest of 8, 4, 2, 1 for which long_cb is
 * a multiple of nof_blocks times that size.
 *
 * 128-bit intrinsics are used for every implementation: there is no
 * improvement to use AVX here.
 *
 * ap is zero-initialised. All eight lanes are rewritten for every output byte,
 * so results are unchanged, but the first insert no longer reads an
 * indeterminate register. */
void MAKE_FUNC(decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb)
{
  uint32_t k=0;
  __m128i zeros = _mm_setzero_si128();
  __m128i ap    = _mm_setzero_si128();

  if ((long_cb%(nof_blocks*8)) == 0) {
    decide_for(8);
  } else if ((long_cb%(nof_blocks*4)) == 0) {
    decide_for(4);
  } else if ((long_cb%(nof_blocks*2)) == 0) {
    decide_for(2);
  } else {
    decide_for(1);
  }
}
/* Drop the per-implementation configuration so that this file can be included
 * again with a different WINIMP_IS_* selection. #undef of a name that is not
 * defined is a no-op, so the optional macros need no #ifdef guard. */

/* implementation identity and element type */
#undef WINIMP
#undef nof_blocks
#undef llr_t
#undef simd_type_t

/* tuning constants */
#undef normalize_period
#undef INF
#undef win_overlap_len
#undef debug_enabled_win

/* SIMD operation aliases */
#undef simd_load
#undef simd_store
#undef simd_add
#undef simd_sub
#undef simd_max
#undef simd_set1
#undef simd_insert
#undef simd_shuffle
#undef move_right
#undef move_left

/* optional macros, defined only by some implementations */
#undef normalize_max
#undef use_saturated_add
#undef simd_rb_shift
#undef divide_output

@ -53,4 +53,9 @@ SRSLTE_API int srslte_demod_soft_demodulate_s(srslte_mod_t modulation,
short* llr,
int nsymbols);
SRSLTE_API int srslte_demod_soft_demodulate_b(srslte_mod_t modulation,
const cf_t* symbols,
int8_t* llr,
int nsymbols);
#endif // SRSLTE_DEMOD_SOFT_H

@ -65,6 +65,8 @@ typedef struct SRSLTE_API {
uint16_t ue_rnti;
bool is_ue;
bool llr_is_8bit;
/* Power allocation parameter 3GPP 36.213 Clause 5.2 Rho_b */
float rho_a;

@ -81,6 +81,7 @@ typedef struct SRSLTE_API {
typedef struct {
srslte_sequence_t seq_f2[SRSLTE_NSUBFRAMES_X_FRAME];
uint32_t cell_id;
bool sequence_generated;
} srslte_pucch_user_t;
@ -112,10 +113,15 @@ typedef struct SRSLTE_API {
uint32_t last_n_prb;
uint32_t last_n_pucch;
srslte_sequence_t tmp_seq;
uint16_t ue_rnti;
bool is_ue;
}srslte_pucch_t;
SRSLTE_API int srslte_pucch_init(srslte_pucch_t *q);
SRSLTE_API int srslte_pucch_init_ue(srslte_pucch_t *q);
SRSLTE_API int srslte_pucch_init_enb(srslte_pucch_t *q);
SRSLTE_API void srslte_pucch_free(srslte_pucch_t *q);

@ -74,6 +74,8 @@ typedef struct SRSLTE_API {
uint16_t ue_rnti;
uint32_t max_re;
bool llr_is_8bit;
srslte_dft_precoding_t dft_precoding;
/* buffers */

@ -59,6 +59,8 @@ typedef struct SRSLTE_API {
uint32_t max_iterations;
uint32_t nof_iterations;
bool llr_is_8bit;
/* buffers */
uint8_t *cb_in;
uint8_t *parity_bits;

@ -68,6 +68,11 @@ SRSLTE_API void srslte_scrambling_s_offset(srslte_sequence_t *s,
int offset,
int len);
SRSLTE_API void srslte_scrambling_sb_offset(srslte_sequence_t *s,
int8_t *data,
int offset,
int len);
SRSLTE_API void srslte_scrambling_c(srslte_sequence_t *s,
cf_t *data);

@ -166,8 +166,13 @@ static inline void srslte_mat_2x2_mmse_csi_simd(simd_cf_t y0,
simd_cf_t _noise_estimate;
simd_f_t _norm = srslte_simd_f_set1(norm);
#if HAVE_NEON
_noise_estimate.val[0] = srslte_simd_f_set1(noise_estimate);
_noise_estimate.val[1] = srslte_simd_f_zero();
#else /* HAVE_NEON */
_noise_estimate.re = srslte_simd_f_set1(noise_estimate);
_noise_estimate.im = srslte_simd_f_zero();
#endif /* HAVE_NEON */
/* 1. A = H' x H + No*/
simd_cf_t a00 =

@ -100,6 +100,7 @@
#define SRSLTE_SIMD_I_SIZE 16
#define SRSLTE_SIMD_B_SIZE 64
#define SRSLTE_SIMD_S_SIZE 32
#define SRSLTE_SIMD_C16_SIZE 0
@ -111,6 +112,7 @@
#define SRSLTE_SIMD_I_SIZE 8
#define SRSLTE_SIMD_B_SIZE 32
#define SRSLTE_SIMD_S_SIZE 16
#define SRSLTE_SIMD_C16_SIZE 16
@ -122,6 +124,7 @@
#define SRSLTE_SIMD_I_SIZE 4
#define SRSLTE_SIMD_B_SIZE 16
#define SRSLTE_SIMD_S_SIZE 8
#define SRSLTE_SIMD_C16_SIZE 8
@ -132,16 +135,16 @@
#define SRSLTE_SIMD_CF_SIZE 4
#define SRSLTE_SIMD_I_SIZE 4
#define SRSLTE_SIMD_B_SIZE 16
#define SRSLTE_SIMD_S_SIZE 8
#define SRSLTE_SIMD_C16_SIZE 8
#else /* LV_HAVE_NEON */
#else /* HAVE_NEON */
#define SRSLTE_SIMD_F_SIZE 0
#define SRSLTE_SIMD_CF_SIZE 0
#define SRSLTE_SIMD_I_SIZE 0
#define SRSLTE_SIMD_B_SIZE 0
#define SRSLTE_SIMD_S_SIZE 0
#define SRSLTE_SIMD_C16_SIZE 0
@ -511,7 +514,7 @@ static inline simd_f_t srslte_simd_f_abs(simd_f_t a) {
return _mm_andnot_ps(_mm_set1_ps(-0.0f), a);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vqabsq_s32(a);
return vabsq_f32(a);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
@ -987,13 +990,13 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) {
static inline simd_cf_t srslte_simd_cf_neg (simd_cf_t a) {
simd_cf_t ret;
#if LV_HAVE_NEON
#if HAVE_NEON
ret.val[0] = srslte_simd_f_neg(a.val[0]);
ret.val[1] = srslte_simd_f_neg(a.val[1]);
#else /* LV_HAVE_NEON */
#else /* HAVE_NEON */
ret.re = srslte_simd_f_neg(a.re);
ret.im = srslte_simd_f_neg(a.im);
#endif /* LV_HAVE_NEON */
#endif /* HAVE_NEON */
return ret;
}
@ -1004,37 +1007,37 @@ static inline simd_cf_t srslte_simd_cf_neg_mask (simd_cf_t a, simd_f_t mask) {
mask = _mm256_permutevar8x32_ps(mask, _mm256_setr_epi32(0,4,1,5,2,6,3,7));
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
#if LV_HAVE_NEON
#if HAVE_NEON
ret.val[0] = srslte_simd_f_neg_mask(a.val[0], mask);
ret.val[1] = srslte_simd_f_neg_mask(a.val[1], mask);
#else /* LV_HAVE_NEON */
#else /* HAVE_NEON */
ret.re = srslte_simd_f_neg_mask(a.re, mask);
ret.im = srslte_simd_f_neg_mask(a.im, mask);
#endif /* LV_HAVE_NEON */
#endif /* HAVE_NEON */
return ret;
}
static inline simd_cf_t srslte_simd_cf_conj (simd_cf_t a) {
simd_cf_t ret;
#if LV_HAVE_NEON
#if HAVE_NEON
ret.val[0] = a.val[0];
ret.val[1] = srslte_simd_f_neg(a.val[1]);
#else /* LV_HAVE_NEON */
#else /* HAVE_NEON */
ret.re = a.re;
ret.im = srslte_simd_f_neg(a.im);
#endif /* LV_HAVE_NEON */
#endif /* HAVE_NEON */
return ret;
}
static inline simd_cf_t srslte_simd_cf_mulj (simd_cf_t a) {
simd_cf_t ret;
#if LV_HAVE_NEON
#if HAVE_NEON
ret.val[0] = srslte_simd_f_neg(a.val[1]);
ret.val[1] = a.val[0];
#else /* LV_HAVE_NEON */
#else /* HAVE_NEON */
ret.re = srslte_simd_f_neg(a.im);
ret.im = a.re;
#endif /* LV_HAVE_NEON */
#endif /* HAVE_NEON */
return ret;
}
@ -1336,6 +1339,24 @@ static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) {
#endif /* LV_HAVE_AVX512 */
}
/* Applies the sign of each 16-bit element of b to the corresponding element of
 * a, using the x86 "sign" instruction: the element of a is negated where b is
 * negative, zeroed where b is zero and kept where b is positive.
 * Only the AVX2 and SSE back-ends are implemented; selecting AVX512 or NEON
 * stops compilation with #error. */
static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
#ifdef LV_HAVE_AVX512
#error sign instruction not available in avx512
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_sign_epi16(a, b);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_sign_epi16(a, b);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
#error sign instruction not available in Neon
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) {
#ifdef LV_HAVE_AVX512
return _mm512_add_epi16(a, b);
@ -1681,7 +1702,7 @@ typedef int8x16_t simd_b_t;
static inline simd_b_t srslte_simd_b_load(int8_t *ptr){
static inline simd_b_t srslte_simd_b_load(const int8_t *ptr){
#ifdef LV_HAVE_AVX512
return _mm512_load_si512(ptr);
#else /* LV_HAVE_AVX512 */
@ -1699,7 +1720,7 @@ static inline simd_b_t srslte_simd_b_load(int8_t *ptr){
#endif /* LV_HAVE_AVX512 */
}
static inline simd_b_t srslte_simd_b_loadu(int8_t *ptr){
static inline simd_b_t srslte_simd_b_loadu(const int8_t *ptr){
#ifdef LV_HAVE_AVX512
return _mm512_loadu_si512(ptr);
#else /* LV_HAVE_AVX512 */
@ -1773,6 +1794,44 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
#endif /* LV_HAVE_AVX512 */
}
static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
#ifdef LV_HAVE_AVX512
return _mm512_subs_epi8(a, b);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_subs_epi8(a, b);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_subs_epi8(a, b);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vsubqs_s8(a, b);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
/* Applies the sign of each 8-bit element of b to the corresponding element of
 * a, using the x86 "sign" instruction: the element of a is negated where b is
 * negative, zeroed where b is zero and kept where b is positive.
 * The result is an 8-bit vector, so the return type is simd_b_t like the
 * parameters. On x86 this is the same underlying register type as simd_s_t.
 * Only the AVX2 and SSE back-ends are implemented; selecting AVX512 or NEON
 * stops compilation with #error. */
static inline simd_b_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
#ifdef LV_HAVE_AVX512
#error sign instruction not available in avx512
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
  return _mm256_sign_epi8(a, b);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
  return _mm_sign_epi8(a, b);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
#error sign instruction not available in Neon
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
#endif /*SRSLTE_SIMD_B_SIZE */

@ -69,6 +69,7 @@ SRSLTE_API void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_s
SRSLTE_API void srslte_vec_fprint_c(FILE *stream, cf_t *x, const uint32_t len);
SRSLTE_API void srslte_vec_fprint_f(FILE *stream, float *x, const uint32_t len);
SRSLTE_API void srslte_vec_fprint_b(FILE *stream, uint8_t *x, const uint32_t len);
SRSLTE_API void srslte_vec_fprint_bs(FILE *stream, int8_t *x, const uint32_t len);
SRSLTE_API void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, const uint32_t len);
SRSLTE_API void srslte_vec_fprint_i(FILE *stream, int *x, const uint32_t len);
SRSLTE_API void srslte_vec_fprint_s(FILE *stream, short *x, const uint32_t len);
@ -82,12 +83,13 @@ SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, const uint32_
/* sum two vectors */
SRSLTE_API void srslte_vec_sum_fff(const float *x, const float *y, float *z, const uint32_t len);
SRSLTE_API void srslte_vec_sum_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_sum_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
/* substract two vectors z=x-y */
SRSLTE_API void srslte_vec_sub_fff(const float *x, const float *y, float *z, const uint32_t len);
SRSLTE_API void srslte_vec_sub_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_sub_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len);
/* scalar product */
SRSLTE_API void srslte_vec_sc_prod_cfc(const cf_t *x, const float h, cf_t *z, const uint32_t len);
@ -97,8 +99,10 @@ SRSLTE_API void srslte_vec_sc_prod_fff(const float *x, const float h, float *z,
SRSLTE_API void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_convert_if(const int16_t *x, const float scale, float *z, const uint32_t len);
SRSLTE_API void srslte_vec_convert_fb(const float *x, const float scale, int8_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_lut_sss(const short *x, const unsigned short *lut, short *y, const uint32_t len);
SRSLTE_API void srslte_vec_lut_bbb(const int8_t *x, const unsigned short *lut, int8_t *y, const uint32_t len);
SRSLTE_API void srslte_vec_lut_sis(const short *x, const unsigned int *lut, short *y, const uint32_t len);
/* vector product (element-wise) */
@ -115,6 +119,10 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(const cf_t *x, const cf_t *y, cf_t *z,
SRSLTE_API void srslte_vec_prod_fff(const float *x, const float *y, float *z, const uint32_t len);
SRSLTE_API void srslte_vec_prod_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
// Negate sign (scrambling)
SRSLTE_API void srslte_vec_neg_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
SRSLTE_API void srslte_vec_neg_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len);
/* Dot-product */
SRSLTE_API cf_t srslte_vec_dot_prod_cfc(const cf_t *x, const float *y, const uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_ccc(const cf_t *x, const cf_t *y, const uint32_t len);

@ -62,6 +62,8 @@ SRSLTE_API void srslte_vec_sum_sss_simd(const int16_t *x, const int16_t *y, int1
SRSLTE_API void srslte_vec_sub_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, int len);
SRSLTE_API void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, int len);
SRSLTE_API float srslte_vec_acc_ff_simd(const float *x, int len);
SRSLTE_API cf_t srslte_vec_acc_cc_simd(const cf_t *x, int len);
@ -86,6 +88,10 @@ SRSLTE_API void srslte_vec_prod_ccc_c16_simd(const int16_t *a_re, const int16_t
SRSLTE_API void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len);
SRSLTE_API void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len);
SRSLTE_API void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len);
SRSLTE_API void srslte_vec_prod_cfc_simd(const cf_t *x, const float *y, cf_t *z, const int len);
SRSLTE_API void srslte_vec_prod_fff_simd(const float *x, const float *y, float *z, const int len);
@ -120,10 +126,14 @@ SRSLTE_API void srslte_vec_abs_square_cf_simd(const cf_t *x, float *z, const int
/* Other Functions */
SRSLTE_API void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y, const int len);
SRSLTE_API void srslte_vec_lut_bbb_simd(const int8_t *x, const unsigned short *lut, int8_t *y, const int len);
SRSLTE_API void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len);
SRSLTE_API void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, const int len);
SRSLTE_API void srslte_vec_convert_fb_simd(const float *x, int8_t *z, const float scale, const int len);
SRSLTE_API void srslte_vec_cp_simd(const cf_t *src, cf_t *dst, int len);
SRSLTE_API void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len);

@ -140,7 +140,7 @@ class radio {
srslte_rf_t rf_device;
const static uint32_t burst_preamble_max_samples = 30720000; // 30.72 MHz is maximum frequency
const static uint32_t burst_preamble_max_samples = 13824;
double burst_preamble_sec;// Start of burst preamble time (off->on RF transition time)
srslte_timestamp_t end_of_burst_time;
bool is_start_of_burst;

@ -61,6 +61,7 @@ public:
void add_bearer(uint32_t lcid, srslte_pdcp_config_t cnfg = srslte_pdcp_config_t());
void add_bearer_mrb(uint32_t lcid, srslte_pdcp_config_t cnfg = srslte_pdcp_config_t());
void del_bearer(uint32_t lcid);
void change_lcid(uint32_t old_lcid, uint32_t new_lcid);
void config_security(uint32_t lcid,
uint8_t *k_enc,
uint8_t *k_int,

@ -56,7 +56,7 @@ public:
log *rlc_log_,
mac_interface_timers *mac_timers_,
uint32_t lcid_,
int buffer_size = -1); // -1 to use default buffer sizes
int buffer_size_ = -1); // -1 to use default buffer sizes
void stop();
void get_metrics(rlc_metrics_t &m);
@ -81,12 +81,15 @@ public:
// RRC interface
void reestablish();
void reestablish(uint32_t lcid);
void reset();
void empty_queue();
void add_bearer(uint32_t lcid);
void add_bearer(uint32_t lcid, srslte_rlc_config_t cnfg);
void add_bearer_mrb(uint32_t lcid);
void del_bearer(uint32_t lcid);
void del_bearer_mrb(uint32_t lcid);
void change_lcid(uint32_t old_lcid, uint32_t new_lcid);
private:
void reset_metrics();

@ -69,14 +69,14 @@ struct rlc_amd_retx_t{
class rlc_am : public rlc_common
{
public:
rlc_am(uint32_t queue_len = 16);
rlc_am(uint32_t queue_len = 128);
~rlc_am();
void init(log *rlc_entity_log_,
void init(log *log_,
uint32_t lcid_,
srsue::pdcp_interface_rlc *pdcp_,
srsue::rrc_interface_rlc *rrc_,
mac_interface_timers *mac_timers);
bool configure(srslte_rlc_config_t cnfg);
mac_interface_timers *mac_timers_);
bool configure(srslte_rlc_config_t cfg_);
void reestablish();
void stop();
@ -100,39 +100,57 @@ public:
private:
byte_buffer_pool *pool;
srslte::log *log;
uint32_t lcid;
srsue::pdcp_interface_rlc *pdcp;
srsue::rrc_interface_rlc *rrc;
// Transmitter sub-class
class rlc_am_tx : public timer_callback
{
public:
rlc_am_tx(rlc_am *parent_, uint32_t queue_len_);
~rlc_am_tx();
// TX SDU buffers
rlc_tx_queue tx_sdu_queue;
byte_buffer_t *tx_sdu;
void init();
bool configure(srslte_rlc_am_config_t cfg_);
// PDU being resegmented
rlc_amd_tx_pdu_t tx_pdu_segments;
void empty_queue();
void reestablish();
void stop();
// Tx and Rx windows
std::map<uint32_t, rlc_amd_tx_pdu_t> tx_window;
std::deque<rlc_amd_retx_t> retx_queue;
std::map<uint32_t, rlc_amd_rx_pdu_t> rx_window;
std::map<uint32_t, rlc_amd_rx_pdu_segments_t> rx_segments;
void write_sdu(byte_buffer_t *sdu, bool blocking);
int read_pdu(uint8_t *payload, uint32_t nof_bytes);
// RX SDU buffers
byte_buffer_t *rx_sdu;
uint32_t get_buffer_state();
uint32_t get_total_buffer_state();
uint32_t get_num_tx_bytes();
void reset_metrics();
// Mutexes
pthread_mutex_t mutex;
// Timeout callback interface
void timer_expired(uint32_t timeout_id);
bool tx_enabled;
bool poll_received;
bool do_status;
rlc_status_pdu_t status;
// Interface for Rx subclass
void handle_control_pdu(uint8_t *payload, uint32_t nof_bytes);
// Metrics
uint32_t num_tx_bytes;
uint32_t num_rx_bytes;
private:
int build_status_pdu(uint8_t *payload, uint32_t nof_bytes);
int build_retx_pdu(uint8_t *payload, uint32_t nof_bytes);
int build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t retx);
int build_data_pdu(uint8_t *payload, uint32_t nof_bytes);
void debug_state();
bool retx_queue_has_sn(uint32_t sn);
int required_buffer_size(rlc_amd_retx_t retx);
void retransmit_random_pdu();
// Timer checks
bool status_prohibited;
// Helpers
bool poll_required();
bool do_status();
rlc_am *parent;
byte_buffer_pool *pool;
srslte::log *log;
/****************************************************************************
* Configurable parameters
@ -141,6 +159,12 @@ private:
srslte_rlc_am_config_t cfg;
// TX SDU buffers
rlc_tx_queue tx_sdu_queue;
byte_buffer_t *tx_sdu;;
bool tx_enabled;
/****************************************************************************
* State variables and counters
* Ref: 3GPP TS 36.322 v10.0.0 Section 7
@ -156,53 +180,125 @@ private:
uint32_t pdu_without_poll;
uint32_t byte_without_poll;
// Rx state variables
uint32_t vr_r; // Receive state. SN following last in-sequence received PDU. Low edge of rx window
uint32_t vr_mr; // Max acceptable receive state. High edge of rx window. vr_r + window size.
uint32_t vr_x; // t_reordering state. SN following PDU which triggered t_reordering.
uint32_t vr_ms; // Max status tx state. Highest possible value of SN for ACK_SN in status PDU.
uint32_t vr_h; // Highest rx state. SN following PDU with highest SN among rxed PDUs.
rlc_status_pdu_t tx_status;
/****************************************************************************
* Timers
* Ref: 3GPP TS 36.322 v10.0.0 Section 7
***************************************************************************/
timeout poll_retx_timeout;
timeout reordering_timeout;
timeout status_prohibit_timeout;
static const int reordering_timeout_id = 1;
srslte::timers::timer *poll_retx_timer;
uint32_t poll_retx_timer_id;
static const int poll_periodicity = 8; // After how many data PDUs a status PDU shall be requested
srslte::timers::timer *status_prohibit_timer;
uint32_t status_prohibit_timer_id;
// Timer checks
bool status_prohibited();
bool poll_retx();
void check_reordering_timeout();
// Tx windows
std::map<uint32_t, rlc_amd_tx_pdu_t> tx_window;
std::deque<rlc_amd_retx_t> retx_queue;
// Helpers
bool poll_required();
// Mutexes
pthread_mutex_t mutex;
int prepare_status();
int build_status_pdu(uint8_t *payload, uint32_t nof_bytes);
int build_retx_pdu(uint8_t *payload, uint32_t nof_bytes);
int build_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_retx_t retx);
int build_data_pdu(uint8_t *payload, uint32_t nof_bytes);
// Metrics
uint32_t num_tx_bytes;
};
// Receiver sub-class
class rlc_am_rx : public timer_callback
{
public:
rlc_am_rx(rlc_am* parent_);
~rlc_am_rx();
void init();
bool configure(srslte_rlc_am_config_t cfg_);
void reestablish();
void stop();
void write_pdu(uint8_t *payload, uint32_t nof_bytes);
uint32_t get_num_rx_bytes();
void reset_metrics();
// Timeout callback interface
void timer_expired(uint32_t timeout_id);
// Functions needed by Tx subclass to query rx state
int get_status(rlc_status_pdu_t* status);
bool get_do_status();
void reset_status(); // called when status PDU has been sent
private:
void handle_data_pdu(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header);
void handle_data_pdu_segment(uint8_t *payload, uint32_t nof_bytes, rlc_amd_pdu_header_t &header);
void handle_control_pdu(uint8_t *payload, uint32_t nof_bytes);
void reassemble_rx_sdus();
bool inside_tx_window(uint16_t sn);
bool inside_rx_window(uint16_t sn);
void debug_state();
void print_rx_segments();
bool add_segment_and_check(rlc_amd_rx_pdu_segments_t *pdu, rlc_amd_rx_pdu_t *segment);
int required_buffer_size(rlc_amd_retx_t retx);
bool retx_queue_has_sn(uint32_t sn);
rlc_am *parent;
byte_buffer_pool *pool;
srslte::log *log;
/****************************************************************************
* Configurable parameters
* Ref: 3GPP TS 36.322 v10.0.0 Section 7
***************************************************************************/
srslte_rlc_am_config_t cfg;
// RX SDU buffers
byte_buffer_t *rx_sdu;
/****************************************************************************
* State variables and counters
* Ref: 3GPP TS 36.322 v10.0.0 Section 7
***************************************************************************/
// Rx state variables
uint32_t vr_r; // Receive state. SN following last in-sequence received PDU. Low edge of rx window
uint32_t vr_mr; // Max acceptable receive state. High edge of rx window. vr_r + window size.
uint32_t vr_x; // t_reordering state. SN following PDU which triggered t_reordering.
uint32_t vr_ms; // Max status tx state. Highest possible value of SN for ACK_SN in status PDU.
uint32_t vr_h; // Highest rx state. SN following PDU with highest SN among rxed PDUs.
// Mutexes
pthread_mutex_t mutex;
// Rx windows
std::map<uint32_t, rlc_amd_rx_pdu_t> rx_window;
std::map<uint32_t, rlc_amd_rx_pdu_segments_t> rx_segments;
// Metrics
uint32_t num_rx_bytes;
bool poll_received;
bool do_status;
/****************************************************************************
* Timers
* Ref: 3GPP TS 36.322 v10.0.0 Section 7
***************************************************************************/
srslte::timers::timer *reordering_timer;
uint32_t reordering_timer_id;
};
// Rx and Tx objects
rlc_am_tx tx;
rlc_am_rx rx;
// Common variables needed/provided by parent class
srsue::rrc_interface_rlc *rrc;
srslte::log *log;
srsue::pdcp_interface_rlc *pdcp;
mac_interface_timers *mac_timers;
uint32_t lcid;
srslte_rlc_am_config_t cfg;
std::string rb_name;
static const int poll_periodicity = 8; // After how many data PDUs a status PDU shall be requested
};
/****************************************************************************

@ -27,13 +27,15 @@
#ifndef SRSLTE_RLC_METRICS_H
#define SRSLTE_RLC_METRICS_H
#include "srslte/common/common.h"
namespace srslte {
struct rlc_metrics_t
{
float dl_tput_mbps;
float ul_tput_mbps;
float dl_tput_mbps[SRSLTE_N_RADIO_BEARERS];
float ul_tput_mbps[SRSLTE_N_RADIO_BEARERS];
float dl_tput_mrb_mbps[SRSLTE_N_MCH_LCIDS];
};
} // namespace srslte

@ -48,7 +48,7 @@ class rlc_um
:public rlc_common
{
public:
rlc_um(uint32_t queue_len = 32);
rlc_um(uint32_t queue_len = 128);
~rlc_um();
void init(log *rlc_entity_log_,
uint32_t lcid_,

@ -59,6 +59,10 @@ log_filter::log_filter(std::string layer, logger *logger_, bool tti)
init(layer, logger_, tti);
}
// Destructor. The body is empty: no explicit cleanup is performed here.
log_filter::~log_filter()
{
}
void log_filter::init(std::string layer, logger *logger_, bool tti)
{
service_name = layer;

@ -53,6 +53,7 @@ bool threads_new_rt_cpu(pthread_t *thread, void *(*start_routine) (void*), void
cpu_set_t cpuset;
bool attr_enable = false;
#ifdef PER_THREAD_PRIO
if (prio_offset >= 0) {
param.sched_priority = sched_get_priority_max(SCHED_FIFO) - prio_offset;
pthread_attr_init(&attr);
@ -82,6 +83,25 @@ bool threads_new_rt_cpu(pthread_t *thread, void *(*start_routine) (void*), void
}
attr_enable = true;
} else if (prio_offset == -2) {
#else
// All threads have normal priority except prio_offset=0,1,2,3,4
if (prio_offset >= 0 && prio_offset < 5) {
param.sched_priority = 50-prio_offset;
pthread_attr_init(&attr);
if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) {
perror("pthread_attr_setinheritsched");
}
if (pthread_attr_setschedpolicy(&attr, SCHED_FIFO)) {
perror("pthread_attr_setschedpolicy");
}
if (pthread_attr_setschedparam(&attr, &param)) {
perror("pthread_attr_setschedparam");
fprintf(stderr, "Error not enough privileges to set Scheduling priority\n");
}
attr_enable = true;
} else {
#endif
param.sched_priority = 0;
pthread_attr_init(&attr);
if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) {

@ -139,6 +139,7 @@ int srslte_sequence_LTE_pr(srslte_sequence_t *q, uint32_t len, uint32_t seed) {
for (int i=0;i<len;i++) {
q->c_float[i] = (1-2*q->c[i]);
q->c_short[i] = (int16_t) q->c_float[i];
q->c_char[i] = (int8_t) q->c_float[i];;
}
return SRSLTE_SUCCESS;
}
@ -164,6 +165,10 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) {
if (!q->c_short) {
return SRSLTE_ERROR;
}
q->c_char = srslte_vec_malloc(len * sizeof(int8_t));
if (!q->c_char) {
return SRSLTE_ERROR;
}
q->max_len = len;
}
return SRSLTE_SUCCESS;
@ -182,6 +187,9 @@ void srslte_sequence_free(srslte_sequence_t *q) {
if (q->c_short) {
free(q->c_short);
}
if (q->c_char) {
free(q->c_char);
}
bzero(q, sizeof(srslte_sequence_t));
}

@ -417,7 +417,8 @@ int srslte_enb_dl_put_pdcch_dl(srslte_enb_dl_t *q, srslte_ra_dl_dci_t *grant,
srslte_dci_format_t format, srslte_dci_location_t location,
uint16_t rnti, uint32_t sf_idx)
{
srslte_dci_msg_t dci_msg = {};
srslte_dci_msg_t dci_msg;
bzero(&dci_msg, sizeof(dci_msg));
bool rnti_is_user = true;
if (rnti == SRSLTE_SIRNTI || rnti == SRSLTE_PRNTI || (rnti >= SRSLTE_RARNTI_START && rnti <= SRSLTE_RARNTI_END)) {
@ -439,7 +440,8 @@ int srslte_enb_dl_put_pdcch_ul(srslte_enb_dl_t *q, srslte_ra_ul_dci_t *grant,
srslte_dci_location_t location,
uint16_t rnti, uint32_t sf_idx)
{
srslte_dci_msg_t dci_msg = {};
srslte_dci_msg_t dci_msg;
bzero(&dci_msg, sizeof(dci_msg));
srslte_dci_msg_pack_pusch(grant, &dci_msg, q->cell.nof_prb);
if (srslte_pdcch_encode(&q->pdcch, &dci_msg, location, rnti, q->sf_symbols, sf_idx, q->cfi)) {

@ -76,7 +76,7 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q,
srslte_ofdm_set_normalize(&q->fft, false);
srslte_ofdm_set_freq_shift(&q->fft, -0.5);
if (srslte_pucch_init(&q->pucch)) {
if (srslte_pucch_init_enb(&q->pucch)) {
fprintf(stderr, "Error creating PUCCH object\n");
goto clean_exit;
}

@ -43,13 +43,15 @@
#endif
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
#include <x86intrin.h>
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
int srslte_rm_turbo_rx_lut_sse_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
#endif
#ifdef LV_HAVE_AVX
#include <immintrin.h>
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
#include <x86intrin.h>
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
int srslte_rm_turbo_rx_lut_avx_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
#endif
#define NCOLS 32
@ -69,9 +71,28 @@ static int k0_vec[SRSLTE_NOF_TC_CB_SIZES][4][2];
static bool rm_turbo_tables_generated = false;
// Store deinterleaver version for sub-block turbo decoder
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
// Sub-block counts for which a dedicated sub-block deinterleaver table exists.
#define NOF_DEINTER_TABLE_SB_IDX 3
const static int deinter_table_sb_idx[NOF_DEINTER_TABLE_SB_IDX] = {8, 16, 32};

/**
 * Maps a number of sub-blocks to its position in deinter_table_sb_idx.
 *
 * @param[in] nof_subblocks Number of sub-blocks to look up
 * @return Index in [0, NOF_DEINTER_TABLE_SB_IDX) when the value is listed in
 *         deinter_table_sb_idx, -1 otherwise. A value of 0 returns -1 silently;
 *         any other unlisted value also prints an error to stderr.
 */
int deinter_table_idx_from_sb_len(uint32_t nof_subblocks) {
  for (int i = 0; i < NOF_DEINTER_TABLE_SB_IDX; i++) {
    // Table entries are small positive constants, so the cast is lossless
    // and avoids a signed/unsigned comparison.
    if ((uint32_t) deinter_table_sb_idx[i] == nof_subblocks) {
      return i;
    }
  }
  if (nof_subblocks != 0) {
    // nof_subblocks is unsigned: print it with %u rather than %d
    fprintf(stderr, "Error number of sub-blocks %u not supported in RM\n", (unsigned int) nof_subblocks);
  }
  return -1;
}
static uint16_t deinterleaver_sb[NOF_DEINTER_TABLE_SB_IDX][192][4][18448];
#endif
static uint16_t temp_table1[3*6176], temp_table2[3*6176];
void srslte_rm_turbo_gentable_systematic(uint16_t *table_bits, int k0_vec[4][2], uint32_t nrows, int ndummy) {
static void srslte_rm_turbo_gentable_systematic(uint16_t *table_bits, int k0_vec[4][2], uint32_t nrows, int ndummy) {
bool last_is_null=true;
int k_b=0, buff_idx=0;
@ -96,7 +117,7 @@ void srslte_rm_turbo_gentable_systematic(uint16_t *table_bits, int k0_vec[4][2],
}
}
void srslte_rm_turbo_gentable_parity(uint16_t *table_parity, int k0_vec[4][2], int offset, uint16_t nrows, int ndummy) {
static void srslte_rm_turbo_gentable_parity(uint16_t *table_parity, int k0_vec[4][2], int offset, uint16_t nrows, int ndummy) {
bool last_is_null=true;
int k_b=0, buff_idx0=0;
@ -140,9 +161,7 @@ void srslte_rm_turbo_gentable_parity(uint16_t *table_parity, int k0_vec[4][2], i
}
}
void srslte_rm_turbo_gentable_receive(uint16_t *table, uint32_t cb_len, uint32_t rv_idx)
static void srslte_rm_turbo_gentable_receive(uint16_t *table, uint32_t cb_len, uint32_t rv_idx)
{
int nrows = (uint32_t) (cb_len / 3 - 1) / NCOLS + 1;
@ -217,6 +236,33 @@ void srslte_rm_turbo_gentable_receive(uint16_t *table, uint32_t cb_len, uint32_t
table[i] = temp_table2[temp_table1[i]];
}
}
#if SRSLTE_TDEC_EXPECT_INPUT_SB==1
/* Index mapping used to build the sub-block tables:
 *   x -> (x mod (long_cb/win))*win + x/(long_cb/win)
 * Relies on a variable named long_cb being in scope at the point of use.
 * Arguments are parenthesised so that expression arguments (e.g. a+b) expand correctly. */
#define inter(x,win) (((x)%(long_cb/(win)))*(win)+(x)/(long_cb/(win)))
/* Prepare output for sliding window decoder:
* (0..long_cb-1) bits are systematic
* (long_cb..2*long_cb-1) are parity0
* (2*long_cb..3*long_cb-1) are parity1
* then tail bits
*
* Within each block, bits are interleaved every nof_sb
*/
/* Rewrites a deinterleaver table so that its entries address the sub-block layout.
 *
 * in      source table with 3*long_cb+12 entries
 * out     destination table, same number of entries
 * cb_idx  code block size index, converted to a length with srslte_cbsegm_cbsize()
 * nof_sb  interleaving window handed to the inter() macro
 */
static void interleave_table_sb(uint16_t *in, uint16_t *out, uint32_t cb_idx, uint32_t nof_sb)
{
  int long_cb = srslte_cbsegm_cbsize(cb_idx);
  int nof_entries = 3*long_cb+12;
  for (int k=0;k<nof_entries;k++) {
    if (in[k] >= 3*long_cb) {
      // Tail entries keep their relative order and are placed after the three padded streams
      out[k] = (in[k]-3*long_cb)+3*(long_cb+32);
      continue;
    }
    // in[k]%3 selects the stream, in[k]/3 is the position inside it.
    // Every stream is spaced by long_cb+32 entries; the original note warns that
    // this alignment must match the one used where the table is consumed.
    out[k] = (in[k]%3)*(long_cb+32)+inter(in[k]/3,nof_sb);
  }
}
#endif
void srslte_rm_turbo_gentables() {
if (!rm_turbo_tables_generated) {
@ -246,6 +292,13 @@ void srslte_rm_turbo_gentables() {
for (int i = 0; i < 4; i++) {
srslte_rm_turbo_gentable_receive(deinterleaver[cb_idx][i], in_len, i);
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
for (uint32_t s = 0; s < NOF_DEINTER_TABLE_SB_IDX; s++) {
interleave_table_sb(deinterleaver[cb_idx][i], deinterleaver_sb[s][cb_idx][i], cb_idx,
deinter_table_sb_idx[s]);
}
#endif
}
}
}
@ -322,6 +375,10 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
}
}
/* Convenience entry point: forwards all arguments to srslte_rm_turbo_rx_lut_()
 * with its last argument (enable_input_tdec) set to true, and returns its result. */
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
return srslte_rm_turbo_rx_lut_(input, output, in_len, cb_idx, rv_idx, true);
}
/**
* Undoes rate matching for LTE Turbo Coder. Expands rate matched buffer to full size buffer.
*
@ -331,37 +388,93 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
* @param[in] rv_idx Redundancy Version from DCI control message
* @return Error code
*/
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
int srslte_rm_turbo_rx_lut_(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx, bool enable_input_tdec)
{
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
int cb_len=srslte_cbsegm_cbsize(cb_idx);
int idx = deinter_table_idx_from_sb_len(srslte_tdec_autoimp_get_subblocks(cb_len));
uint16_t *deinter = NULL;
if (idx < 0 || !enable_input_tdec) {
deinter = deinterleaver[cb_idx][rv_idx];
} else if (idx < NOF_DEINTER_TABLE_SB_IDX) {
deinter = deinterleaver_sb[idx][cb_idx][rv_idx];
} else {
fprintf(stderr, "Sub-block size index %d not supported in srslte_rm_turbo_rx_lut()\n", idx);
return -1;
}
#else
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
#endif
#ifdef LV_HAVE_AVX
return srslte_rm_turbo_rx_lut_avx(input, output, in_len, cb_idx, rv_idx);
return srslte_rm_turbo_rx_lut_avx(input, output, deinter, in_len, cb_idx, rv_idx);
#else
#ifdef LV_HAVE_SSE
return srslte_rm_turbo_rx_lut_sse(input, output, in_len, cb_idx, rv_idx);
return srslte_rm_turbo_rx_lut_sse(input, output, deinter, in_len, cb_idx, rv_idx);
#else
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
for (int i=0;i<in_len;i++) {
output[deinter[i%out_len]] += input[i];
}
return 0;
#endif
#endif
} else {
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
return SRSLTE_ERROR_INVALID_INPUTS;
}
}
/**
 * 8-bit variant of the LUT-based rate de-matching: every input value is accumulated
 * into the output position given by the selected deinterleaver table.
 *
 * When SRSLTE_TDEC_EXPECT_INPUT_SB == 1 the sub-block table matching the sub-block
 * count reported by srslte_tdec_autoimp_get_subblocks_8bit() is used; if that count
 * has no dedicated table, the plain deinterleaver table is used instead.
 *
 * @param[in]     input   Rate-matched input values
 * @param[in,out] output  Buffer the input values are added to
 * @param[in]     in_len  Number of input values
 * @param[in]     cb_idx  Code block size index, must be < SRSLTE_NOF_TC_CB_SIZES
 * @param[in]     rv_idx  Redundancy version, must be < 4
 * @return 0 (or the SSE implementation's return value) on success,
 *         SRSLTE_ERROR_INVALID_INPUTS for an invalid rv_idx/cb_idx,
 *         -1 for an unsupported sub-block index
 */
int srslte_rm_turbo_rx_lut_8bit(int8_t *input, int8_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
  if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {

#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
    int cb_len=srslte_cbsegm_cbsize(cb_idx);
    int idx = deinter_table_idx_from_sb_len(srslte_tdec_autoimp_get_subblocks_8bit(cb_len));
    uint16_t *deinter = NULL;
    if (idx < 0) {
      // No dedicated sub-block table: fall back to the plain table
      deinter = deinterleaver[cb_idx][rv_idx];
    } else if (idx < NOF_DEINTER_TABLE_SB_IDX) {
      deinter = deinterleaver_sb[idx][cb_idx][rv_idx];
    } else {
      fprintf(stderr, "Sub-block size index %d not supported in srslte_rm_turbo_rx_lut_8bit()\n", idx);
      return -1;
    }
#else
    uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
#endif

    // FIXME: AVX version of rm_turbo_rx_lut not working
    // Warning: Need to check if 8-bit sse version is correct

#ifdef LV_HAVE_SSE
    return srslte_rm_turbo_rx_lut_sse_8bit(input, output, deinter, in_len, cb_idx, rv_idx);
#else
    // Scalar reference implementation; the table index wraps every out_len inputs
    uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
    for (uint32_t i=0;i<in_len;i++) {
      output[deinter[i%out_len]] += input[i];
    }
    return 0;
#endif
  } else {
    printf("Invalid inputs rv_idx=%u, cb_idx=%u\n", (unsigned int) rv_idx, (unsigned int) cb_idx);
    return SRSLTE_ERROR_INVALID_INPUTS;
  }
}
#ifdef LV_HAVE_SSE
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
const __m128i* xPtr = (const __m128i*) input;
const __m128i* lutPtr = (const __m128i*) deinter;
@ -427,6 +540,97 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len,
}
}
/* SSE implementation of the 8-bit LUT-based rate de-matching.
 *
 * Adds every input value to output[] at the position read from the deinter table,
 * handling 16 inputs per SIMD iteration. The table position restarts from the
 * beginning of deinter every out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12 inputs.
 *
 * rv_idx is only range-checked here; the table to use is passed in by the caller.
 * Returns 0 on success, SRSLTE_ERROR_INVALID_INPUTS if rv_idx or cb_idx is out of range.
 */
int srslte_rm_turbo_rx_lut_sse_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
const __m128i* xPtr = (const __m128i*) input;
const __m128i* lutPtr = (const __m128i*) deinter;
__m128i xVal, lutVal1, lutVal2;
/* Simplify load if we do not need to wrap (ie high rates) */
if (in_len <= out_len) {
for (int i=0;i<in_len/16;i++) {
/* One 128-bit input load holds 16 int8 values; their 16 table entries are
 * uint16 and therefore need two 128-bit loads of 8 entries each */
xVal = _mm_loadu_si128(xPtr);
xPtr ++;
lutVal1 = _mm_loadu_si128(lutPtr);
lutPtr++;
lutVal2 = _mm_loadu_si128(lutPtr);
lutPtr ++;
/* Inputs 0..7 use the first table vector */
for (int j=0;j<8;j++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, j);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, j);
output[l] += x;
}
/* Inputs 8..15 use the second table vector */
for (int j=0;j<8;j++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, j+8);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, j);
output[l] += x;
}
}
/* Scalar handling of the inputs left over after the last full group of 16 */
for (int i=16*(in_len/16);i<in_len;i++) {
output[deinter[i%out_len]] += input[i];
}
} else {
/* intCnt:   table entries consumed in the current pass, plus 16 (it starts at 16)
 * inputCnt: total number of inputs consumed so far
 * nwrapps:  number of completed passes over the table */
int intCnt = 16;
int inputCnt = 0;
int nwrapps = 0;
while(inputCnt < in_len - 16) {
xVal = _mm_loadu_si128(xPtr);
xPtr ++;
lutVal1 = _mm_loadu_si128(lutPtr);
lutPtr++;
lutVal2 = _mm_loadu_si128(lutPtr);
lutPtr ++;
for (int j=0;j<8;j++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, j);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, j);
output[l] += x;
}
for (int j=0;j<8;j++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, j+8);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, j);
output[l] += x;
}
intCnt += 16;
inputCnt += 16;
/* A further group of 16 would run past the end of the table: finish the pass
 * with scalar code, then restart from the beginning of the table */
if (intCnt >= out_len && inputCnt < in_len - 16) {
/* Copy last elements */
/* NOTE(review): only remainders of 12 and 4 are handled, i.e. the code assumes
 * out_len%16 is one of those two values - confirm against the code block sizes */
if ((out_len%16) == 12) {
for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) {
output[deinter[j%out_len]] += input[j];
inputCnt++;
}
} else {
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
output[deinter[j%out_len]] += input[j];
inputCnt++;
}
}
/* And wrap pointers */
nwrapps++;
intCnt = 16;
xPtr = (const __m128i*) &input[nwrapps*out_len];
lutPtr = (const __m128i*) deinter;
}
}
/* Scalar handling of whatever inputs remain after the SIMD loop */
for (int i=inputCnt;i<in_len;i++) {
output[deinter[i%out_len]] += input[i];
}
}
return 0;
} else {
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
return SRSLTE_ERROR_INVALID_INPUTS;
}
}
#endif
@ -436,12 +640,10 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len,
l = (uint16_t) _mm256_extract_epi16(lutVal, j);\
output[l] += x;
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
const __m256i* xPtr = (const __m256i*) input;
const __m256i* lutPtr = (const __m256i*) deinter;
@ -539,6 +741,155 @@ int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len,
}
}
/* Helpers for srslte_rm_turbo_rx_lut_avx_8bit(). They expect xVal, lutVal1, lutVal2,
 * x, l and output to be in scope and j to be a compile-time constant in 0..15.
 *
 * SAVE_OUTPUT8(j):   input byte j    of xVal, table entry j of lutVal1
 * SAVE_OUTPUT8_2(j): input byte j+16 of xVal, table entry j of lutVal2
 *
 * A 256-bit input vector holds 32 int8 values, while each 256-bit table vector holds
 * only 16 uint16 entries, so the second table vector belongs to input bytes 16..31. */
#define SAVE_OUTPUT8(j) x = (int8_t) _mm256_extract_epi8(xVal, j);\
                        l = (uint16_t) _mm256_extract_epi16(lutVal1, j);\
                        output[l] += x;

#define SAVE_OUTPUT8_2(j) x = (int8_t) _mm256_extract_epi8(xVal, (j)+16);\
                          l = (uint16_t) _mm256_extract_epi16(lutVal2, j);\
                          output[l] += x;

/**
 * AVX implementation of the 8-bit LUT-based rate de-matching.
 *
 * Adds every input value to output[] at the position read from the deinter table.
 * When in_len <= out_len (no wrap over the table) 32 inputs are handled per SIMD
 * iteration. When the input wraps over the table there is no vectorised path, so the
 * scalar reference accumulation is used.
 *
 * @param[in]     input   Rate-matched input values
 * @param[in,out] output  Buffer the input values are added to
 * @param[in]     deinter Deinterleaver table selected by the caller
 * @param[in]     in_len  Number of input values
 * @param[in]     cb_idx  Code block size index, must be < SRSLTE_NOF_TC_CB_SIZES
 * @param[in]     rv_idx  Redundancy version, must be < 4 (only range-checked here)
 * @return 0 on success, SRSLTE_ERROR_INVALID_INPUTS for an invalid rv_idx/cb_idx
 */
int srslte_rm_turbo_rx_lut_avx_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
  if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
    uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;

    /* Simplify load if we do not need to wrap (ie high rates) */
    if (in_len <= out_len) {
      const __m256i* xPtr   = (const __m256i*) input;
      const __m256i* lutPtr = (const __m256i*) deinter;
      __m256i xVal, lutVal1, lutVal2;
      int8_t x;
      uint16_t l;

      for (uint32_t i=0;i<in_len/32;i++) {
        xVal    = _mm256_loadu_si256(xPtr);
        xPtr ++;
        lutVal1 = _mm256_loadu_si256(lutPtr);
        lutPtr ++;
        lutVal2 = _mm256_loadu_si256(lutPtr);
        lutPtr ++;

        /* Inputs 0..15 with the first table vector */
        SAVE_OUTPUT8(0);
        SAVE_OUTPUT8(1);
        SAVE_OUTPUT8(2);
        SAVE_OUTPUT8(3);
        SAVE_OUTPUT8(4);
        SAVE_OUTPUT8(5);
        SAVE_OUTPUT8(6);
        SAVE_OUTPUT8(7);
        SAVE_OUTPUT8(8);
        SAVE_OUTPUT8(9);
        SAVE_OUTPUT8(10);
        SAVE_OUTPUT8(11);
        SAVE_OUTPUT8(12);
        SAVE_OUTPUT8(13);
        SAVE_OUTPUT8(14);
        SAVE_OUTPUT8(15);

        /* Inputs 16..31 with the second table vector */
        SAVE_OUTPUT8_2(0);
        SAVE_OUTPUT8_2(1);
        SAVE_OUTPUT8_2(2);
        SAVE_OUTPUT8_2(3);
        SAVE_OUTPUT8_2(4);
        SAVE_OUTPUT8_2(5);
        SAVE_OUTPUT8_2(6);
        SAVE_OUTPUT8_2(7);
        SAVE_OUTPUT8_2(8);
        SAVE_OUTPUT8_2(9);
        SAVE_OUTPUT8_2(10);
        SAVE_OUTPUT8_2(11);
        SAVE_OUTPUT8_2(12);
        SAVE_OUTPUT8_2(13);
        SAVE_OUTPUT8_2(14);
        SAVE_OUTPUT8_2(15);
      }
      /* Scalar handling of the inputs left over after the last full group of 32 */
      for (uint32_t i=32*(in_len/32);i<in_len;i++) {
        output[deinter[i%out_len]] += input[i];
      }
    } else {
      /* The input wraps over the table and no vectorised wrap handling exists.
       * Use the scalar reference accumulation instead of returning success
       * without having written anything to output. */
      for (uint32_t i=0;i<in_len;i++) {
        output[deinter[i%out_len]] += input[i];
      }
    }
    return 0;
  } else {
    printf("Invalid inputs rv_idx=%u, cb_idx=%u\n", (unsigned int) rv_idx, (unsigned int) cb_idx);
    return SRSLTE_ERROR_INVALID_INPUTS;
  }
}
#endif

@ -66,7 +66,16 @@ const uint32_t f2_list[SRSLTE_NOF_TC_CB_SIZES] = { 10, 12, 42, 16, 18, 20, 22, 2
280, 142, 480, 146, 444, 120, 152, 462, 234, 158, 80, 96, 902, 166, 336,
170, 86, 174, 176, 178, 120, 182, 184, 186, 94, 190, 480 };
int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, uint32_t long_cb) {
/* Generates the interleaver tables of h for code block length long_cb by calling
 * srslte_tc_interl_LTE_gen_interl() with an interleaving window (interl_win) of 1,
 * and returns that function's result. */
int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, uint32_t long_cb)
{
return srslte_tc_interl_LTE_gen_interl(h, long_cb, 1);
}
/* Index mappings for a block of long_cb entries split into win windows. Both rely on
 * a variable named long_cb being in scope at the point of use.
 *   inter(x,win)   = (x mod win)*(long_cb/win) + x/win
 *   deinter(x,win) = (x mod (long_cb/win))*win + x/(long_cb/win)
 * When long_cb is a multiple of win, deinter() undoes inter().
 * Arguments are parenthesised so that expression arguments (e.g. a+b) expand correctly. */
#define deinter(x,win) (((x)%(long_cb/(win)))*(win)+(x)/(long_cb/(win)))
#define inter(x,win) (((x)%(win))*(long_cb/(win))+(x)/(win))
int srslte_tc_interl_LTE_gen_interl(srslte_tc_interl_t *h, uint32_t long_cb, uint32_t interl_win) {
uint32_t cb_table_idx, f1, f2;
uint64_t i, j;
@ -92,6 +101,19 @@ int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, uint32_t long_cb) {
h->forward[i] = (uint32_t) j;
h->reverse[j] = (uint32_t) i;
}
if (interl_win != 1) {
uint16_t *f = malloc(long_cb*sizeof(uint16_t));
uint16_t *r = malloc(long_cb*sizeof(uint16_t));
memcpy(f, h->forward, long_cb*sizeof(uint16_t));
memcpy(r, h->reverse, long_cb*sizeof(uint16_t));
for (i = 0; i < long_cb; i++) {
h->forward[i] = deinter(f[inter(i,interl_win)],interl_win);
h->reverse[i] = deinter(r[inter(i,interl_win)],interl_win);
}
free(f);
free(r);
}
return 0;
}

@ -30,7 +30,6 @@
#include <stdint.h>
#include "srslte/phy/fec/tc_interl.h"
#include "srslte/phy/fec/turbocoder.h"
#define TURBO_SRSLTE_TCOD_RATE 3

@ -183,7 +183,7 @@ int main(int argc, char **argv) {
srslte_rm_turbo_rx(buff_f, BUFFSZ, rm_bits_f, nof_e_bits, bits_f, long_cb_enc, rv_idx, 0);
bzero(bits2_s, long_cb_enc*sizeof(short));
srslte_rm_turbo_rx_lut(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx);
srslte_rm_turbo_rx_lut_(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx, false);
for (int i=0;i<long_cb_enc;i++) {
if (bits_f[i] != bits2_s[i]) {

@ -96,8 +96,16 @@ int main(int argc, char **argv) {
}
}
/* Create CRC for Transport Block, it is not currently used but it is required */
srslte_crc_t crc_tb;
bzero(&crc_tb, sizeof(crc_tb));
if (srslte_crc_init(&crc_tb, SRSLTE_LTE_CRC24A, 24)) {
printf("error initialising CRC\n");
exit(-1);
}
srslte_tcod_encode(&tcod, input_bits, output_bits, long_cb);
srslte_tcod_encode_lut(&tcod, NULL, input_bytes, parity, len);
srslte_tcod_encode_lut(&tcod, &crc_tb, NULL, input_bytes, parity, len, false);
srslte_bit_unpack_vector(parity, parity_bits, 2*(long_cb+4));

@ -52,12 +52,14 @@ int test_known_data = 0;
int test_errors = 0;
int nof_repetitions = 1;
srslte_tdec_impl_type_t tdec_type;
#define SNR_POINTS 4
#define SNR_MIN 1.0
#define SNR_MAX 8.0
void usage(char *prog) {
printf("Usage: %s [nlesv]\n", prog);
printf("Usage: %s [kcinNledts]\n", prog);
printf(
"\t-k Test with known data (ignores frame_length) [Default disabled]\n");
printf("\t-c nof_cb in parallel [Default %d]\n", nof_cb);
@ -66,13 +68,14 @@ void usage(char *prog) {
printf("\t-N nof_repetitions [Default %d]\n", nof_repetitions);
printf("\t-l frame_length [Default %d]\n", frame_length);
printf("\t-e ebno in dB [Default scan]\n");
printf("\t-d Decoder implementation type: 0: Generic, 1: SSE, 2: SSE-window\n");
printf("\t-t test: check errors on exit [Default disabled]\n");
printf("\t-s seed [Default 0=time]\n");
}
void parse_args(int argc, char **argv) {
int opt;
while ((opt = getopt(argc, argv, "cinNlstvekt")) != -1) {
while ((opt = getopt(argc, argv, "kcinNledts")) != -1) {
switch (opt) {
case 'c':
nof_cb = atoi(argv[optind]);
@ -95,6 +98,9 @@ void parse_args(int argc, char **argv) {
case 'l':
frame_length = atoi(argv[optind]);
break;
case 'd':
tdec_type = (srslte_tdec_impl_type_t) atoi(argv[optind]);
break;
case 'e':
ebno_db = atof(argv[optind]);
break;
@ -117,7 +123,7 @@ int main(int argc, char **argv) {
float *llr;
short *llr_s;
uint8_t *llr_c;
uint8_t *data_tx, *data_rx, *data_rx_bytes[SRSLTE_TDEC_MAX_NPAR], *symbols;
uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
uint32_t i, j;
float var[SNR_POINTS];
uint32_t snr_points;
@ -159,13 +165,11 @@ int main(int argc, char **argv) {
perror("malloc");
exit(-1);
}
for (int cb=0;cb<SRSLTE_TDEC_MAX_NPAR;cb++) {
data_rx_bytes[cb] = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes[cb]) {
data_rx_bytes = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes) {
perror("malloc");
exit(-1);
}
}
symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t));
if (!symbols) {
@ -193,11 +197,13 @@ int main(int argc, char **argv) {
exit(-1);
}
if (srslte_tdec_init(&tdec, frame_length)) {
if (srslte_tdec_init_manual(&tdec, frame_length, tdec_type)) {
fprintf(stderr, "Error initiating Turbo decoder\n");
exit(-1);
}
srslte_tdec_force_not_sb(&tdec);
float ebno_inc, esno_db;
ebno_inc = (SNR_MAX - SNR_MIN) / SNR_POINTS;
if (ebno_db == 100.0) {
@ -246,7 +252,7 @@ int main(int argc, char **argv) {
}
/* decoder */
srslte_tdec_reset(&tdec, frame_length);
srslte_tdec_new_cb(&tdec, frame_length);
uint32_t t;
if (nof_iterations == -1) {
@ -255,36 +261,21 @@ int main(int argc, char **argv) {
t = nof_iterations;
}
int16_t *input[SRSLTE_TDEC_MAX_NPAR];
uint8_t *output[SRSLTE_TDEC_MAX_NPAR];
for (int n=0;n<SRSLTE_TDEC_MAX_NPAR;n++) {
if (n < nof_cb) {
input[n] = llr_s;
output[n] = data_rx_bytes[n];
} else {
input[n] = NULL;
output[n] = NULL;
}
}
gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all_par(&tdec, input, output, t, frame_length);
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
}
gettimeofday(&tdata[2], NULL);
get_time_interval(tdata);
mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
mean_usec = (tdata[0].tv_sec*1e6+tdata[0].tv_usec)/nof_repetitions;
frame_cnt++;
uint32_t errors_this = 0;
for (int cb=0;cb<nof_cb;cb++) {
srslte_bit_unpack_vector(data_rx_bytes[cb], data_rx, frame_length);
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
errors_this=srslte_bit_diff(data_tx, data_rx, frame_length);
//printf("error[%d]=%d\n", cb, errors_this);
errors += errors_this;
}
printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
printf("BER: %.2e ", (float) errors / (nof_cb*frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec)", (float) (nof_cb*frame_length) / mean_usec, mean_usec);
@ -301,10 +292,8 @@ int main(int argc, char **argv) {
}
}
for (int cb=0;cb<SRSLTE_TDEC_MAX_NPAR;cb++) {
if (data_rx_bytes[cb]) {
free(data_rx_bytes[cb]);
}
if (data_rx_bytes) {
free(data_rx_bytes);
}
free(data_tx);
free(symbols);

@ -193,7 +193,13 @@ int srslte_tcod_encode(srslte_tcod_t *h, uint8_t *input, uint8_t *output, uint32
}
/* Expects bytes and produces bytes. The systematic and parity bits are interlaced in the output */
int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input, uint8_t *parity, uint32_t cblen_idx)
int srslte_tcod_encode_lut(srslte_tcod_t *h,
srslte_crc_t *crc_tb,
srslte_crc_t *crc_cb,
uint8_t *input,
uint8_t *parity,
uint32_t cblen_idx,
bool last_cb)
{
if (cblen_idx < 188) {
uint32_t long_cb = (uint32_t) srslte_cbsegm_cbsize(cblen_idx);
@ -204,20 +210,24 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input,
}
/* Reset CRC */
if (crc) {
srslte_crc_set_init(crc, 0);
if (crc_cb) {
srslte_crc_set_init(crc_cb, 0);
}
/* Parity bits for the 1st constituent encoders */
uint8_t state0 = 0;
if (crc) {
if (crc_cb) {
int block_size_nocrc = (long_cb - crc_cb->order - ((last_cb) ? crc_tb->order : 0)) / 8;
/* if CRC pointer is given */
for (int i = 0; i < (long_cb - crc->order) / 8; i++) {
for (int i = 0; i < block_size_nocrc; i++) {
uint8_t in = input[i];
/* Put byte in CRC and save latest checksum */
srslte_crc_checksum_put_byte(crc, in);
/* Put byte in TB CRC and save latest checksum */
srslte_crc_checksum_put_byte(crc_tb, in);
/* Put byte in CB CRC and save latest checksum */
srslte_crc_checksum_put_byte(crc_cb, in);
/* Run actual encoder */
tcod_lut_t l = tcod_lut[state0][in];
@ -225,10 +235,27 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input,
state0 = l.next_state;
}
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc);
for (int i = 0; i < crc->order / 8; i++) {
int mask_shift = 8 * (crc->order / 8 - i - 1);
int idx = (long_cb - crc->order) / 8 + i;
if (last_cb) {
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_tb);
for (int i = 0; i < crc_tb->order / 8; i++) {
int mask_shift = 8 * (crc_tb->order / 8 - i - 1);
int idx = block_size_nocrc + i;
uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff);
/* Put byte in CB CRC and save latest checksum */
srslte_crc_checksum_put_byte(crc_cb, in);
input[idx] = in;
tcod_lut_t l = tcod_lut[state0][in];
parity[idx] = l.output;
state0 = l.next_state;
}
}
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_cb);
for (int i = 0; i < crc_cb->order / 8; i++) {
int mask_shift = 8 * (crc_cb->order / 8 - i - 1);
int idx = (long_cb - crc_cb->order) / 8 + i;
uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff);
input[idx] = in;
@ -239,11 +266,31 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input,
} else {
/* No CRC given */
for (uint32_t i = 0; i < long_cb / 8; i++) {
tcod_lut_t l = tcod_lut[state0][input[i]];
int block_size_nocrc = (long_cb - ((last_cb) ? crc_tb->order : 0)) / 8;
for (uint32_t i = 0; i < block_size_nocrc; i++) {
uint8_t in = input[i];
srslte_crc_checksum_put_byte(crc_tb, in);
tcod_lut_t l = tcod_lut[state0][in];
parity[i] = l.output;
state0 = l.next_state;
}
if (last_cb) {
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_tb);
for (int i = 0; i < crc_tb->order / 8; i++) {
int mask_shift = 8 * (crc_tb->order / 8 - i - 1);
int idx = block_size_nocrc + i;
uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff);
input[idx] = in;
tcod_lut_t l = tcod_lut[state0][in];
parity[idx] = l.output;
state0 = l.next_state;
}
}
}
parity[long_cb / 8] = 0; // will put tail here later

@ -24,151 +24,543 @@
*
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <srslte/srslte.h>
#include "srslte/phy/utils/vector.h"
#include "srslte/phy/fec/turbodecoder.h"
#define debug_enabled 0
/* Generic (no SSE) implementation */
#include "srslte/phy/fec/turbodecoder_gen.h"
srslte_tdec_16bit_impl_t gen_impl = {
tdec_gen_init,
tdec_gen_free,
tdec_gen_dec,
tdec_gen_extract_input,
tdec_gen_decision_byte
};
/* SSE no-window implementation */
#include "srslte/phy/fec/turbodecoder_sse.h"
srslte_tdec_16bit_impl_t sse_impl = {
tdec_sse_init,
tdec_sse_free,
tdec_sse_dec,
tdec_sse_extract_input,
tdec_sse_decision_byte
};
/* SSE window implementation */
#ifdef LV_HAVE_SSE
#include "srslte/phy/fec/turbodecoder_simd.h"
#define WINIMP_IS_SSE16
#include "srslte/phy/fec/turbodecoder_win.h"
#undef WINIMP_IS_SSE16
srslte_tdec_16bit_impl_t sse16_win_impl = {
tdec_winsse16_init,
tdec_winsse16_free,
tdec_winsse16_dec,
tdec_winsse16_extract_input,
tdec_winsse16_decision_byte
};
#endif
#include "srslte/phy/utils/vector.h"
/* AVX window implementation */
#ifdef LV_HAVE_AVX2
#define WINIMP_IS_AVX16
#include "srslte/phy/fec/turbodecoder_win.h"
#undef WINIMP_IS_AVX16
srslte_tdec_16bit_impl_t avx16_win_impl = {
tdec_winavx16_init,
tdec_winavx16_free,
tdec_winavx16_dec,
tdec_winavx16_extract_input,
tdec_winavx16_decision_byte
};
#endif
/* SSE window implementation */
#ifdef LV_HAVE_SSE
#define WINIMP_IS_SSE8
#include "srslte/phy/fec/turbodecoder_win.h"
#undef WINIMP_IS_SSE8
srslte_tdec_8bit_impl_t sse8_win_impl = {
tdec_winsse8_init,
tdec_winsse8_free,
tdec_winsse8_dec,
tdec_winsse8_extract_input,
tdec_winsse8_decision_byte
};
#endif
/* AVX window implementation */
#ifdef LV_HAVE_AVX2
#define WINIMP_IS_AVX8
#include "srslte/phy/fec/turbodecoder_win.h"
#undef WINIMP_IS_AVX8
srslte_tdec_8bit_impl_t avx8_win_impl = {
tdec_winavx8_init,
tdec_winavx8_free,
tdec_winavx8_dec,
tdec_winavx8_extract_input,
tdec_winavx8_decision_byte
};
#endif
#define AUTO_16_SSE 0
#define AUTO_16_SSEWIN 1
#define AUTO_16_AVXWIN 2
#define AUTO_8_SSEWIN 0
#define AUTO_8_AVXWIN 1
// Include interfaces for 8 and 16 bit decoder implementations
#define LLR_IS_8BIT
#include "srslte/phy/fec/turbodecoder_iter.h"
#undef LLR_IS_8BIT
#define LLR_IS_16BIT
#include "srslte/phy/fec/turbodecoder_iter.h"
#undef LLR_IS_16BIT
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_init(&h->tdec_simd, SRSLTE_TDEC_MAX_NPAR, max_long_cb);
#else
h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12));
return srslte_tdec_init_manual(h, max_long_cb, SRSLTE_TDEC_AUTO);
}
/* Maps a number of decoder sub-blocks to the row used in the per-decoder
 * interleaver table: 32 -> 3, 16 -> 2, 8 -> 1. Every other value (including 1)
 * selects row 0. */
uint32_t interleaver_idx(uint32_t nof_subblocks) {
  if (nof_subblocks == 32) {
    return 3;
  }
  if (nof_subblocks == 16) {
    return 2;
  }
  if (nof_subblocks == 8) {
    return 1;
  }
  /* 1 sub-block and any unrecognised count share the first row */
  return 0;
}
/* Initializes the turbo decoder object */
int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec_impl_type_t dec_type)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->dec_type = dec_type;
// Set manual
switch(dec_type) {
case SRSLTE_TDEC_AUTO:
break;
case SRSLTE_TDEC_SSE:
h->dec16[0] = &sse_impl;
h->current_llr_type = SRSLTE_TDEC_16;
break;
case SRSLTE_TDEC_SSE_WINDOW:
h->dec16[0] = &sse16_win_impl;
h->current_llr_type = SRSLTE_TDEC_16;
break;
case SRSLTE_TDEC_GENERIC:
h->dec16[0] = &gen_impl;
h->current_llr_type = SRSLTE_TDEC_16;
break;
case SRSLTE_TDEC_SSE8_WINDOW:
h->dec8[0] = &sse8_win_impl;
h->current_llr_type = SRSLTE_TDEC_8;
break;
#ifdef LV_HAVE_AVX2
case SRSLTE_TDEC_AVX_WINDOW:
h->dec16[0] = &avx16_win_impl;
h->current_llr_type = SRSLTE_TDEC_16;
break;
case SRSLTE_TDEC_AVX8_WINDOW:
h->dec8[0] = &avx8_win_impl;
h->current_llr_type = SRSLTE_TDEC_8;
break;
#endif
default:
fprintf(stderr, "Error decoder %d not supported\n", dec_type);
goto clean_and_exit;
}
h->max_long_cb = max_long_cb;
h->app1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->app2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst0 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->syst0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->input_conv = srslte_vec_malloc(sizeof(int16_t) * (len * 3+32*3));
if (!h->input_conv) {
perror("malloc");
return -1;
perror("srslte_vec_malloc");
goto clean_and_exit;
}
return srslte_tdec_gen_init(&h->tdec_gen, max_long_cb);
if (dec_type == SRSLTE_TDEC_AUTO) {
h->dec16[AUTO_16_SSE] = &sse_impl;
h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl;
h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl;
#ifdef LV_HAVE_AVX2
h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl;
h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl;
#endif
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
if (h->dec16[td]) {
if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) {
goto clean_and_exit;
}
}
}
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_8;td++) {
if (h->dec8[td]) {
if ((h->nof_blocks8[td] = h->dec8[td]->tdec_init(&h->dec8_hdlr[td], h->max_long_cb))<0) {
goto clean_and_exit;
}
}
}
void srslte_tdec_free(srslte_tdec_t * h) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_free(&h->tdec_simd);
#else
// Compute 1 interleaver for each possible nof_subblocks (1, 8, 16 or 32)
for (int s=0;s<4;s++) {
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (srslte_tc_interl_init(&h->interleaver[s][i], srslte_cbsegm_cbsize(i)) < 0) {
goto clean_and_exit;
}
srslte_tc_interl_LTE_gen_interl(&h->interleaver[s][i], srslte_cbsegm_cbsize(i), s?(8<<(s-1)):1);
}
}
} else {
uint32_t nof_subblocks;
if (dec_type < SRSLTE_TDEC_SSE8_WINDOW) {
if ((h->nof_blocks16[0] = h->dec16[0]->tdec_init(&h->dec16_hdlr[0], h->max_long_cb))<0) {
goto clean_and_exit;
}
nof_subblocks = h->nof_blocks16[0];
} else {
if ((h->nof_blocks8[0] = h->dec8[0]->tdec_init(&h->dec8_hdlr[0], h->max_long_cb))<0) {
goto clean_and_exit;
}
nof_subblocks = h->nof_blocks8[0];
}
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (srslte_tc_interl_init(&h->interleaver[interleaver_idx(nof_subblocks)][i], srslte_cbsegm_cbsize(i)) < 0) {
goto clean_and_exit;
}
srslte_tc_interl_LTE_gen_interl(&h->interleaver[interleaver_idx(nof_subblocks)][i], srslte_cbsegm_cbsize(i), nof_subblocks);
}
}
h->current_cbidx = -1;
ret = 0;
clean_and_exit:
if (ret == -1) {
srslte_tdec_free(h);
}
return ret;
}
void srslte_tdec_free(srslte_tdec_t * h)
{
if (h->app1) {
free(h->app1);
}
if (h->app2) {
free(h->app2);
}
if (h->ext1) {
free(h->ext1);
}
if (h->ext2) {
free(h->ext2);
}
if (h->syst0) {
free(h->syst0);
}
if (h->parity0) {
free(h->parity0);
}
if (h->parity1) {
free(h->parity1);
}
if (h->input_conv) {
free(h->input_conv);
}
srslte_tdec_gen_free(&h->tdec_gen);
#endif
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_8;td++) {
if (h->dec8[td] && h->dec8_hdlr[td]) {
h->dec8[td]->tdec_free(h->dec8_hdlr[td]);
}
}
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
if (h->dec16[td] && h->dec16_hdlr[td]) {
h->dec16[td]->tdec_free(h->dec16_hdlr[td]);
}
}
for (int s=0;s<4;s++) {
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
srslte_tc_interl_free(&h->interleaver[s][i]);
}
}
bzero(h, sizeof(srslte_tdec_t));
}
int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_reset(&h->tdec_simd, long_cb);
#else
return srslte_tdec_gen_reset(&h->tdec_gen, long_cb);
#endif
/* Sets the force_not_sb flag of the decoder object h. How the flag is consumed
 * is decided by the iteration code, which is not part of this function. */
void srslte_tdec_force_not_sb(srslte_tdec_t *h)
{
  h->force_not_sb = true;
}
int srslte_tdec_reset_cb(srslte_tdec_t * h, uint32_t cb_idx) {
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_reset_cb(&h->tdec_simd, cb_idx);
#else
return srslte_tdec_gen_reset(&h->tdec_gen, h->tdec_gen.current_cb_len);
#endif
/* Produces the decoded bytes for the current codeblock by calling the
 * tdec_decision_byte hook of the currently selected implementation.
 *
 * The soft values are taken from h->app1 when h->n_iter is even and from
 * h->ext1 when it is odd. The 16-bit hook is used when current_llr_type is
 * SRSLTE_TDEC_16; otherwise the same buffers are handed to the 8-bit hook
 * through an int8_t* cast. */
static void tdec_decision_byte(srslte_tdec_t * h, uint8_t *output)
{
  const int even_iter = (h->n_iter % 2) == 0;

  if (h->current_llr_type != SRSLTE_TDEC_16) {
    h->dec8[h->current_dec]->tdec_decision_byte(even_iter ? (int8_t*) h->app1 : (int8_t*) h->ext1,
                                                output,
                                                h->current_long_cb);
    return;
  }

  h->dec16[h->current_dec]->tdec_decision_byte(even_iter ? h->app1 : h->ext1,
                                               output,
                                               h->current_long_cb);
}
int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, uint32_t cb_idx)
/* Returns number of subblocks in automatic mode for this long_cb */
uint32_t srslte_tdec_autoimp_get_subblocks(uint32_t long_cb)
{
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_get_nof_iterations_cb(&h->tdec_simd, cb_idx);
#else
return h->tdec_gen.n_iter;
#ifdef LV_HAVE_AVX2
if (!(long_cb%16) && long_cb > 800) {
return 16;
} else
#endif
if (!(long_cb%8) && long_cb > 400) {
return 8;
} else {
return 0;
}
}
void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_iteration(&h->tdec_simd, input, long_cb);
#else
srslte_vec_convert_if(input[0], 0.01, h->input_conv, 3*long_cb+12);
srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb);
/* Selects the 16-bit decoder slot used in automatic mode for a codeblock of
 * length long_cb, based on the sub-block count reported by
 * srslte_tdec_autoimp_get_subblocks(): 16 -> AUTO_16_AVXWIN,
 * 8 -> AUTO_16_SSEWIN, 0 -> AUTO_16_SSE. Any other count is reported on stderr
 * and slot 0 is returned. */
static int tdec_sb_idx(uint32_t long_cb) {
  const uint32_t nof_sb = srslte_tdec_autoimp_get_subblocks(long_cb);

  if (nof_sb == 16) {
    return AUTO_16_AVXWIN;
  }
  if (nof_sb == 8) {
    return AUTO_16_SSEWIN;
  }
  if (nof_sb == 0) {
    return AUTO_16_SSE;
  }

  fprintf(stderr, "Error in tdec_sb_idx() invalid nof_sb=%d\n", nof_sb);
  return 0;
}
/* Returns the number of sub-blocks used in automatic mode by the 8-bit
 * decoders for a codeblock of length long_cb:
 *   - 32 when long_cb is a multiple of 32 and larger than 2048 (only when
 *     built with LV_HAVE_AVX2),
 *   - 16 when long_cb is a multiple of 16 and larger than 800,
 *   - 8  when long_cb is a multiple of 8 and larger than 400,
 *   - 0  otherwise. */
uint32_t srslte_tdec_autoimp_get_subblocks_8bit(uint32_t long_cb)
{
#ifdef LV_HAVE_AVX2
  if (long_cb > 2048 && (long_cb % 32) == 0) {
    return 32;
  }
#endif
  if (long_cb > 800 && (long_cb % 16) == 0) {
    return 16;
  }
  if (long_cb > 400 && (long_cb % 8) == 0) {
    return 8;
  }
  return 0;
}
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
int16_t *input_par[SRSLTE_TDEC_MAX_NPAR];
input_par[0] = input;
return srslte_tdec_iteration_par(h, input_par, long_cb);
/* Selects the decoder slot used in automatic mode for 8-bit input of length
 * long_cb, based on srslte_tdec_autoimp_get_subblocks_8bit():
 *   32 -> AUTO_8_AVXWIN, 16 -> AUTO_8_SSEWIN (8-bit decoders),
 *   8  -> 10+AUTO_16_SSEWIN, 0 -> 10+AUTO_16_SSE.
 * A return value of 10 or more tells the caller (tdec_iteration_8) to fall
 * back to the 16-bit decoder whose slot is the value minus 10. Any other
 * sub-block count is reported on stderr and 0 is returned. */
static int tdec_sb_idx_8(uint32_t long_cb) {
  const uint32_t nof_sb = srslte_tdec_autoimp_get_subblocks_8bit(long_cb);

  if (nof_sb == 32) {
    return AUTO_8_AVXWIN;
  }
  if (nof_sb == 16) {
    return AUTO_8_SSEWIN;
  }
  if (nof_sb == 8) {
    return 10+AUTO_16_SSEWIN;
  }
  if (nof_sb == 0) {
    return 10+AUTO_16_SSE;
  }

  fprintf(stderr, "Error in tdec_sb_idx_8() invalid nof_sb=%d\n", nof_sb);
  return 0;
}
void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb);
#else
return srslte_tdec_gen_decision(&h->tdec_gen, output[0], long_cb);
#endif
// TODO: Implement an SSE version. Not really a problem, since this is only called at very low rates.
/* Widens len 8-bit values from in to 16-bit values in out (sign-extending). */
static void convert_8_to_16(int8_t *in, int16_t *out, uint32_t len)
{
  const int8_t *src = in;
  int16_t      *dst = out;

  for (uint32_t remaining = len; remaining > 0; remaining--) {
    *dst++ = (int16_t) *src++;
  }
}
uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t *h) {
#ifdef LV_HAVE_AVX2
return 2;
#else
return 1;
#endif
/* Narrows len 16-bit values from in to 8-bit values in out.
 *
 * Values outside the int8_t range are saturated to INT8_MIN / INT8_MAX. A
 * plain (int8_t) cast would wrap instead (e.g. 200 -> -56), which flips the
 * sign of a strong LLR and turns a confident decision into the opposite one.
 * Values already inside [-128, 127] are copied unchanged. */
static void convert_16_to_8(int16_t *in, int8_t *out, uint32_t len)
{
  for (uint32_t i = 0; i < len; i++) {
    int16_t v = in[i];
    if (v > INT8_MAX) {
      v = INT8_MAX;
    } else if (v < INT8_MIN) {
      v = INT8_MIN;
    }
    out[i] = (int8_t) v;
  }
}
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
output_par[0] = output;
srslte_tdec_decision_par(h, output_par, long_cb);
static void tdec_iteration_8(srslte_tdec_t * h, int8_t * input)
{
// Select decoder if in auto mode
if (h->dec_type == SRSLTE_TDEC_AUTO) {
h->current_llr_type = SRSLTE_TDEC_8;
h->current_dec = tdec_sb_idx_8(h->current_long_cb);
h->current_inter_idx = interleaver_idx(h->nof_blocks8[h->current_dec]);
// If long_cb is not multiple of any 8-bit decoder, use a 16-bit decoder and do type conversion
if (h->current_dec >= 10) {
h->current_llr_type = SRSLTE_TDEC_16;
h->current_dec -= 10;
h->current_inter_idx = interleaver_idx(h->nof_blocks16[h->current_dec]);
}
} else {
h->current_dec = 0;
}
void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_decision_byte(&h->tdec_simd, output, long_cb);
#else
srslte_tdec_gen_decision_byte(&h->tdec_gen, output[0], long_cb);
#endif
if (h->current_llr_type == SRSLTE_TDEC_16) {
if (!h->n_iter) {
convert_8_to_16(input, h->input_conv, 3*h->current_long_cb+12);
}
run_tdec_iteration_16bit(h, h->input_conv);
} else {
run_tdec_iteration_8bit(h, input);
}
}
void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_decision_byte_cb(&h->tdec_simd, output, cb_idx, long_cb);
#else
srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb);
#endif
static void tdec_iteration_16(srslte_tdec_t * h, int16_t * input)
{
// Select decoder if in auto mode
if (h->dec_type == SRSLTE_TDEC_AUTO) {
h->current_llr_type = SRSLTE_TDEC_16;
h->current_dec = tdec_sb_idx(h->current_long_cb);
} else {
h->current_dec = 0;
}
h->current_inter_idx = interleaver_idx(h->nof_blocks16[h->current_dec]);
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
output_par[0] = output;
srslte_tdec_decision_byte_par(h, output_par, long_cb);
if (h->current_llr_type == SRSLTE_TDEC_8) {
h->current_inter_idx = interleaver_idx(h->nof_blocks8[h->current_dec]);
if (!h->n_iter) {
convert_16_to_8(input, h->input_conv, 3*h->current_long_cb+12);
}
run_tdec_iteration_8bit(h, h->input_conv);
} else {
run_tdec_iteration_16bit(h, input);
}
}
int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb);
#else
srslte_vec_convert_if(input[0], 0.01, h->input_conv, 3*long_cb+12);
return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output[0], nof_iterations, long_cb);
#endif
/* Prepares the decoder for a new codeblock of length long_cb.
 *
 * Fails with -1 (after printing to stderr) when long_cb exceeds the
 * max_long_cb the object was initialised with, or when
 * srslte_cbsegm_cbindex() does not yield a non-negative index for long_cb.
 * In the second case n_iter, current_long_cb and current_cbidx have already
 * been overwritten. Returns 0 on success. */
int srslte_tdec_new_cb(srslte_tdec_t * h, uint32_t long_cb)
{
  int status = 0;

  if (long_cb <= h->max_long_cb) {
    /* Restart the iteration counter and record the new codeblock size */
    h->n_iter          = 0;
    h->current_long_cb = long_cb;
    h->current_cbidx   = srslte_cbsegm_cbindex(long_cb);
    if (h->current_cbidx < 0) {
      fprintf(stderr, "Invalid CB length %d\n", long_cb);
      status = -1;
    }
  } else {
    fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
            h->max_long_cb);
    status = -1;
  }

  return status;
}
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb)
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t * input, uint8_t *output)
{
uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
output_par[0] = output;
int16_t *input_par[SRSLTE_TDEC_MAX_NPAR];
input_par[0] = input;
if (h->current_cbidx >= 0) {
tdec_iteration_16(h, input);
tdec_decision_byte(h, output);
}
}
return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb);
/* Decodes one codeblock of 16-bit input values.
 *
 * Calls srslte_tdec_new_cb() for long_cb and returns SRSLTE_ERROR if that
 * fails. Otherwise tdec_iteration_16() is called repeatedly until h->n_iter is
 * no longer below nof_iterations; at least one iteration is always run, even
 * for nof_iterations == 0. The decoded bytes are then written to output via
 * tdec_decision_byte() and SRSLTE_SUCCESS is returned. */
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output,
                        uint32_t nof_iterations, uint32_t long_cb)
{
  if (srslte_tdec_new_cb(h, long_cb) != 0) {
    return SRSLTE_ERROR;
  }

  for (;;) {
    tdec_iteration_16(h, input);
    if (!(h->n_iter < nof_iterations)) {
      break;
    }
  }

  tdec_decision_byte(h, output);
  return SRSLTE_SUCCESS;
}
/* Runs a single decoding iteration on 8-bit input and refreshes the decoded
 * bytes in output. Does nothing when no valid codeblock index is set
 * (h->current_cbidx is negative). */
void srslte_tdec_iteration_8bit(srslte_tdec_t * h, int8_t * input, uint8_t *output)
{
  if (h->current_cbidx < 0) {
    return;
  }
  tdec_iteration_8(h, input);
  tdec_decision_byte(h, output);
}
/* Decodes one codeblock of 8-bit input values.
 *
 * Calls srslte_tdec_new_cb() for long_cb and returns SRSLTE_ERROR if that
 * fails. Otherwise tdec_iteration_8() is called repeatedly until h->n_iter is
 * no longer below nof_iterations; at least one iteration is always run, even
 * for nof_iterations == 0. The decoded bytes are then written to output via
 * tdec_decision_byte() and SRSLTE_SUCCESS is returned. */
int srslte_tdec_run_all_8bit(srslte_tdec_t * h, int8_t * input, uint8_t *output,
                             uint32_t nof_iterations, uint32_t long_cb)
{
  if (srslte_tdec_new_cb(h, long_cb) != 0) {
    return SRSLTE_ERROR;
  }

  for (;;) {
    tdec_iteration_8(h, input);
    if (!(h->n_iter < nof_iterations)) {
      break;
    }
  }

  tdec_decision_byte(h, output);
  return SRSLTE_SUCCESS;
}
/* Returns the current value of the decoder's iteration counter (h->n_iter). */
int srslte_tdec_get_nof_iterations(srslte_tdec_t * h)
{
  const int iterations = h->n_iter;
  return iterations;
}

@ -1,475 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd.h"
#include "srslte/phy/utils/vector.h"
#include <inttypes.h>
#define NUMSTATES 8
#define NINPUTS 2
#define TAIL 3
#define TOTALTAIL 12
#define INF 10000
#define ZERO 0
#ifdef LV_HAVE_AVX2
#include <smmintrin.h>
#include <immintrin.h>
// Number of CBs processed in parallel in AVX
#define NCB 2
/*
static void print_256i(__m256i x) {
int16_t *s = (int16_t*) &x;
printf("[%d", s[0]);
for (int i=1;i<16;i++) {
printf(",%d", s[i]);
}
printf("]\n");
}
*/
/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */
static inline int16_t hMax0(__m256i masked_value)
{
__m128i tmp1 = _mm256_extractf128_si256(masked_value, 0);
__m128i tmp3 = _mm_minpos_epu16(tmp1);
return (int16_t)(_mm_cvtsi128_si32(tmp3));
}
static inline int16_t hMax1(__m256i masked_value)
{
__m128i tmp1 = _mm256_extractf128_si256(masked_value, 1);
__m128i tmp3 = _mm_minpos_epu16(tmp1);
return (int16_t)(_mm_cvtsi128_si32(tmp3));
}
/* Computes the backward (beta) state metrics for two codeblocks at once and
 * writes the per-step decoder outputs.
 *
 * Every 256-bit register holds eight 16-bit state metrics for each codeblock,
 * one codeblock per 128-bit lane: the lower lane feeds output[0] and the upper
 * lane feeds output[1] (see BETA_STEP_CNT).
 *
 *   s       - reads the alpha metrics from s->alpha and the branch metrics
 *             from s->branch; both are accessed with aligned 256-bit loads.
 *   output  - output[0] and output[1] receive one int16_t per trellis step.
 *   long_cb - codeblock length; three extra tail steps are processed first.
 *
 * NOTE(review): the main loop steps k down by 8 and writes output[x][k-d] for
 * d = 0..7, so it appears to require long_cb to be a multiple of 8 -- confirm
 * against the callers.
 */
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{
int k;
/* Index one past the last tail step (3 tail steps follow the long_cb data steps) */
uint32_t end = long_cb + 3;
const __m256i *alphaPtr = (const __m256i*) s->alpha;
/* Initial beta: state 0 has metric 0 and every other state -INF, in both lanes */
__m256i beta_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
__m256i g, bp, bn, alpha_k;
/* Define the shuffle constant for the positive beta */
__m256i shuf_bp = _mm256_set_epi8(
// 1st CB
15+16, 14+16, // 7
7+16, 6+16, // 3
5+16, 4+16, // 2
13+16, 12+16, // 6
11+16, 10+16, // 5
3+16, 2+16, // 1
1+16, 0+16, // 0
9+16, 8+16, // 4
// 2nd CB
15, 14, // 7
7, 6, // 3
5, 4, // 2
13, 12, // 6
11, 10, // 5
3, 2, // 1
1, 0, // 0
9, 8 // 4
);
/* Define the shuffle constant for the negative beta */
__m256i shuf_bn = _mm256_set_epi8(
7+16, 6+16, // 3
15+16, 14+16, // 7
13+16, 12+16, // 6
5+16, 4+16, // 2
3+16, 2+16, // 1
11+16, 10+16, // 5
9+16, 8+16, // 4
1+16, 0+16, // 0
7, 6, // 3
15, 14, // 7
13, 12, // 6
5, 4, // 2
3, 2, // 1
11, 10, // 5
9, 8, // 4
1, 0 // 0
);
/* Start reading alpha at vector index long_cb-1 and walk backwards from there */
alphaPtr += long_cb-1;
/* Define shuffle for branch costs */
__m256i shuf_g[4];
shuf_g[3] = _mm256_set_epi8(3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,
3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2);
shuf_g[2] = _mm256_set_epi8(7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,
7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6);
shuf_g[1] = _mm256_set_epi8(11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,
11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
shuf_g[0] = _mm256_set_epi8(15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,
15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
/* Define shuffle for beta normalization */
/* (broadcasts the first 16-bit metric of each 128-bit lane to all positions of that lane) */
__m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
__m256i gv;
/* Point at the last 256-bit vector (16 int16_t) of the non-tail branch metrics */
int16_t *b = &s->branch[2*NCB*long_cb-16];
__m256i *gPtr = (__m256i*) b;
/* This defines a beta computation step:
* Adds and subtracts the branch metrics to the previous beta step,
* shuffles the states according to the trellis path and selects maximum state
*/
#define BETA_STEP(g) bp = _mm256_add_epi16(beta_k, g);\
bn = _mm256_sub_epi16(beta_k, g);\
bp = _mm256_shuffle_epi8(bp, shuf_bp);\
bn = _mm256_shuffle_epi8(bn, shuf_bn);\
beta_k = _mm256_max_epi16(bp, bn);
/* Loads the alpha metrics from memory and adds them to the temporal bn and bp
* metrics. Then computes horizontal maximum of both metrics and computes difference.
* hMax0/hMax1 return the minimum unsigned element of a lane, so both metrics are
* first subtracted from 0x7FFF to turn the maximum search into a minimum search.
*/
#define BETA_STEP_CNT(c,d) g = _mm256_shuffle_epi8(gv, shuf_g[c]);\
BETA_STEP(g)\
alpha_k = _mm256_load_si256(alphaPtr);\
alphaPtr--;\
bp = _mm256_add_epi16(bp, alpha_k);\
bn = _mm256_add_epi16(bn, alpha_k);\
bn = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bn);\
bp = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bp);\
output[0][k-d] = hMax0(bn) - hMax0(bp);\
output[1][k-d] = hMax1(bn) - hMax1(bp);
/* The tail does not require to load alpha or produce outputs. Only update
* beta metrics accordingly */
/* NOTE(review): the second codeblock's tail metrics are read at offset +6 from
* the first one's; that layout is set by whoever fills s->branch -- confirm. */
for (k=end-1; k>=long_cb; k--) {
int16_t g0_1 = s->branch[2*NCB*k];
int16_t g1_1 = s->branch[2*NCB*k+1];
int16_t g0_2 = s->branch[2*NCB*k+6];
int16_t g1_2 = s->branch[2*NCB*k+6+1];
g = _mm256_set_epi16(g1_2, g0_2, g0_2, g1_2, g1_2, g0_2, g0_2, g1_2, g1_1, g0_1, g0_1, g1_1, g1_1, g0_1, g0_1, g1_1);
BETA_STEP(g);
}
/* Each loop iteration processes 8 trellis steps (two branch-metric loads of 4
* steps each) and normalizes beta after every group of 4 steps. k enters the
* loop at long_cb-1, the value left by the tail loop above. */
__m256i norm;
for (; k >= 0; k-=8) {
gv = _mm256_load_si256(gPtr);
gPtr--;
BETA_STEP_CNT(0,0);
BETA_STEP_CNT(1,1);
BETA_STEP_CNT(2,2);
BETA_STEP_CNT(3,3);
norm = _mm256_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm256_sub_epi16(beta_k, norm);
gv = _mm256_load_si256(gPtr);
gPtr--;
BETA_STEP_CNT(0,4);
BETA_STEP_CNT(1,5);
BETA_STEP_CNT(2,6);
BETA_STEP_CNT(3,7);
norm = _mm256_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm256_sub_epi16(beta_k, norm);
}
}
/* Computes the forward (alpha) state metrics for two codeblocks at once and
 * stores one 256-bit vector of metrics per trellis step in s->alpha.
 *
 * Every 256-bit vector holds eight 16-bit state metrics for each codeblock,
 * one codeblock per 128-bit lane. The first vector of s->alpha is seeded with
 * the initial state; computed vectors are stored from the second slot onwards.
 *
 *   s       - branch metrics are read from s->branch and alpha metrics are
 *             written to s->alpha, both with aligned 256-bit accesses.
 *   long_cb - codeblock length; long_cb/8 loop iterations of 8 steps each are
 *             run, so any remainder of long_cb modulo 8 is not processed.
 */
void map_avx_alpha(map_gen_t * s, uint32_t long_cb)
{
uint32_t k;
int16_t *alpha1 = s->alpha;
int16_t *alpha2 = &s->alpha[8];
uint32_t i;
/* Seed the first stored vector: state 0 at metric 0, all other states at -INF */
alpha1[0] = 0;
alpha2[0] = 0;
for (i = 1; i < 8; i++) {
alpha1[i] = -INF;
alpha2[i] = -INF;
}
/* Define the shuffle constant for the positive alpha */
__m256i shuf_ap = _mm256_set_epi8(
// 1st CB
31, 30, // 7
25, 24, // 4
23, 22, // 3
17, 16, // 0
29, 28, // 6
27, 26, // 5
21, 20, // 2
19, 18, // 1
// 2nd CB
15, 14, // 7
9, 8, // 4
7, 6, // 3
1, 0, // 0
13, 12, // 6
11, 10, // 5
5, 4, // 2
3, 2 // 1
);
/* Define the shuffle constant for the negative alpha */
__m256i shuf_an = _mm256_set_epi8(
// 1st CB
29, 28, // 6
27, 26, // 5
21, 20, // 2
19, 18, // 1
31, 30, // 7
25, 24, // 4
23, 22, // 3
17, 16, // 0
// 2nd CB
13, 12, // 6
11, 10, // 5
5, 4, // 2
3, 2, // 1
15, 14, // 7
9, 8, // 4
7, 6, // 3
1, 0 // 0
);
/* Define shuffle for branch costs */
__m256i shuf_g[4];
shuf_g[0] = _mm256_set_epi8(3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,
3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2);
shuf_g[1] = _mm256_set_epi8(7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,
7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6);
shuf_g[2] = _mm256_set_epi8(11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,
11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10);
shuf_g[3] = _mm256_set_epi8(15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,
15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14);
/* Normalization shuffle: broadcasts the first 16-bit metric of each 128-bit lane across that lane */
__m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
__m256i* alphaPtr = (__m256i*) s->alpha;
/* Skip the seeded initial vector; computed metrics are stored after it */
alphaPtr++;
__m256i gv;
__m256i *gPtr = (__m256i*) s->branch;
__m256i g, ap, an;
__m256i alpha_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
/* This defines an alpha computation step:
* Adds and subtracts the branch metrics to the previous alpha step,
* shuffles the states according to the trellis path and selects maximum state.
* NOTE: the macro's last line ends with a line continuation, so the comment
* line that follows the macro is spliced into the macro definition; it must
* stay a single-line comment.
*/
#define ALPHA_STEP(c) g = _mm256_shuffle_epi8(gv, shuf_g[c]); \
ap = _mm256_add_epi16(alpha_k, g);\
an = _mm256_sub_epi16(alpha_k, g);\
ap = _mm256_shuffle_epi8(ap, shuf_ap);\
an = _mm256_shuffle_epi8(an, shuf_an);\
alpha_k = _mm256_max_epi16(ap, an);\
_mm256_store_si256(alphaPtr, alpha_k);\
alphaPtr++;\
/* In this loop, we compute 8 steps and normalize twice for each branch metrics memory load */
__m256i norm;
/* Each iteration loads two branch-metric vectors (4 steps each) and normalizes after every 4 steps */
for (k = 0; k < long_cb/8; k++) {
gv = _mm256_load_si256(gPtr);
gPtr++;
ALPHA_STEP(0);
ALPHA_STEP(1);
ALPHA_STEP(2);
ALPHA_STEP(3);
norm = _mm256_shuffle_epi8(alpha_k, shuf_norm);
alpha_k = _mm256_sub_epi16(alpha_k, norm);
gv = _mm256_load_si256(gPtr);
gPtr++;
ALPHA_STEP(0);
ALPHA_STEP(1);
ALPHA_STEP(2);
ALPHA_STEP(3);
norm = _mm256_shuffle_epi8(alpha_k, shuf_norm);
alpha_k = _mm256_sub_epi16(alpha_k, norm);
}
}
void map_sse_gamma_single(int16_t *output, int16_t *input, int16_t *app, int16_t *parity)
{
__m128i res00, res10, res01, res11, res0, res1;
__m128i in, ap, pa, g1, g0;
__m128i *inPtr = (__m128i*) input;
__m128i *appPtr = (__m128i*) app;
__m128i *paPtr = (__m128i*) parity;
__m128i *resPtr = (__m128i*) output;
__m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
in = _mm_load_si128(inPtr);
inPtr++;
pa = _mm_load_si128(paPtr);
paPtr++;
if (appPtr) {
ap = _mm_load_si128(appPtr);
appPtr++;
in = _mm_add_epi16(ap, in);
}
g1 = _mm_add_epi16(in, pa);
g0 = _mm_sub_epi16(in, pa);
g1 = _mm_srai_epi16(g1, 1);
g0 = _mm_srai_epi16(g0, 1);
res00 = _mm_shuffle_epi8(g0, res00_mask);
res10 = _mm_shuffle_epi8(g0, res10_mask);
res01 = _mm_shuffle_epi8(g1, res01_mask);
res11 = _mm_shuffle_epi8(g1, res11_mask);
res0 = _mm_or_si128(res00, res01);
res1 = _mm_or_si128(res10, res11);
_mm_store_si128(resPtr, res0);
resPtr++;
_mm_store_si128(resPtr, res1);
resPtr++;
}
/* Compute branch metrics (gamma) for one codeblock and write them into h->branch.
 *
 * input, parity: int16 samples, read with aligned 256-bit loads (16 per iteration).
 * app:           optional a-priori values; when not NULL they are added to input.
 * cbidx:         when non-zero the output pointer starts one 128-bit slot later.
 * long_cb:       number of samples; 3 further entries are written after it.
 *
 * For each sample the pair ((in - pa) >> 1, (in + pa) >> 1) is produced. Each
 * 128-bit result is stored and the output pointer is then advanced by two
 * slots, so consecutive results of this call land in every other slot.
 */
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb)
{
__m128i res10, res20, res11, res21, res1, res2;
__m256i in, ap, pa, g1, g0;
__m256i *inPtr = (__m256i*) input;
__m256i *appPtr = (__m256i*) app;
__m256i *paPtr = (__m256i*) parity;
__m128i *resPtr = (__m128i*) h->branch;
/* A non-zero cbidx shifts the starting slot by one */
if (cbidx) {
resPtr++;
}
/* Shuffle masks: res1x pick samples 0..3, res2x pick samples 4..7 of a 128-bit lane.
 * The x0 masks place values on even int16 positions, the x1 masks on odd ones;
 * 0xff yields a zero byte so the two halves can be OR-ed together. */
__m128i res10_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i res11_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i res20_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i res21_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
for (int i=0;i<long_cb/16;i++) {
in = _mm256_load_si256(inPtr);
inPtr++;
pa = _mm256_load_si256(paPtr);
paPtr++;
if (appPtr) {
ap = _mm256_load_si256(appPtr);
appPtr++;
in = _mm256_add_epi16(ap, in);
}
/* Half difference (g0) and half sum (g1) of systematic and parity */
g0 = _mm256_sub_epi16(in, pa);
g1 = _mm256_add_epi16(in, pa);
g0 = _mm256_srai_epi16(g0, 1);
g1 = _mm256_srai_epi16(g1, 1);
/* Lower 128-bit lane: samples 0..7 of this iteration */
__m128i g0_t = _mm256_extractf128_si256(g0, 0);
__m128i g1_t = _mm256_extractf128_si256(g1, 0);
res10 = _mm_shuffle_epi8(g0_t, res10_mask);
res11 = _mm_shuffle_epi8(g1_t, res11_mask);
res20 = _mm_shuffle_epi8(g0_t, res20_mask);
res21 = _mm_shuffle_epi8(g1_t, res21_mask);
res1 = _mm_or_si128(res10, res11);
res2 = _mm_or_si128(res20, res21);
/* Store, then skip one slot (pointer advances by two 128-bit words) */
_mm_store_si128(resPtr, res1);
resPtr++;
resPtr++;
_mm_store_si128(resPtr, res2);
resPtr++;
resPtr++;
/* Upper 128-bit lane: samples 8..15 of this iteration */
g0_t = _mm256_extractf128_si256(g0, 1);
g1_t = _mm256_extractf128_si256(g1, 1);
res10 = _mm_shuffle_epi8(g0_t, res10_mask);
res11 = _mm_shuffle_epi8(g1_t, res11_mask);
res20 = _mm_shuffle_epi8(g0_t, res20_mask);
res21 = _mm_shuffle_epi8(g1_t, res21_mask);
res1 = _mm_or_si128(res10, res11);
res2 = _mm_or_si128(res20, res21);
_mm_store_si128(resPtr, res1);
resPtr++;
resPtr++;
_mm_store_si128(resPtr, res2);
resPtr++;
resPtr++;
}
/* Remaining 8 samples when long_cb is not a multiple of 16.
 * NOTE(review): map_sse_gamma_single stores its two results in consecutive
 * slots, whereas the loop above skips a slot between stores - confirm this
 * matches the layout expected by the alpha/beta recursions. */
if (long_cb%16) {
map_sse_gamma_single((int16_t*) resPtr, (int16_t*) inPtr, (int16_t*) appPtr, (int16_t*) paPtr);
}
/* Three entries past long_cb are computed in scalar code without app;
 * note this path divides by 2 instead of shifting right. */
for (int i=long_cb;i<long_cb+3;i++) {
h->branch[2*i*NCB+cbidx*6] = (input[i] - parity[i])/2;
h->branch[2*i*NCB+cbidx*6+1] = (input[i] + parity[i])/2;
}
}
#endif

@ -39,8 +39,16 @@
#define TAIL 3
#define TOTALTAIL 12
#define INF 9e4
#define ZERO 9e-4
#define INF 10000
#define debug_enabled 0
#if debug_enabled
#define debug_state printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=", k, x, parity[k-1], out); srslte_vec_fprint_s(stdout, alpha, 8); \
printf(", beta="); srslte_vec_fprint_s(stdout, &beta[8*(k)], 8); printf("\n");
#else
#define debug_state
#endif
/************************************************
*
@ -48,14 +56,13 @@
* Decoder
*
************************************************/
static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity,
uint32_t long_cb)
static void map_gen_beta(tdec_gen_t *s, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
{
float m_b[8], new[8], old[8];
float x, y, xy;
int16_t m_b[8], new[8], old[8];
int16_t x, y, xy;
int k;
uint32_t end = long_cb + SRSLTE_TCOD_RATE;
float *beta = s->beta;
int16_t *beta = s->beta;
uint32_t i;
for (i = 0; i < 8; i++) {
@ -64,6 +71,9 @@ static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity,
for (k = end - 1; k >= 0; k--) {
x = input[k];
if (app && k<long_cb) {
x += app[k];
}
y = parity[k];
xy = x + y;
@ -92,19 +102,25 @@ static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity,
old[i] = new[i];
beta[8 * k + i] = old[i];
}
if ((k%4)==0 && k < long_cb) {
for (i = 1; i < 8; i++) {
old[i] -= old[0];
}
old[0] = 0;
}
}
}
static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity, float * output,
uint32_t long_cb)
static void map_gen_alpha(tdec_gen_t *s, int16_t *input, int16_t *app, int16_t *parity, int16_t *output, uint32_t long_cb)
{
float m_b[8], new[8], old[8], max1[8], max0[8];
float m1, m0;
float x, y, xy;
float out;
int16_t m_b[8], new[8], old[8], max1[8], max0[8];
int16_t m1, m0;
int16_t x, y, xy;
int16_t out;
uint32_t k;
uint32_t end = long_cb;
float *beta = s->beta;
int16_t *beta = s->beta;
uint32_t i;
old[0] = 0;
@ -112,12 +128,23 @@ static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity
old[i] = -INF;
}
#if debug_enabled
int16_t alpha[8];
#endif
for (k = 1; k < end + 1; k++) {
x = input[k - 1];
if (app) {
x += app[k - 1];
}
y = parity[k - 1];
xy = x + y;
#if debug_enabled
memcpy(alpha, old, sizeof(int16_t)*8);
#endif
m_b[0] = old[0];
m_b[1] = old[3] + y;
m_b[2] = old[4] + y;
@ -150,251 +177,99 @@ static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity
if (max0[i] > m0)
m0 = max0[i];
}
for (i = 0; i < 8; i++) {
if (m_b[i] > new[i])
new[i] = m_b[i];
old[i] = new[i];
old[i] = new[i]; }
if ((k%4)==0) {
for (i = 1; i < 8; i++) {
old[i] -= old[0];
}
old[0] = 0;
}
out = m1 - m0;
output[k - 1] = out;
debug_state;
}
}
static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb)
int tdec_gen_init(void **hh, uint32_t max_long_cb)
{
bzero(h, sizeof(srslte_map_gen_vl_t));
h->beta = srslte_vec_malloc(sizeof(float) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
*hh = calloc(1, sizeof(tdec_gen_t));
tdec_gen_t *h = (tdec_gen_t*) *hh;
h->beta = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->beta) {
perror("srslte_vec_malloc");
return -1;
}
h->max_long_cb = max_long_cb;
return 0;
return 1;
}
static void map_gen_free(srslte_map_gen_vl_t * h)
void tdec_gen_free(void *hh)
{
tdec_gen_t *h = (tdec_gen_t*) hh;
if (h) {
if (h->beta) {
free(h->beta);
}
bzero(h, sizeof(srslte_map_gen_vl_t));
free(h);
}
}
static void map_gen_dec(srslte_map_gen_vl_t * h, float * input, float * parity, float * output,
uint32_t long_cb)
void tdec_gen_dec(void *hh, int16_t *input, int16_t *app, int16_t *parity, int16_t *output, uint32_t long_cb)
{
uint32_t k;
tdec_gen_t *h = (tdec_gen_t*) hh;
h->beta[(long_cb + TAIL) * NUMSTATES] = 0;
for (k = 1; k < NUMSTATES; k++)
for (uint32_t k = 1; k < NUMSTATES; k++)
h->beta[(long_cb + TAIL) * NUMSTATES + k] = -INF;
map_gen_beta(h, input, parity, long_cb);
map_gen_alpha(h, input, parity, output, long_cb);
map_gen_beta(h, input, app, parity, long_cb);
map_gen_alpha(h, input, app, parity, output, long_cb);
}
/************************************************
*
* TURBO DECODER INTERFACE
*
************************************************/
int srslte_tdec_gen_init(srslte_tdec_gen_t * h, uint32_t max_long_cb)
void tdec_gen_extract_input(int16_t *input, int16_t *syst, int16_t *app2, int16_t *parity0, int16_t *parity1, uint32_t long_cb)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_gen_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb;
h->llr1 = srslte_vec_malloc(sizeof(float) * len);
if (!h->llr1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->llr2 = srslte_vec_malloc(sizeof(float) * len);
if (!h->llr2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->w = srslte_vec_malloc(sizeof(float) * len);
if (!h->w) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst = srslte_vec_malloc(sizeof(float) * len);
if (!h->syst) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity = srslte_vec_malloc(sizeof(float) * len);
if (!h->parity) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
if (map_gen_init(&h->dec, h->max_long_cb)) {
goto clean_and_exit;
}
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
goto clean_and_exit;
}
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
}
h->current_cbidx = -1;
ret = 0;
clean_and_exit:if (ret == -1) {
srslte_tdec_gen_free(h);
}
return ret;
}
void srslte_tdec_gen_free(srslte_tdec_gen_t * h)
{
if (h->llr1) {
free(h->llr1);
}
if (h->llr2) {
free(h->llr2);
}
if (h->w) {
free(h->w);
}
if (h->syst) {
free(h->syst);
}
if (h->parity) {
free(h->parity);
}
map_gen_free(&h->dec);
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
srslte_tc_interl_free(&h->interleaver[i]);
}
bzero(h, sizeof(srslte_tdec_gen_t));
}
void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h, float * input, uint32_t long_cb)
{
uint32_t i;
if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
// Prepare systematic and parity bits for MAP DEC #1
for (i = 0; i < long_cb; i++) {
h->syst[i] = input[SRSLTE_TCOD_RATE * i] + h->w[i];
h->parity[i] = input[SRSLTE_TCOD_RATE * i + 1];
}
for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
h->syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)];
h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1];
for (uint32_t i = 0; i < long_cb; i++) {
syst[i] = input[SRSLTE_TCOD_RATE * i];
parity0[i] = input[SRSLTE_TCOD_RATE * i + 1];
parity1[i] = input[SRSLTE_TCOD_RATE * i + 2];
}
for (uint32_t i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)];
parity0[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1];
// Run MAP DEC #1
map_gen_dec(&h->dec, h->syst, h->parity, h->llr1, long_cb);
// Prepare systematic and parity bits for MAP DEC #1
for (i = 0; i < long_cb; i++) {
h->syst[i] = h->llr1[inter[i]]
- h->w[inter[i]];
h->parity[i] = input[SRSLTE_TCOD_RATE * i + 2];
}
for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
h->syst[i] =
input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)];
h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE
app2[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)];
parity1[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE
+ NINPUTS * (i - long_cb) + 1];
}
// Run MAP DEC #2
map_gen_dec(&h->dec, h->syst, h->parity, h->llr2, long_cb);
//printf("llr2=");
//srslte_vec_fprint_f(stdout, h->llr2, long_cb);
// Update a-priori LLR from the last iteration
for (i = 0; i < long_cb; i++) {
h->w[i] += h->llr2[deinter[i]] - h->llr1[i];
}
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_gen_reset() first\n");
}
// Increase number of iterations
h->n_iter++;
}
int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb)
{
if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
h->max_long_cb);
return -1;
}
memset(h->w, 0, sizeof(float) * long_cb);
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
h->current_cb_len = long_cb;
if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb);
return -1;
}
return 0;
}
void srslte_tdec_gen_decision(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
void tdec_gen_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb)
{
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
uint32_t i;
for (i = 0; i < long_cb; i++) {
output[i] = (h->llr2[deinter[i]] > 0) ? 1 : 0;
}
}
void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
{
uint32_t i;
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
// long_cb is always byte aligned
for (i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->llr2[deinter[8*i+0]]>0?mask[0]:0;
uint8_t out1 = h->llr2[deinter[8*i+1]]>0?mask[1]:0;
uint8_t out2 = h->llr2[deinter[8*i+2]]>0?mask[2]:0;
uint8_t out3 = h->llr2[deinter[8*i+3]]>0?mask[3]:0;
uint8_t out4 = h->llr2[deinter[8*i+4]]>0?mask[4]:0;
uint8_t out5 = h->llr2[deinter[8*i+5]]>0?mask[5]:0;
uint8_t out6 = h->llr2[deinter[8*i+6]]>0?mask[6]:0;
uint8_t out7 = h->llr2[deinter[8*i+7]]>0?mask[7]:0;
for (uint32_t i = 0; i < long_cb/8; i++) {
uint8_t out0 = app1[8*i+0]>0?mask[0]:0;
uint8_t out1 = app1[8*i+1]>0?mask[1]:0;
uint8_t out2 = app1[8*i+2]>0?mask[2]:0;
uint8_t out3 = app1[8*i+3]>0?mask[3]:0;
uint8_t out4 = app1[8*i+4]>0?mask[4]:0;
uint8_t out5 = app1[8*i+5]>0?mask[5]:0;
uint8_t out6 = app1[8*i+6]>0?mask[6]:0;
uint8_t out7 = app1[8*i+7]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
}
}
int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, float * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb)
{
uint32_t iter = 0;
if (srslte_tdec_gen_reset(h, long_cb)) {
return SRSLTE_ERROR;
}
do {
srslte_tdec_gen_iteration(h, input, long_cb);
iter++;
} while (iter < nof_iterations);
srslte_tdec_gen_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS;
}

@ -1,542 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd.h"
#include "srslte/phy/utils/vector.h"
#include <inttypes.h>
#define NUMSTATES 8
#define NINPUTS 2
#define TAIL 3
#define TOTALTAIL 12
#define INF 10000
#define ZERO 0
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
// Define SSE/AVX implementations
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb);
void map_sse_alpha(map_gen_t * s, uint32_t long_cb);
void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb);
#ifdef LV_HAVE_AVX2
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb);
void map_avx_alpha(map_gen_t * s, uint32_t long_cb);
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb);
#endif
/* Dispatches the backward recursion to the SSE (one codeblock) or, when built
 * with AVX2, the AVX (two codeblocks) implementation. Any other nof_cb value
 * is ignored. */
void map_simd_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
  switch (nof_cb) {
    case 1:
      map_sse_beta(s, output[0], long_cb);
      break;
#ifdef LV_HAVE_AVX2
    case 2:
      map_avx_beta(s, output, long_cb);
      break;
#endif
    default:
      break;
  }
}
/* Dispatches the forward recursion to the SSE (one codeblock) or, when built
 * with AVX2, the AVX (two codeblocks) implementation. Any other nof_cb value
 * is ignored. */
void map_simd_alpha(map_gen_t * s, uint32_t nof_cb, uint32_t long_cb)
{
  switch (nof_cb) {
    case 1:
      map_sse_alpha(s, long_cb);
      break;
#ifdef LV_HAVE_AVX2
    case 2:
      map_avx_alpha(s, long_cb);
      break;
#endif
    default:
      break;
  }
}
/* Dispatches the branch-metric computation to the SSE (one codeblock) or, when
 * built with AVX2, the AVX (two codeblocks) implementation. cbidx is only
 * forwarded to the AVX variant. Any other nof_cb value is ignored. */
void map_simd_gamma(map_gen_t * s, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t nof_cb, uint32_t long_cb)
{
  switch (nof_cb) {
    case 1:
      map_sse_gamma(s, input, app, parity, long_cb);
      break;
#ifdef LV_HAVE_AVX2
    case 2:
      map_avx_gamma(s, input, app, parity, cbidx, long_cb);
      break;
#endif
    default:
      break;
  }
}
/* Initializes a constituent decoder object.
 *
 * Zeroes *h, records max_par_cb / max_long_cb and allocates the alpha and
 * branch buffers, each holding
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * max_par_cb int16 values.
 *
 * Returns 0 on success, -1 if an allocation fails. On failure no buffer is
 * left allocated, so the object does not need to be passed to map_simd_free().
 */
int map_simd_init(map_gen_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
{
  bzero(h, sizeof(map_gen_t));

  h->max_par_cb  = max_par_cb;
  h->max_long_cb = max_long_cb;

  h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb);
  if (!h->alpha) {
    perror("srslte_vec_malloc");
    return -1;
  }

  h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb);
  if (!h->branch) {
    perror("srslte_vec_malloc");
    /* Release the first buffer so a failed init does not leak it; the pointer
     * is cleared so a later map_simd_free() stays harmless. */
    free(h->alpha);
    h->alpha = NULL;
    return -1;
  }

  return 0;
}
/* Releases the alpha and branch buffers of a constituent decoder and clears
 * the whole object. Buffers that were never allocated (NULL) are tolerated. */
void map_simd_free(map_gen_t * h)
{
  /* free(NULL) is a no-op, so no explicit guards are required */
  free(h->alpha);
  free(h->branch);
  bzero(h, sizeof(*h));
}
/* Runs one instance of a constituent decoder.
 *
 * cb_mask selects the codeblocks to process: 1 = codeblock 0 only,
 * 2 = codeblock 1 only, 3 = both in parallel. app may be NULL, in which case
 * no a-priori information is passed to the branch-metric computation.
 * Any other cb_mask (including 0) selects nothing and the function returns
 * without touching the decoder state; previously the recursions would run
 * with a NULL output pointer.
 */
void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], int16_t *app[SRSLTE_TDEC_MAX_NPAR], int16_t * parity[SRSLTE_TDEC_MAX_NPAR],
                  int16_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t cb_mask, uint32_t long_cb)
{
  uint32_t nof_cb = 1;
  int16_t *outptr[SRSLTE_TDEC_MAX_NPAR] = { NULL, NULL };

  // Compute branch metrics
  switch(cb_mask) {
    case 1:
      nof_cb = 1;
      outptr[0] = output[0];
      map_simd_gamma(h, input[0], app?app[0]:NULL, parity[0], 0, 1, long_cb);
      break;
    case 2:
      // A single codeblock always goes through slot 0 of the decoder
      nof_cb = 1;
      outptr[0] = output[1];
      map_simd_gamma(h, input[1], app?app[1]:NULL, parity[1], 0, 1, long_cb);
      break;
    case 3:
      nof_cb = 2;
      for (int i=0;i<2;i++) {
        outptr[i] = output[i];
        map_simd_gamma(h, input[i], app?app[i]:NULL, parity[i], i, 2, long_cb);
      }
      break;
    default:
      // Nothing selected: no branch metrics were computed and there is no
      // output buffer to write to, so skip the recursions.
      return;
  }

  // Forward recursion
  map_simd_alpha(h, nof_cb, long_cb);

  // Backwards recursion + LLR computation
  map_simd_beta(h, outptr, nof_cb, long_cb);
}
/* Initializes the turbo decoder object.
 *
 * Zeroes *h, then for each of the max_par_cb parallel codeblocks allocates the
 * app1, app2, ext1, ext2, syst, parity0 and parity1 buffers
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL int16 values each), initializes the
 * constituent decoder and generates one interleaver per codeblock size.
 *
 * Returns 0 on success. On any failure everything is released through
 * srslte_tdec_simd_free() and -1 is returned.
 */
int srslte_tdec_simd_init(srslte_tdec_simd_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
{
/* Allocate one working buffer or bail out through the common cleanup path */
#define TDEC_SIMD_ALLOC(field)                              \
  do {                                                      \
    (field) = srslte_vec_malloc(sizeof(int16_t) * len);     \
    if (!(field)) {                                         \
      perror("srslte_vec_malloc");                          \
      goto clean_and_exit;                                  \
    }                                                       \
  } while (0)

  int ret = -1;
  bzero(h, sizeof(srslte_tdec_simd_t));
  uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;

  h->max_long_cb = max_long_cb;
  h->max_par_cb  = max_par_cb;

  for (int i=0;i<h->max_par_cb;i++) {
    TDEC_SIMD_ALLOC(h->app1[i]);
    TDEC_SIMD_ALLOC(h->app2[i]);
    TDEC_SIMD_ALLOC(h->ext1[i]);
    TDEC_SIMD_ALLOC(h->ext2[i]);
    TDEC_SIMD_ALLOC(h->syst[i]);
    TDEC_SIMD_ALLOC(h->parity0[i]);
    TDEC_SIMD_ALLOC(h->parity1[i]);
  }

  if (map_simd_init(&h->dec, h->max_par_cb, h->max_long_cb)) {
    goto clean_and_exit;
  }

  for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
    if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
      goto clean_and_exit;
    }
    srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
  }

  /* No codeblock length selected until srslte_tdec_simd_reset() is called */
  h->current_cbidx = -1;
  h->cb_mask = 0;
  ret = 0;

clean_and_exit:
  if (ret == -1) {
    srslte_tdec_simd_free(h);
  }
  return ret;

#undef TDEC_SIMD_ALLOC
}
/* Releases every buffer owned by the turbo decoder (per-codeblock working
 * buffers, the constituent decoder and all interleavers) and clears the
 * object. Buffers that were never allocated are tolerated. */
void srslte_tdec_simd_free(srslte_tdec_simd_t * h)
{
  /* free(NULL) is a no-op, so unallocated slots need no special casing */
  for (int cb = 0; cb < h->max_par_cb; cb++) {
    free(h->app1[cb]);
    free(h->app2[cb]);
    free(h->ext1[cb]);
    free(h->ext2[cb]);
    free(h->syst[cb]);
    free(h->parity0[cb]);
    free(h->parity1[cb]);
  }

  map_simd_free(&h->dec);

  for (int k = 0; k < SRSLTE_NOF_TC_CB_SIZES; k++) {
    srslte_tc_interl_free(&h->interleaver[k]);
  }

  bzero(h, sizeof(*h));
}
/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into
 * 3 buffers ready to be used by compute_gamma()
 *
 * The input holds int16 samples ordered s, p0, p1, s, p0, p1, ... Each loop
 * iteration consumes 24 samples (three aligned 128-bit loads) and produces 8
 * samples for each of h->syst[cbidx], h->parity0[cbidx] and h->parity1[cbidx].
 * The 12 values following the first 3*long_cb samples are split in two groups
 * of 3 pairs: the first group goes to syst/parity0, the second to app2/parity1,
 * both at positions long_cb..long_cb+2.
 */
void deinterleave_input_simd(srslte_tdec_simd_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb) {
uint32_t i;
__m128i *inputPtr = (__m128i*) input;
__m128i in0, in1, in2;
__m128i s0, s1, s2, s;
__m128i p00, p01, p02, p0;
__m128i p10, p11, p12, p1;
__m128i *sysPtr = (__m128i*) h->syst[cbidx];
__m128i *pa0Ptr = (__m128i*) h->parity0[cbidx];
__m128i *pa1Ptr = (__m128i*) h->parity1[cbidx];
/* In all masks below 0xff produces a zero byte, so the three partial results
 * of a stream occupy disjoint positions and can be merged with OR. */
// pick samples 0, 3, 6 from 1st word
__m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0);
// pick samples 1, 4, 7 from 2nd word
__m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff);
// pick samples 2, 5 from 3rd word
__m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// pick samples 1, 4, 7 from 1st word
__m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2);
// pick samples 2, 5 from 2nd word
__m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff);
// pick samples 0, 3, 6 from 3rd word
__m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// pick samples 2, 5 from 1st word
__m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4);
// pick samples 0, 3, 6 from 2nd word
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
// pick samples 1, 4, 7 from 3rd word
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// Split systematic and parity bits
for (i = 0; i < long_cb/8; i++) {
in0 = _mm_load_si128(inputPtr); inputPtr++;
in1 = _mm_load_si128(inputPtr); inputPtr++;
in2 = _mm_load_si128(inputPtr); inputPtr++;
/* Deinterleave Systematic bits */
s0 = _mm_shuffle_epi8(in0, s0_mask);
s1 = _mm_shuffle_epi8(in1, s1_mask);
s2 = _mm_shuffle_epi8(in2, s2_mask);
s = _mm_or_si128(s0, s1);
s = _mm_or_si128(s, s2);
_mm_store_si128(sysPtr, s);
sysPtr++;
/* Deinterleave parity 0 bits */
p00 = _mm_shuffle_epi8(in0, p00_mask);
p01 = _mm_shuffle_epi8(in1, p01_mask);
p02 = _mm_shuffle_epi8(in2, p02_mask);
p0 = _mm_or_si128(p00, p01);
p0 = _mm_or_si128(p0, p02);
_mm_store_si128(pa0Ptr, p0);
pa0Ptr++;
/* Deinterleave parity 1 bits */
p10 = _mm_shuffle_epi8(in0, p10_mask);
p11 = _mm_shuffle_epi8(in1, p11_mask);
p12 = _mm_shuffle_epi8(in2, p12_mask);
p1 = _mm_or_si128(p10, p11);
p1 = _mm_or_si128(p1, p12);
_mm_store_si128(pa1Ptr, p1);
pa1Ptr++;
}
/* First tail group: 3 (systematic, parity0) pairs right after the 3*long_cb samples */
for (i = 0; i < 3; i++) {
h->syst[cbidx][i+long_cb] = input[3*long_cb + 2*i];
h->parity0[cbidx][i+long_cb] = input[3*long_cb + 2*i + 1];
}
/* Second tail group: 3 (app2, parity1) pairs, 6 values further on */
for (i = 0; i < 3; i++) {
h->app2[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i];
h->parity1[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i + 1];
}
}
/* Runs 1 turbo decoder iteration
 *
 * input[i] is the received data for parallel codeblock i, or NULL when that
 * slot is not in use. Requires a prior successful srslte_tdec_simd_reset();
 * otherwise an error is printed and nothing is done. On the first iteration
 * of a codeblock (n_iter == 0) its input is deinterleaved into the working
 * buffers; on later iterations input[i] is only tested against NULL.
 * n_iter[i] is incremented for every codeblock with a non-NULL input.
 */
void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{
int16_t *tmp_app[SRSLTE_TDEC_MAX_NPAR];
if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
/* Without AVX2 only one codeblock can be processed: the second input is
 * dropped. Note that this overwrites the caller's array entry. */
#ifndef LV_HAVE_AVX2
input[1] = NULL;
#endif
/* Bit 0 / bit 1 flag whether codeblock 0 / 1 has input */
h->cb_mask = (input[0]?1:0) | (input[1]?2:0);
/* First iteration of a codeblock: split its input into syst/parity0/parity1 */
for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] == 0 && input[i]) {
//printf("deinterleaveing %d\n",i);
deinterleave_input_simd(h, input[i], i, long_cb);
}
}
// Add apriori information to decoder 1
for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] > 0 && input[i]) {
srslte_vec_sub_sss(h->app1[i], h->ext1[i], h->app1[i], long_cb);
}
}
// Run MAP DEC #1
/* No a-priori information is available on a codeblock's first iteration */
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
tmp_app[i] = h->n_iter[i]?h->app1[i]:NULL;
} else {
tmp_app[i] = NULL;
}
}
map_simd_dec(&h->dec, h->syst, tmp_app, h->parity0, h->ext1, h->cb_mask, long_cb);
// Convert aposteriori information into extrinsic information
for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] > 0 && input[i]) {
srslte_vec_sub_sss(h->ext1[i], h->app1[i], h->ext1[i], long_cb);
}
}
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
srslte_vec_lut_sss(h->ext1[i], deinter, h->app2[i], long_cb);
}
}
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
map_simd_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, h->cb_mask, long_cb);
// Deinterleaved extrinsic bits become apriori info for decoder 1
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
srslte_vec_lut_sss(h->ext2[i], inter, h->app1[i], long_cb);
}
}
/* Count the iteration for every codeblock that was processed */
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
h->n_iter[i]++;
}
}
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_reset() first\n");
}
}
/* Resets the decoder and selects the codeblock length.
 *
 * Fails with -1 (leaving the decoder untouched) when long_cb exceeds the
 * maximum given at init time. Otherwise all iteration counters and the
 * codeblock mask are cleared and the interleaver index for long_cb is looked
 * up; -1 is returned if long_cb is not a valid codeblock size, 0 on success.
 */
int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, uint32_t long_cb)
{
  if (long_cb > h->max_long_cb) {
    fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
            h->max_long_cb);
    return -1;
  }

  int cb = 0;
  while (cb < h->max_par_cb) {
    h->n_iter[cb] = 0;
    cb++;
  }
  h->cb_mask = 0;

  h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
  if (h->current_cbidx >= 0) {
    return 0;
  }

  fprintf(stderr, "Invalid CB length %d\n", long_cb);
  return -1;
}
/* Clears the iteration counter of one parallel codeblock slot, so that its
 * input is deinterleaved again on the next iteration. cb_idx is not range
 * checked. Always returns 0. */
int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h, uint32_t cb_idx)
{
  h->n_iter[cb_idx] = 0;

  return 0;
}
/* Returns the number of iterations run so far for one parallel codeblock
 * slot. cb_idx is not range checked. */
int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h, uint32_t cb_idx)
{
  int iterations = h->n_iter[cb_idx];
  return iterations;
}
/* Hard decision for one codeblock, one output byte per bit.
 *
 * Writes output[k] = 1 when h->app1[cbidx][k] > 0 and 0 otherwise, for
 * k in [0, long_cb). Sixteen values are processed per SIMD iteration; when
 * long_cb is not a multiple of 16 the last 8 values are handled in scalar code
 * (long_cb is therefore expected to be a multiple of 8 and at least 8).
 *
 * h->app1[cbidx] is read with aligned loads. The output buffer is supplied by
 * the caller and carries no alignment guarantee, so it is written with
 * unaligned stores.
 */
void tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb)
{
  __m128i zero     = _mm_set1_epi16(0);
  __m128i lsb_mask = _mm_set1_epi16(1);

  __m128i *appPtr = (__m128i*) h->app1[cbidx];
  __m128i *outPtr = (__m128i*) output;
  __m128i ap, out, out0, out1;

  for (uint32_t i = 0; i < long_cb/16; i++) {
    /* cmpgt yields 0xffff for positive values; masking keeps just bit 0 */
    ap   = _mm_load_si128(appPtr); appPtr++;
    out0 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
    ap   = _mm_load_si128(appPtr); appPtr++;
    out1 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);

    /* Narrow the sixteen 0/1 words to sixteen bytes */
    out = _mm_packs_epi16(out0, out1);
    _mm_storeu_si128(outPtr, out);
    outPtr++;
  }

  if (long_cb%16) {
    for (uint32_t i = 0; i < 8; i++) {
      output[long_cb-8+i] = h->app1[cbidx][long_cb-8+i]>0?1:0;
    }
  }
}
/* Hard decision (one byte per bit) for every parallel codeblock slot.
 * Slots whose output pointer is NULL are skipped, matching the behaviour of
 * srslte_tdec_simd_decision_byte(); this allows a caller decoding a single
 * codeblock to leave the other entry unset. */
void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{
  for (int i=0;i<h->max_par_cb;i++) {
    if (output[i]) {
      tdec_simd_decision(h, output[i], i, long_cb);
    }
  }
}
/* Hard decision for one codeblock, packed 8 bits per output byte.
 *
 * Bit k of the codeblock is 1 when h->app1[cbidx][k] > 0. The first bit of
 * each group of eight ends up in the most significant bit of the output byte.
 * Only long_cb/8 whole bytes are produced.
 */
void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb)
{
  // long_cb is always byte aligned
  const uint32_t nof_bytes = long_cb/8;

  for (uint32_t byte = 0; byte < nof_bytes; byte++) {
    uint8_t packed = 0;
    for (uint32_t bit = 0; bit < 8; bit++) {
      if (h->app1[cbidx][8*byte+bit] > 0) {
        packed |= (uint8_t) (0x80 >> bit);
      }
    }
    output[byte] = packed;
  }
}
/* Packed hard decision (8 bits per byte) for every parallel codeblock slot
 * that has a non-NULL output buffer. */
void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{
  for (int cb = 0; cb < h->max_par_cb; cb++) {
    if (!output[cb]) {
      continue;
    }
    srslte_tdec_simd_decision_byte_cb(h, output[cb], cb, long_cb);
  }
}
/* Runs nof_iterations iterations and decides the output bits.
 *
 * Resets the decoder for long_cb, runs the iterations (at least one, even if
 * nof_iterations is 0) and writes the packed hard decisions to output.
 * Returns SRSLTE_ERROR if the reset fails, SRSLTE_SUCCESS otherwise.
 *
 * The iterations are counted locally instead of polling h->n_iter[0]: that
 * counter only advances when input[0] is non-NULL, so decoding with only the
 * second slot in use would otherwise never terminate.
 */
int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
                             uint32_t nof_iterations, uint32_t long_cb)
{
  if (srslte_tdec_simd_reset(h, long_cb)) {
    return SRSLTE_ERROR;
  }

  uint32_t iter = 0;
  do {
    srslte_tdec_simd_iteration(h, input, long_cb);
    iter++;
  } while (iter < nof_iterations);

  srslte_tdec_simd_decision_byte(h, output, long_cb);

  return SRSLTE_SUCCESS;
}
#endif

@ -1,299 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
#include "srslte/phy/utils/vector.h"
#define TOTALTAIL 12
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb);
void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb);
void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb);
void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb);
/* Runs one constituent decoder pass: the alpha routine first, then the beta
 * routine, which receives the output buffer. */
static void map_sse_inter_dec(srslte_tdec_simd_inter_t * h, int16_t * input, int16_t * parity, int16_t * output,
                              uint32_t long_cb)
{
  /* Forward pass */
  map_see_inter_alpha(h, input, parity, long_cb);

  /* Backward pass, fills output */
  map_sse_inter_beta(h, input, parity, output, long_cb);
}
/************************************************
*
* TURBO DECODER INTERFACE
*
************************************************/
/* Initializes the inter-frame parallel turbo decoder.
 *
 * Zeroes *h, allocates the llr1, llr2, w, syst0, syst1, parity0 and parity1
 * buffers ((max_long_cb + 12) * max_par_cb int16 values each) and the alpha
 * buffer (8 * (max_long_cb + 24) * max_par_cb int16 values), then generates one
 * interleaver per codeblock size.
 *
 * Returns 0 on success. On any failure everything is released through
 * srslte_tdec_simd_inter_free() and -1 is returned.
 */
int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
{
/* Allocate nbytes into field or bail out through the common cleanup path */
#define TDEC_INTER_ALLOC(field, nbytes)     \
  do {                                      \
    (field) = srslte_vec_malloc(nbytes);    \
    if (!(field)) {                         \
      perror("srslte_vec_malloc");          \
      goto clean_and_exit;                  \
    }                                       \
  } while (0)

  int ret = -1;
  bzero(h, sizeof(srslte_tdec_simd_inter_t));
  uint32_t len = max_long_cb + 12;

  h->max_long_cb = max_long_cb;
  h->max_par_cb  = max_par_cb;

  TDEC_INTER_ALLOC(h->llr1,    sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->llr2,    sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->w,       sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->syst0,   sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->syst1,   sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->parity0, sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->parity1, sizeof(int16_t) * len * h->max_par_cb);
  TDEC_INTER_ALLOC(h->alpha,   sizeof(int16_t) * 8*(len+12) * h->max_par_cb);

  for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
    if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
      goto clean_and_exit;
    }
    srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
  }

  /* No codeblock length selected until srslte_tdec_simd_inter_reset() is called */
  h->current_cbidx = -1;
  ret = 0;

clean_and_exit:
  if (ret == -1) {
    srslte_tdec_simd_inter_free(h);
  }
  return ret;

#undef TDEC_INTER_ALLOC
}
/* Releases every buffer and interleaver owned by the inter-frame decoder and
 * clears the object. Buffers that were never allocated are tolerated. */
void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h)
{
  /* free(NULL) is a no-op, so unallocated buffers need no special casing */
  free(h->llr1);
  free(h->llr2);
  free(h->w);
  free(h->syst0);
  free(h->syst1);
  free(h->parity0);
  free(h->parity1);
  free(h->alpha);

  for (int k = 0; k < SRSLTE_NOF_TC_CB_SIZES; k++) {
    srslte_tc_interl_free(&h->interleaver[k]);
  }

  bzero(h, sizeof(*h));
}
/* Deinterleave for inter-frame parallelization.
 *
 * Splits the received stream of codeblock cbidx (ordered s, p0, p1, s, p0, p1, ...)
 * into h->syst0, h->parity0 and h->parity1. Sample i of codeblock cbidx is
 * stored at index h->max_par_cb*i + cbidx, so the codeblocks are interleaved
 * sample by sample.
 *
 * The 12 tail values following the 3*long_cb payload samples consist of two
 * groups of three pairs: the first group (systematic, parity) belongs to the
 * first constituent decoder and goes to syst0/parity0, the second group, 6
 * values further on, belongs to the second decoder and goes to syst1/parity1.
 * The previous code wrote the second group into parity0 (with a wrong offset),
 * leaving the parity1 tail unset, and filled the syst1 tail from the first
 * group.
 */
void extract_input(srslte_tdec_simd_inter_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb)
{
  for (uint32_t i = 0; i < long_cb; i++) {
    h->syst0[h->max_par_cb*i+cbidx]   = input[3*i+0];
    h->parity0[h->max_par_cb*i+cbidx] = input[3*i+1];
    h->parity1[h->max_par_cb*i+cbidx] = input[3*i+2];
  }

  const int16_t *tail = &input[3*long_cb];
  for (uint32_t j = 0; j < 3; j++) {
    uint32_t i = long_cb + j;
    /* Tail of the first constituent encoder */
    h->syst0[h->max_par_cb*i+cbidx]   = tail[2*j];
    h->parity0[h->max_par_cb*i+cbidx] = tail[2*j + 1];
    /* Tail of the second constituent encoder */
    h->syst1[h->max_par_cb*i+cbidx]   = tail[6 + 2*j];
    h->parity1[h->max_par_cb*i+cbidx] = tail[6 + 2*j + 1];
  }
}
/* Runs one turbo decoder iteration over nof_cb codeblocks processed in parallel.
 *
 * Requires a prior successful srslte_tdec_simd_inter_reset(); otherwise an
 * error is printed and nothing is done. input[i] is the received data of
 * codeblock i and is only read while h->n_iter[i] is 0.
 */
void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
// Prepare systematic and parity bits for MAP DEC #1
for (int i=0;i<nof_cb;i++) {
if (h->n_iter[i] == 0) {
extract_input(h, input[i], i, long_cb);
}
/* NOTE(review): this call covers the buffers of all codeblocks
 * (long_cb*max_par_cb values) yet runs once per codeblock, so with nof_cb > 1
 * it is applied more than once per iteration - confirm this is intended. */
srslte_vec_sum_sss(h->syst0, h->w, h->syst0, long_cb*h->max_par_cb);
}
// Run MAP DEC #1
map_sse_inter_dec(h, h->syst0, h->parity0, h->llr1, long_cb);
// Prepare systematic and parity bits for MAP DEC #2
sse_inter_extract_syst1(h, inter, long_cb);
// Run MAP DEC #2
map_sse_inter_dec(h, h->syst1, h->parity1, h->llr2, long_cb);
// Update a-priori LLR from the last iteration
sse_inter_update_w(h, deinter, long_cb);
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_inter_reset() first\n");
}
}
/* Clears the a-priori values (w) of a single code block slot.
 * Only column cb_idx of the first current_long_cb rows is zeroed.
 * Always returns 0. */
int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h, uint32_t cb_idx)
{
  int row = 0;
  while (row < h->current_long_cb) {
    h->w[h->max_par_cb*row + cb_idx] = 0;
    row++;
  }
  return 0;
}
/* Prepares the decoder for code blocks of length long_cb.
 *
 * Stores the length, looks up the matching interleaver index and clears
 * the a-priori buffer for all parallel slots.
 *
 * Returns 0 on success, -1 if long_cb exceeds the size the object was
 * initialised for or is not a valid code block length.
 */
int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h, uint32_t long_cb)
{
  if (h->max_long_cb < long_cb) {
    fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
            h->max_long_cb);
    return -1;
  }

  h->current_long_cb = long_cb;
  h->current_cbidx   = srslte_cbsegm_cbindex(long_cb);
  if (h->current_cbidx < 0) {
    fprintf(stderr, "Invalid CB length %d\n", long_cb);
    return -1;
  }

  /* One int16_t per (bit, parallel slot) pair */
  const size_t w_bytes = sizeof(int16_t) * long_cb * h->max_par_cb;
  memset(h->w, 0, w_bytes);

  return 0;
}
/* Hard decision for one code block slot, one output byte per bit.
 * Bit k is 1 when the de-interleaved second-decoder LLR is strictly
 * positive, 0 otherwise. */
void srslte_tdec_simd_inter_decision_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb)
{
  uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;

  for (uint32_t k = 0; k < long_cb; k++) {
    if (h->llr2[h->max_par_cb*deinter[k]+cb_idx] > 0) {
      output[k] = 1;
    } else {
      output[k] = 0;
    }
  }
}
/* Hard decision (one byte per bit) for the first nof_cb code block slots;
 * slot n writes into output[n]. */
void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
  uint32_t slot = 0;
  while (slot < nof_cb) {
    srslte_tdec_simd_inter_decision_cb(h, output[slot], slot, long_cb);
    slot++;
  }
}
/* Hard decision for one code block slot with the bits packed 8 per byte,
 * most significant bit first. A bit is 1 when the de-interleaved
 * second-decoder LLR is strictly positive. Only long_cb/8 whole bytes are
 * produced. */
void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb)
{
  uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
  const uint32_t nof_bytes = long_cb/8;

  // long_cb is always byte aligned
  for (uint32_t byte = 0; byte < nof_bytes; byte++) {
    uint8_t packed = 0;
    for (uint32_t bit = 0; bit < 8; bit++) {
      if (h->llr2[h->max_par_cb*(deinter[8*byte+bit])+cb_idx] > 0) {
        packed |= (uint8_t) (0x80 >> bit);
      }
    }
    output[byte] = packed;
  }
}
/* Packed (8 bits per byte) hard decision for the first nof_cb code block
 * slots; slot n writes into output[n]. */
void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
  uint32_t slot = 0;
  while (slot < nof_cb) {
    srslte_tdec_simd_inter_decision_byte_cb(h, output[slot], slot, long_cb);
    slot++;
  }
}
/* Convenience entry point: reset for long_cb, run the requested number of
 * iterations and write the packed hard decisions.
 *
 * At least one iteration is always executed, even if nof_iterations is 0.
 * Returns SRSLTE_ERROR when the reset fails, SRSLTE_SUCCESS otherwise.
 */
int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
                                   int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
                                   uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb)
{
  if (srslte_tdec_simd_inter_reset(h, long_cb) != 0) {
    return SRSLTE_ERROR;
  }

  uint32_t done = 0;
  for (;;) {
    srslte_tdec_simd_inter_iteration(h, input, nof_cb, long_cb);
    done++;
    if (done >= nof_iterations) {
      break;
    }
  }

  srslte_tdec_simd_inter_decision_byte(h, output, nof_cb, long_cb);
  return SRSLTE_SUCCESS;
}
#endif

@ -31,13 +31,15 @@
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd.h"
#include "srslte/phy/fec/turbodecoder_sse.h"
#include "srslte/phy/utils/vector.h"
#include <inttypes.h>
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
#include <srslte/phy/fec/turbodecoder_sse.h>
#endif
@ -47,21 +49,46 @@
#define TOTALTAIL 12
#define INF 10000
#define ZERO 0
#ifdef LV_HAVE_SSE
/*
#define debug_enabled 0
#if debug_enabled
#define debug_state(c,d) printf("k=%5d, in=%5d, pa=%5d, out=%5d, alpha=", k-d,\
s->branch[2*(k-d)] + s->branch[2*(k-d)+1], \
-s->branch[2*(k-d)] + s->branch[2*(k-d)+1], output[k-d]);print_128i(alpha_k);\
printf(", beta=");print_128i(beta_k);printf("\n");
static void print_128i(__m128i x) {
int16_t *s = (int16_t*) &x;
printf("[%d", s[0]);
printf("[%5d", s[0]);
for (int i=1;i<8;i++) {
printf(",%d", s[i]);
printf(",%5d", s[i]);
}
printf("]\n");
printf("]");
}
*/
/* Returns the index (0..7) of the largest signed 16-bit lane of x.
 * On ties the lowest index wins.
 *
 * The previous version started scanning at lane 1, so lane 0 could never be
 * reported as the maximum when any other lane was above -INF.
 */
static uint32_t max_128i(__m128i x) {
  int16_t lanes[8];
  /* Copy out through an unaligned store instead of aliasing the vector */
  _mm_storeu_si128((__m128i*) lanes, x);

  uint32_t best = 0;
  for (uint32_t i = 1; i < 8; i++) {
    if (lanes[i] > lanes[best]) {
      best = i;
    }
  }
  return best;
}
#else
#define debug_state(c,d)
#endif
//#define use_beta_transposed_max
#ifndef use_beta_transposed_max
@ -75,7 +102,7 @@ static inline int16_t hMax(__m128i buffer)
}
/* Computes beta values */
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
void tdec_sse_beta(tdec_sse_t * s, int16_t * output, uint32_t long_cb)
{
int k;
uint32_t end = long_cb + 3;
@ -141,7 +168,9 @@ void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
alphaPtr--;\
bp = _mm_add_epi16(bp, alpha_k);\
bn = _mm_add_epi16(bn, alpha_k);\
output[k-d] = hMax(bn)-hMax(bp);
output[k-d] = hMax(bn)-hMax(bp);\
debug_state(c,d);
/* The tail does not require to load alpha or produce outputs. Only update
* beta metrics accordingly */
@ -179,7 +208,7 @@ void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
#endif
/* Computes alpha metrics */
void map_sse_alpha(map_gen_t * s, uint32_t long_cb)
void tdec_sse_alpha(tdec_sse_t * s, uint32_t long_cb)
{
uint32_t k;
int16_t *alpha = s->alpha;
@ -268,7 +297,7 @@ void map_sse_alpha(map_gen_t * s, uint32_t long_cb)
}
/* Compute branch metrics (gamma) */
void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
void tdec_sse_gamma(tdec_sse_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
{
__m128i res00, res10, res01, res11, res0, res1;
__m128i in, ap, pa, g1, g0;
@ -313,6 +342,8 @@ void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity,
resPtr++;
_mm_store_si128(resPtr, res1);
resPtr++;
//printf("k=%d, in=%d, pa=%d, g0=%d, g1=%d\n", i, input[i], parity[i], h->branch[2*i], h->branch[2*i+1]);
}
for (int i=long_cb;i<long_cb+3;i++) {
@ -322,6 +353,163 @@ void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity,
}
/* Inititalizes constituent decoder object */
int tdec_sse_init(void **hh, uint32_t max_long_cb)
{
*hh = calloc(1, sizeof(tdec_sse_t));
tdec_sse_t *h = (tdec_sse_t*) *hh;
h->max_long_cb = max_long_cb;
h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + TOTALTAIL + 1) * NUMSTATES);
if (!h->alpha) {
perror("srslte_vec_malloc");
return -1;
}
h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + TOTALTAIL + 1) * NUMSTATES);
if (!h->branch) {
perror("srslte_vec_malloc");
return -1;
}
return 1;
}
/* Releases a decoder handle created by tdec_sse_init(): the alpha and
 * branch buffers first, then the object itself. A NULL handle is ignored. */
void tdec_sse_free(void *hh)
{
  tdec_sse_t *dec = (tdec_sse_t*) hh;
  if (!dec) {
    return;
  }

  if (dec->alpha) {
    free(dec->alpha);
  }
  if (dec->branch) {
    free(dec->branch);
  }
  free(dec);
}
/* Runs one constituent decoder pass.
 *
 *   hh      - handle created by tdec_sse_init()
 *   input   - systematic values
 *   app     - a-priori values
 *   parity  - parity values
 *   output  - receives the values written by tdec_sse_beta()
 *   long_cb - code block length
 *
 * The three stages must run in this order: gamma, alpha, beta.
 */
void tdec_sse_dec(void *hh, int16_t * input, int16_t *app, int16_t * parity,
                  int16_t *output, uint32_t long_cb)
{
  tdec_sse_t *dec = (tdec_sse_t*) hh;

  /* 1) branch metrics */
  tdec_sse_gamma(dec, input, app, parity, long_cb);

  /* 2) forward recursion */
  tdec_sse_alpha(dec, long_cb);

  /* 3) backward recursion and LLR computation */
  tdec_sse_beta(dec, output, long_cb);
}
/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into
 * 3 buffers ready to be used by compute_gamma()
 *
 *   input   - 3*long_cb int16 samples ordered (systematic, parity0, parity1)
 *             per bit, followed by 12 tail samples
 *   syst0   - receives long_cb systematic samples plus 3 tail samples
 *   app2    - only its 3 tail entries [long_cb .. long_cb+2] are written here
 *   parity0 - receives long_cb parity-0 samples plus 3 tail samples
 *   parity1 - receives long_cb parity-1 samples plus 3 tail samples
 *   long_cb - code block length; the vector loop handles long_cb/8 groups of
 *             8 bits, so any remainder of long_cb modulo 8 is not processed
 *
 * The vector loop uses aligned 128-bit loads and stores on input, syst0,
 * parity0 and parity1.
 */
void tdec_sse_extract_input(int16_t *input, int16_t *syst0, int16_t *app2, int16_t *parity0, int16_t *parity1, uint32_t long_cb) {
uint32_t i;
__m128i *inputPtr = (__m128i*) input;
__m128i in0, in1, in2;
__m128i s0, s1, s2, s;
__m128i p00, p01, p02, p0;
__m128i p10, p11, p12, p1;
__m128i *sysPtr = (__m128i*) syst0;
__m128i *pa0Ptr = (__m128i*) parity0;
__m128i *pa1Ptr = (__m128i*) parity1;
// Shuffle masks: each pair of byte indices selects one 16-bit sample of the
// source vector; 0xff produces a zero byte so the three partial results can
// be OR-ed together.
// systematic: 16-bit samples 0, 3, 6 of the 1st vector -> output samples 0..2
__m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0);
// systematic: 16-bit samples 1, 4, 7 of the 2nd vector -> output samples 3..5
__m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff);
// systematic: 16-bit samples 2, 5 of the 3rd vector -> output samples 6..7
__m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// parity 0: 16-bit samples 1, 4, 7 of the 1st vector -> output samples 0..2
__m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2);
// parity 0: 16-bit samples 2, 5 of the 2nd vector -> output samples 3..4
__m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff);
// parity 0: 16-bit samples 0, 3, 6 of the 3rd vector -> output samples 5..7
__m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// parity 1: 16-bit samples 2, 5 of the 1st vector -> output samples 0..1
__m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4);
// parity 1: 16-bit samples 0, 3, 6 of the 2nd vector -> output samples 2..4
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
// parity 1: 16-bit samples 1, 4, 7 of the 3rd vector -> output samples 5..7
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// Split systematic and parity bits: every iteration consumes 3 vectors
// (24 samples = 8 bits x 3 streams) and produces 1 vector per stream
for (i = 0; i < long_cb/8; i++) {
in0 = _mm_load_si128(inputPtr); inputPtr++;
in1 = _mm_load_si128(inputPtr); inputPtr++;
in2 = _mm_load_si128(inputPtr); inputPtr++;
/* Deinterleave Systematic bits */
s0 = _mm_shuffle_epi8(in0, s0_mask);
s1 = _mm_shuffle_epi8(in1, s1_mask);
s2 = _mm_shuffle_epi8(in2, s2_mask);
s = _mm_or_si128(s0, s1);
s = _mm_or_si128(s, s2);
_mm_store_si128(sysPtr, s);
sysPtr++;
/* Deinterleave parity 0 bits */
p00 = _mm_shuffle_epi8(in0, p00_mask);
p01 = _mm_shuffle_epi8(in1, p01_mask);
p02 = _mm_shuffle_epi8(in2, p02_mask);
p0 = _mm_or_si128(p00, p01);
p0 = _mm_or_si128(p0, p02);
_mm_store_si128(pa0Ptr, p0);
pa0Ptr++;
/* Deinterleave parity 1 bits */
p10 = _mm_shuffle_epi8(in0, p10_mask);
p11 = _mm_shuffle_epi8(in1, p11_mask);
p12 = _mm_shuffle_epi8(in2, p12_mask);
p1 = _mm_or_si128(p10, p11);
p1 = _mm_or_si128(p1, p12);
_mm_store_si128(pa1Ptr, p1);
pa1Ptr++;
}
// Tail, first half: 3 (systematic, parity0) pairs starting at input[3*long_cb]
for (i = 0; i < 3; i++) {
syst0[i+long_cb] = input[3*long_cb + 2*i];
parity0[i+long_cb] = input[3*long_cb + 2*i + 1];
}
// Tail, second half: 3 (app2, parity1) pairs starting at input[3*long_cb + 6]
for (i = 0; i < 3; i++) {
app2[i+long_cb] = input[3*long_cb + 6 + 2*i];
parity1[i+long_cb] = input[3*long_cb + 6 + 2*i + 1];
}
}
/* Packs hard decisions 8 per byte, most significant bit first.
 * Bit k of the stream is 1 when app1[k] is strictly positive.
 * Writes long_cb/8 bytes to output. */
void tdec_sse_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb)
{
  const uint32_t nof_bytes = long_cb/8;

  // long_cb is always byte aligned
  for (uint32_t byte = 0; byte < nof_bytes; byte++) {
    const int16_t *llr = &app1[8*byte];
    uint8_t packed = 0;
    for (uint32_t bit = 0; bit < 8; bit++) {
      if (llr[bit] > 0) {
        packed |= (uint8_t) (0x80 >> bit);
      }
    }
    output[byte] = packed;
  }
}
@ -381,7 +569,7 @@ static inline __m128i transposed_max(__m128i a, __m128i b, __m128i c, __m128i d,
return res;
}
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
void tdec_sse_beta(tdec_sse_t * s, int16_t * output, uint32_t long_cb)
{
int k;
uint32_t end = long_cb + 3;

@ -1,202 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
#include "srslte/phy/utils/vector.h"
#define NCB 8
#define INF 10000
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
/* Builds the systematic input of the second constituent decoder.
 * For every position i one 128-bit vector (8 int16 lanes) is produced:
 * syst1[i] = llr1[inter[i]] - w[inter[i]]. All accesses are aligned. */
void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb)
{
  __m128i *llr1Vec  = (__m128i*) h->llr1;
  __m128i *wVec     = (__m128i*) h->w;
  __m128i *syst1Vec = (__m128i*) h->syst1;

  for (uint32_t i = 0; i < long_cb; i++) {
    const uint16_t src = inter[i];
    __m128i extrinsic = _mm_sub_epi16(_mm_load_si128(&llr1Vec[src]),
                                      _mm_load_si128(&wVec[src]));
    _mm_store_si128(&syst1Vec[i], extrinsic);
  }
}
/* Updates the a-priori values after the second constituent decoder:
 *   w[i] += llr2[deinter[i]] - llr1[i]
 * computed on one 128-bit vector (8 int16 lanes) per position i, using
 * aligned loads and stores.
 *
 * The previous version stored the result into syst1 instead of w, so w
 * never changed between iterations (and syst1 is rebuilt by
 * sse_inter_extract_syst1() anyway).
 */
void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb)
{
  __m128i *llr1Ptr = (__m128i*) h->llr1;
  __m128i *llr2Ptr = (__m128i*) h->llr2;
  __m128i *wPtr    = (__m128i*) h->w;

  for (uint32_t i = 0; i < long_cb; i++) {
    __m128i llr1 = _mm_load_si128(&llr1Ptr[i]);
    __m128i w    = _mm_load_si128(&wPtr[i]);
    __m128i llr2 = _mm_load_si128(&llr2Ptr[deinter[i]]);
    _mm_store_si128(&wPtr[i], _mm_add_epi16(w, _mm_sub_epi16(llr2, llr1)));
  }
}
/* Computes beta values */
/* Backward-recursion step of the inter-frame decoder: every __m128i holds
 * 8 int16 lanes, and the 8 state metrics are kept in old[0..7].
 *
 *   s       - decoder object; s->alpha is read, 8 vectors per step
 *   input   - systematic values, one vector per step
 *   parity  - parity values, one vector per step
 *   output  - receives one vector (m1 - m0) per step
 *   long_cb - code block length; long_cb + 3 steps are executed
 *
 * NOTE(review): k counts down but inputPtr, parityPtr, alphaPtr and
 * outputPtr all advance forward from the start of their buffers -- confirm
 * this traversal order is intended for a backward recursion.
 */
void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb)
{
__m128i m_b[8], new[8], old[8], max1[8], max0[8];
__m128i x, y, xy;
__m128i m1, m0;
uint32_t end = long_cb + 3;
uint32_t i;
__m128i *inputPtr = (__m128i*) input;
__m128i *parityPtr = (__m128i*) parity;
__m128i *outputPtr = (__m128i*) output;
__m128i *alphaPtr = (__m128i*) s->alpha;
// All state metrics start at zero (this inner i shadows the uint32_t i above)
for (int i = 0; i < 8; i++) {
old[i] = _mm_set1_epi16(0);
}
for (int k = end - 1; k >= 0; k--) {
x = _mm_load_si128(inputPtr++);
y = _mm_load_si128(parityPtr++);
xy = _mm_add_epi16(x,y);
// Candidate metrics for the first branch of each state
m_b[0] = _mm_add_epi16(old[4], xy);
m_b[1] = old[4];
m_b[2] = _mm_add_epi16(old[5], y);
m_b[3] = _mm_add_epi16(old[5], x);
m_b[4] = _mm_add_epi16(old[6], x);
m_b[5] = _mm_add_epi16(old[6], y);
m_b[6] = old[7];
m_b[7] = _mm_add_epi16(old[7], xy);
// Candidate metrics for the second branch of each state
new[0] = old[0];
new[1] = _mm_add_epi16(old[0], xy);
new[2] = _mm_add_epi16(old[1], x);
new[3] = _mm_add_epi16(old[1], y);
new[4] = _mm_add_epi16(old[2], y);
new[5] = _mm_add_epi16(old[2], x);
new[6] = _mm_add_epi16(old[3], xy);
new[7] = old[3];
// Combine each candidate with the stored alpha of the same state
for (i = 0; i < 8; i++) {
__m128i alpha = _mm_load_si128(alphaPtr++);
max0[i] = _mm_add_epi16(alpha, m_b[i]);
max1[i] = _mm_add_epi16(alpha, new[i]);
}
// Lane-wise maximum over the 8 states for each hypothesis
m1 = _mm_max_epi16(max1[0], max1[1]);
m0 = _mm_max_epi16(max0[0], max0[1]);
for (i = 2; i < 8; i++) {
m1 = _mm_max_epi16(m1, max1[i]);
m0 = _mm_max_epi16(m0, max0[i]);
}
// New state metrics: lane-wise maximum of the two candidates
for (i = 0; i < 8; i++) {
new[i] = _mm_max_epi16(m_b[i], new[i]);
old[i] = new[i];
}
__m128i out = _mm_sub_epi16(m1, m0);
_mm_store_si128(outputPtr++, out);
// normalize
// NOTE(review): the result of _mm_sub_epi16 is discarded, so this loop
// currently leaves old[] unchanged -- no normalization actually happens.
if ((k%4)==0) {
for (int i=1;i<8;i++) {
_mm_sub_epi16(old[i], old[0]);
}
}
}
}
/* Computes alpha metrics */
/* Forward-recursion step of the inter-frame decoder: every __m128i holds
 * 8 int16 lanes, and the 8 state metrics are kept in old[0..7].
 *
 *   s       - decoder object; s->alpha receives 8 vectors per step
 *   input   - systematic values, one vector per step
 *   parity  - parity values, one vector per step
 *   long_cb - number of steps executed
 *
 * NOTE(review): the function name is spelled "see" rather than "sse".
 */
void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb)
{
__m128i m_b[8], new[8], old[8];
__m128i x, y, xy;
uint32_t k;
__m128i *inputPtr = (__m128i*) input;
__m128i *parityPtr = (__m128i*) parity;
__m128i *alphaPtr = (__m128i*) s->alpha;
// State 0 starts at 0, every other state at -INF
old[0] = _mm_set1_epi16(0);
for (int i = 1; i < 8; i++) {
old[i] = _mm_set1_epi16(-INF);
}
for (k = 0; k < long_cb; k++) {
x = _mm_load_si128(inputPtr++);
y = _mm_load_si128(parityPtr++);
xy = _mm_add_epi16(x,y);
// Candidate metrics for the first branch into each state
m_b[0] = old[0];
m_b[1] = _mm_add_epi16(old[3], y);
m_b[2] = _mm_add_epi16(old[4], y);
m_b[3] = old[7];
m_b[4] = old[1];
m_b[5] = _mm_add_epi16(old[2], y);
m_b[6] = _mm_add_epi16(old[5], y);
m_b[7] = old[6];
// Candidate metrics for the second branch into each state
new[0] = _mm_add_epi16(old[1], xy);
new[1] = _mm_add_epi16(old[2], x);
new[2] = _mm_add_epi16(old[5], x);
new[3] = _mm_add_epi16(old[6], xy);
new[4] = _mm_add_epi16(old[0], xy);
new[5] = _mm_add_epi16(old[3], x);
new[6] = _mm_add_epi16(old[4], x);
new[7] = _mm_add_epi16(old[7], xy);
// Keep the lane-wise maximum of both candidates and store it as alpha
for (int i = 0; i < 8; i++) {
new[i] = _mm_max_epi16(m_b[i], new[i]);
old[i] = new[i];
_mm_store_si128(alphaPtr++, old[i]);
}
// normalize
// NOTE(review): the result of _mm_sub_epi16 is discarded, so this loop
// currently leaves old[] unchanged -- no normalization actually happens.
if ((k%4)==0) {
for (int i=1;i<8;i++) {
_mm_sub_epi16(old[i], old[0]);
}
}
}
}
#endif

@ -32,18 +32,26 @@
#include "srslte/phy/utils/bit.h"
#include "srslte/phy/modem/demod_soft.h"
// AVX implementation not useful for integers. Wait for AVX2
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols);
#endif
#define SCALE_SHORT_CONV_QPSK 100
#define SCALE_SHORT_CONV_QAM16 400
#define SCALE_SHORT_CONV_QAM64 700
#define SCALE_BYTE_CONV_QPSK 20
#define SCALE_BYTE_CONV_QAM16 30
#define SCALE_BYTE_CONV_QAM64 40
/* BPSK soft demodulation to 8-bit LLRs: one LLR per symbol, proportional to
 * -(re + im)/sqrt(2) with the byte QPSK scale factor. */
void demod_bpsk_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
  int n = 0;
  while (n < nsymbols) {
    llr[n] = (int8_t) -SCALE_BYTE_CONV_QPSK*(crealf(symbols[n]) + cimagf(symbols[n]))/sqrt(2);
    n++;
  }
}
void demod_bpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
for (int i=0;i<nsymbols;i++) {
llr[i] = (short) -SCALE_SHORT_CONV_QPSK*(crealf(symbols[i]) + cimagf(symbols[i]))/sqrt(2);
@ -56,6 +64,10 @@ void demod_bpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
}
}
/* QPSK soft demodulation to 8-bit LLRs: the symbol array is treated as
 * 2*nsymbols floats and converted with gain -SCALE_BYTE_CONV_QPSK*sqrt(2). */
void demod_qpsk_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
  const float *samples = (const float*) symbols;
  srslte_vec_convert_fb(samples, -SCALE_BYTE_CONV_QPSK*sqrt(2), llr, nsymbols*2);
}
/* QPSK soft demodulation to 16-bit LLRs: the symbol array is treated as
 * 2*nsymbols floats and converted with gain -SCALE_SHORT_CONV_QPSK*sqrt(2). */
void demod_qpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
  const float *samples = (const float*) symbols;
  srslte_vec_convert_fi(samples, -SCALE_SHORT_CONV_QPSK*sqrt(2), llr, nsymbols*2);
}
@ -87,9 +99,11 @@ void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols) {
__m128i result11, result12, result22, result21;
__m128 scale_v = _mm_set1_ps(-SCALE_SHORT_CONV_QAM16);
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,7,6,5,4,0xff,0xff,0xff,0xff,3,2,1,0);
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8);
__m128i shuffle_abs_1 = _mm_set_epi8(7,6,5,4,0xff,0xff,0xff,0xff,3,2,1,0,0xff,0xff,0xff,0xff);
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8);
__m128i shuffle_abs_2 = _mm_set_epi8(15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8,0xff,0xff,0xff,0xff);
for (int i=0;i<nsymbols/4;i++) {
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
@ -120,6 +134,60 @@ void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols) {
llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
}
}
/* 16QAM soft demodulation to 8-bit LLRs, SSE version.
 *
 * Produces 4 LLRs per symbol: -S*re, -S*im, |S*re| - off, |S*im| - off with
 * S = SCALE_BYTE_CONV_QAM16 and off = 2*S/sqrt(10).
 *
 * The vector loop handles 8 symbols (16 floats) per iteration and writes
 * 32 bytes; it uses aligned loads on symbols and aligned stores on llr.
 * The remaining nsymbols % 8 symbols are handled by the scalar loop.
 */
void demod_16qam_lte_b_sse(const cf_t *symbols, int8_t *llr, int nsymbols) {
float *symbolsPtr = (float*) symbols;
__m128i *resultPtr = (__m128i*) llr;
__m128 symbol1, symbol2, symbol3, symbol4;
__m128i symbol_i1, symbol_i2, symbol_i3, symbol_i4, symbol_i, symbol_abs, symbol_12, symbol_34;
__m128i offset = _mm_set1_epi8(2*SCALE_BYTE_CONV_QAM16/sqrt(10));
__m128i result1n, result1a, result2n, result2a;
// Negative scale: the scaled value is directly the first LLR pair
__m128 scale_v = _mm_set1_ps(-SCALE_BYTE_CONV_QAM16);
// Shuffle masks spread each (re, im) byte pair into a 4-byte output group:
// "negated" masks fill bytes 0-1 of the group, "abs" masks fill bytes 2-3;
// 0xff yields a zero byte so both halves can be OR-ed.
// *_1 masks take source bytes 0..7 (symbols 0-3), *_2 take bytes 8..15 (symbols 4-7)
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i shuffle_abs_1 = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i shuffle_abs_2 = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
for (int i=0;i<nsymbols/8;i++) {
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol3 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol4 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
// Scale and convert to 32-bit integers
symbol_i1 = _mm_cvtps_epi32(_mm_mul_ps(symbol1, scale_v));
symbol_i2 = _mm_cvtps_epi32(_mm_mul_ps(symbol2, scale_v));
symbol_i3 = _mm_cvtps_epi32(_mm_mul_ps(symbol3, scale_v));
symbol_i4 = _mm_cvtps_epi32(_mm_mul_ps(symbol4, scale_v));
// Narrow 32 -> 16 -> 8 bits with signed saturation: 16 int8 values
symbol_12 = _mm_packs_epi32(symbol_i1, symbol_i2);
symbol_34 = _mm_packs_epi32(symbol_i3, symbol_i4);
symbol_i = _mm_packs_epi16(symbol_12, symbol_34);
// Second LLR pair: magnitude minus the decision offset
symbol_abs = _mm_abs_epi8(symbol_i);
symbol_abs = _mm_sub_epi8(symbol_abs, offset);
result1n = _mm_shuffle_epi8(symbol_i, shuffle_negated_1);
result1a = _mm_shuffle_epi8(symbol_abs, shuffle_abs_1);
result2n = _mm_shuffle_epi8(symbol_i, shuffle_negated_2);
result2a = _mm_shuffle_epi8(symbol_abs, shuffle_abs_2);
_mm_store_si128(resultPtr, _mm_or_si128(result1n, result1a)); resultPtr++;
_mm_store_si128(resultPtr, _mm_or_si128(result2n, result2a)); resultPtr++;
}
// Demodulate last symbols
// Scalar tail: positive scale with explicit negation; the plain cast does
// not saturate the way the vector path does
for (int i=8*(nsymbols/8);i<nsymbols;i++) {
short yre = (int8_t) (SCALE_BYTE_CONV_QAM16*crealf(symbols[i]));
short yim = (int8_t) (SCALE_BYTE_CONV_QAM16*cimagf(symbols[i]));
llr[4*i+0] = -yre;
llr[4*i+1] = -yim;
llr[4*i+2] = abs(yre)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
llr[4*i+3] = abs(yim)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
}
}
#endif
void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
@ -138,6 +206,22 @@ void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
#endif
}
/* 16QAM soft demodulation to 8-bit LLRs (4 LLRs per symbol).
 * Dispatches to the SSE routine when LV_HAVE_SSE is defined, otherwise
 * uses the portable loop below. */
void demod_16qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
#ifdef LV_HAVE_SSE
  demod_16qam_lte_b_sse(symbols, llr, nsymbols);
#else
  for (int n = 0; n < nsymbols; n++) {
    int8_t *out = &llr[4*n];
    int8_t yre = (int8_t) (SCALE_BYTE_CONV_QAM16*crealf(symbols[n]));
    int8_t yim = (int8_t) (SCALE_BYTE_CONV_QAM16*cimagf(symbols[n]));
    /* Sign bits */
    out[0] = -yre;
    out[1] = -yim;
    /* Amplitude bits: distance to the inner/outer decision boundary */
    out[2] = abs(yre)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
    out[3] = abs(yim)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
  }
#endif
}
void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
{
for (int i=0;i<nsymbols;i++) {
@ -219,6 +303,76 @@ void demod_64qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols)
}
}
/* 64QAM soft demodulation to 8-bit LLRs, SSE version.
 *
 * Produces 6 LLRs per symbol: -S*re, -S*im, then a = |S*x| - o1 for re and
 * im, then |a| - o2 for re and im, with S = SCALE_BYTE_CONV_QAM64,
 * o1 = 4*S/sqrt(42) and o2 = 2*S/sqrt(42).
 *
 * The vector loop handles 8 symbols (16 floats) per iteration and writes
 * 48 bytes; it uses aligned loads on symbols and aligned stores on llr.
 * The remaining nsymbols % 8 symbols are handled by the scalar loop.
 */
void demod_64qam_lte_b_sse(const cf_t *symbols, int8_t *llr, int nsymbols)
{
float *symbolsPtr = (float*) symbols;
__m128i *resultPtr = (__m128i*) llr;
__m128 symbol1, symbol2, symbol3, symbol4;
__m128i symbol_i1, symbol_i2, symbol_i3, symbol_i4, symbol_i, symbol_abs, symbol_abs2,symbol_12, symbol_34;
__m128i offset1 = _mm_set1_epi8(4*SCALE_BYTE_CONV_QAM64/sqrt(42));
__m128i offset2 = _mm_set1_epi8(2*SCALE_BYTE_CONV_QAM64/sqrt(42));
// Negative scale: the scaled value is directly the first LLR pair
__m128 scale_v = _mm_set1_ps(-SCALE_BYTE_CONV_QAM64);
__m128i result11, result12, result13, result22, result21,result23, result31, result32, result33;
// Shuffle masks place each (re, im) byte pair into its 6-byte output group:
// "negated" -> bytes 0-1, "abs" -> bytes 2-3, "abs2" -> bytes 4-5 of the group.
// The 48 output bytes span three vectors (_1, _2, _3), so groups straddle
// vector boundaries; 0xff yields a zero byte so the parts can be OR-ed.
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,5,4,0xff,0xff,0xff,0xff,3,2,0xff,0xff,0xff,0xff,1,0);
__m128i shuffle_negated_2 = _mm_set_epi8(11,10,0xff,0xff,0xff,0xff,9,8,0xff,0xff,0xff,0xff,7,6,0xff,0xff);
__m128i shuffle_negated_3 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,0xff,0xff,0xff,0xff,13,12,0xff,0xff,0xff,0xff);
__m128i shuffle_abs_1 = _mm_set_epi8(5,4,0xff,0xff,0xff,0xff,3,2,0xff,0xff,0xff,0xff,1,0,0xff,0xff);
__m128i shuffle_abs_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,9,8,0xff,0xff,0xff,0xff,7,6,0xff,0xff,0xff,0xff);
__m128i shuffle_abs_3 = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,0xff,0xff,13,12,0xff,0xff,0xff,0xff,11,10);
__m128i shuffle_abs2_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,3,2,0xff,0xff,0xff,0xff,1,0,0xff,0xff,0xff,0xff);
__m128i shuffle_abs2_2 = _mm_set_epi8(0xff,0xff,9,8,0xff,0xff,0xff,0xff,7,6,0xff,0xff,0xff,0xff,5,4);
__m128i shuffle_abs2_3 = _mm_set_epi8(15,14,0xff,0xff,0xff,0xff,13,12,0xff,0xff,0xff,0xff,11,10,0xff,0xff);
for (int i=0;i<nsymbols/8;i++) {
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol3 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
symbol4 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
// Scale and convert to 32-bit integers
symbol_i1 = _mm_cvtps_epi32(_mm_mul_ps(symbol1, scale_v));
symbol_i2 = _mm_cvtps_epi32(_mm_mul_ps(symbol2, scale_v));
symbol_i3 = _mm_cvtps_epi32(_mm_mul_ps(symbol3, scale_v));
symbol_i4 = _mm_cvtps_epi32(_mm_mul_ps(symbol4, scale_v));
// Narrow 32 -> 16 -> 8 bits with signed saturation: 16 int8 values
symbol_12 = _mm_packs_epi32(symbol_i1, symbol_i2);
symbol_34 = _mm_packs_epi32(symbol_i3, symbol_i4);
symbol_i = _mm_packs_epi16(symbol_12, symbol_34);
// Second and third LLR pairs: nested magnitude-minus-offset
symbol_abs = _mm_abs_epi8(symbol_i);
symbol_abs = _mm_sub_epi8(symbol_abs, offset1);
symbol_abs2 = _mm_sub_epi8(_mm_abs_epi8(symbol_abs), offset2);
result11 = _mm_shuffle_epi8(symbol_i, shuffle_negated_1);
result12 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_1);
result13 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_1);
result21 = _mm_shuffle_epi8(symbol_i, shuffle_negated_2);
result22 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_2);
result23 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_2);
result31 = _mm_shuffle_epi8(symbol_i, shuffle_negated_3);
result32 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_3);
result33 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_3);
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result11, result12),result13)); resultPtr++;
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result21, result22),result23)); resultPtr++;
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result31, result32),result33)); resultPtr++;
}
// Scalar tail for the last nsymbols % 8 symbols.
// NOTE(review): yre/yim are float here although they hold int8 values and
// are passed to the integer abs(); the 16QAM byte routine uses integer
// temporaries instead.
for (int i=8*(nsymbols/8);i<nsymbols;i++) {
float yre = (int8_t) (SCALE_BYTE_CONV_QAM64*crealf(symbols[i]));
float yim = (int8_t) (SCALE_BYTE_CONV_QAM64*cimagf(symbols[i]));
llr[6*i+0] = -yre;
llr[6*i+1] = -yim;
llr[6*i+2] = abs(yre)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
llr[6*i+3] = abs(yim)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
}
}
#endif
void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
@ -240,6 +394,25 @@ void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
#endif
}
/* 64QAM soft demodulation to 8-bit LLRs (6 LLRs per symbol).
 *
 * Dispatches to the SSE routine when LV_HAVE_SSE is defined, otherwise uses
 * the portable loop below. Per symbol the outputs are:
 *   -yre, -yim, |yre| - o1, |yim| - o1, |llr2| - o2, |llr3| - o2
 * with o1 = 4*S/sqrt(42), o2 = 2*S/sqrt(42), S = SCALE_BYTE_CONV_QAM64.
 *
 * The scaled samples are held in int8_t (as in demod_16qam_lte_b) rather
 * than float, so the integer abs() receives an integer argument and no
 * float -> int8_t conversion is needed when negating.
 */
void demod_64qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols)
{
#ifdef LV_HAVE_SSE
  demod_64qam_lte_b_sse(symbols, llr, nsymbols);
#else
  for (int i=0;i<nsymbols;i++) {
    int8_t yre = (int8_t) (SCALE_BYTE_CONV_QAM64*crealf(symbols[i]));
    int8_t yim = (int8_t) (SCALE_BYTE_CONV_QAM64*cimagf(symbols[i]));

    /* Sign bits */
    llr[6*i+0] = -yre;
    llr[6*i+1] = -yim;
    /* First amplitude level */
    llr[6*i+2] = abs(yre)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
    llr[6*i+3] = abs(yim)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
    /* Second amplitude level, derived from the first */
    llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
    llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
  }
#endif
}
int srslte_demod_soft_demodulate(srslte_mod_t modulation, const cf_t* symbols, float* llr, int nsymbols) {
switch(modulation) {
case SRSLTE_MOD_BPSK:
@ -281,3 +454,24 @@ int srslte_demod_soft_demodulate_s(srslte_mod_t modulation, const cf_t* symbols,
}
return 0;
}
/* Soft-demodulates nsymbols symbols into 8-bit LLRs using the routine that
 * matches `modulation`.
 * Returns 0 on success, -1 (after printing an error) for an unsupported
 * modulation. */
int srslte_demod_soft_demodulate_b(srslte_mod_t modulation, const cf_t* symbols, int8_t* llr, int nsymbols) {
  int status = 0;

  switch(modulation) {
    case SRSLTE_MOD_BPSK:
      demod_bpsk_lte_b(symbols, llr, nsymbols);
      break;
    case SRSLTE_MOD_QPSK:
      demod_qpsk_lte_b(symbols, llr, nsymbols);
      break;
    case SRSLTE_MOD_16QAM:
      demod_16qam_lte_b(symbols, llr, nsymbols);
      break;
    case SRSLTE_MOD_64QAM:
      demod_64qam_lte_b(symbols, llr, nsymbols);
      break;
    default:
      fprintf(stderr, "Invalid modulation %d\n", modulation);
      status = -1;
      break;
  }

  return status;
}

@ -112,6 +112,7 @@ int main(int argc, char **argv) {
cf_t *symbols;
float *llr;
short *llr_s;
int8_t *llr_b;
parse_args(argc, argv);
@ -153,6 +154,12 @@ int main(int argc, char **argv) {
exit(-1);
}
llr_b = srslte_vec_malloc(sizeof(int8_t) * num_bits);
if (!llr_b) {
perror("malloc");
exit(-1);
}
/* generate random data */
srand(0);
@ -160,6 +167,7 @@ int main(int argc, char **argv) {
struct timeval t[3];
float mean_texec = 0.0;
float mean_texec_s = 0.0;
float mean_texec_b = 0.0;
for (int n=0;n<nof_frames;n++) {
for (i=0;i<num_bits;i++) {
input[i] = rand()%2;
@ -187,6 +195,15 @@ int main(int argc, char **argv) {
mean_texec_s = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_s, n-1);
}
gettimeofday(&t[1], NULL);
srslte_demod_soft_demodulate_b(modulation, symbols, llr_b, num_bits / mod.nbits_x_symbol);
gettimeofday(&t[2], NULL);
get_time_interval(t);
if (n > 0) {
mean_texec_b = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_b, n-1);
}
if (SRSLTE_VERBOSE_ISDEBUG()) {
printf("bits=");
srslte_vec_fprint_b(stdout, input, num_bits);
@ -200,6 +217,9 @@ int main(int argc, char **argv) {
printf("llr_s=");
srslte_vec_fprint_s(stdout, llr_s, num_bits);
printf("llr_b=");
srslte_vec_fprint_bs(stdout, llr_b, num_bits);
}
// Check demodulation errors
@ -213,6 +233,8 @@ int main(int argc, char **argv) {
ret = 0;
clean_exit:
free(llr_b);
free(llr_s);
free(llr);
free(symbols);
free(output);
@ -220,7 +242,7 @@ clean_exit:
srslte_modem_table_free(&mod);
printf("Mean Throughput: %.2f/%.2f. Mbps ExTime: %.2f/%.2f us\n",
num_bits/mean_texec, num_bits/mean_texec_s, mean_texec, mean_texec_s);
printf("Mean Throughput: %.2f/%.2f/%.2f. Mbps ExTime: %.2f/%.2f/%.2f us\n",
num_bits/mean_texec, num_bits/mean_texec_s, num_bits/mean_texec_b, mean_texec, mean_texec_s, mean_texec_b);
exit(ret);
}

@ -41,6 +41,10 @@
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_SSE
#include <immintrin.h>
#endif /* LV_HAVE_SSE */
#define MAX_PDSCH_RE(cp) (2 * SRSLTE_CP_NSYMB(cp) * 12)
@ -615,10 +619,10 @@ static srslte_sequence_t *get_user_sequence(srslte_pdsch_t *q, uint16_t rnti,
uint32_t rnti_idx = q->is_ue?0:rnti;
// The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only for C-RNTI in the UE
if (q->users[rnti_idx] && q->users[rnti_idx]->sequence_generated &&
if (q->users[rnti_idx] &&
q->users[rnti_idx]->sequence_generated &&
q->users[rnti_idx]->cell_id == q->cell.id &&
q->ue_rnti == rnti &&
((rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) || !q->is_ue))
(!q->is_ue || q->ue_rnti == rnti))
{
return &q->users[rnti_idx]->seq[codeword_idx][sf_idx];
} else {
@ -669,31 +673,10 @@ static int srslte_pdsch_codeword_encode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c
return SRSLTE_SUCCESS;
}
static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, srslte_sch_t *dl_sch,
srslte_softbuffer_rx_t *softbuffer, uint16_t rnti, uint8_t *data,
uint32_t codeword_idx, uint32_t tb_idx, bool *ack) {
srslte_ra_nbits_t *nbits = &cfg->nbits[tb_idx];
srslte_ra_mcs_t *mcs = &cfg->grant.mcs[tb_idx];
uint32_t rv = cfg->rv[tb_idx];
int ret = SRSLTE_ERROR_INVALID_INPUTS;
if (softbuffer && data && ack && nbits->nof_bits && nbits->nof_re) {
INFO("Decoding PDSCH SF: %d (CW%d -> TB%d), Mod %s, NofBits: %d, NofSymbols: %d, NofBitsE: %d, rv_idx: %d\n",
cfg->sf_idx, codeword_idx, tb_idx, srslte_mod_string(mcs->mod), mcs->tbs,
nbits->nof_re, nbits->nof_bits, rv);
/* demodulate symbols
* The MAX-log-MAP algorithm used in turbo decoding is unsensitive to SNR estimation,
* thus we don't need tot set it in the LLRs normalization
*/
srslte_demod_soft_demodulate_s(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re);
/* Select scrambling sequence */
srslte_sequence_t *seq = get_user_sequence(q, rnti, codeword_idx, cfg->sf_idx, nbits->nof_bits);
/* Bit scrambling */
srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
static void csi_correction(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, uint32_t codeword_idx, uint32_t tb_idx, void *e)
{
srslte_ra_nbits_t *nbits = &cfg->nbits[tb_idx];
uint32_t qm = 0;
switch(cfg->grant.mcs[tb_idx].mod) {
@ -713,20 +696,120 @@ static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c
ERROR("No modulation");
}
int16_t *e = q->e[codeword_idx];
if (q->csi_enabled) {
const uint32_t csi_max_idx = srslte_vec_max_fi(q->csi[codeword_idx], nbits->nof_bits / qm);
float csi_max = 1.0f;
if (csi_max_idx < nbits->nof_bits / qm) {
csi_max = q->csi[codeword_idx][csi_max_idx];
}
int8_t *e_b = e;
int16_t *e_s = e;
float *csi_v = q->csi[codeword_idx];
if (q->llr_is_8bit) {
for (int i = 0; i < nbits->nof_bits / qm; i++) {
const float csi = *(csi_v++) / csi_max;
for (int k = 0; k < qm; k++) {
*e_b = (int8_t) ((float) *e_b * csi);
e_b++;
}
}
} else {
int i = 0;
#ifdef LV_HAVE_SSE
__m128 _csi_scale = _mm_set1_ps(INT16_MAX / csi_max);
__m64 *_e = (__m64 *) e;
switch(cfg->grant.mcs[tb_idx].mod) {
case SRSLTE_MOD_QPSK:
for (; i < nbits->nof_bits - 3; i += 4) {
__m128 _csi1 = _mm_set1_ps(*(csi_v++));
__m128 _csi2 = _mm_set1_ps(*(csi_v++));
_csi1 = _mm_blend_ps(_csi1, _csi2, 3);
_csi1 = _mm_mul_ps(_csi1, _csi_scale);
_e[0] = _mm_mulhi_pi16(_e[0], _mm_cvtps_pi16(_csi1));
_e += 1;
}
break;
case SRSLTE_MOD_16QAM:
for (; i < nbits->nof_bits - 3; i += 4) {
__m128 _csi = _mm_set1_ps(*(csi_v++));
_csi = _mm_mul_ps(_csi, _csi_scale);
_e[0] = _mm_mulhi_pi16(_e[0], _mm_cvtps_pi16(_csi));
_e += 1;
}
break;
case SRSLTE_MOD_64QAM:
for (; i < nbits->nof_bits - 11; i += 12) {
__m128 _csi1 = _mm_set1_ps(*(csi_v++));
__m128 _csi3 = _mm_set1_ps(*(csi_v++));
_csi1 = _mm_mul_ps(_csi1, _csi_scale);
_csi3 = _mm_mul_ps(_csi3, _csi_scale);
__m128 _csi2 = _mm_blend_ps(_csi1, _csi3, 3);
_e[0] = _mm_mulhi_pi16(_e[0], _mm_cvtps_pi16(_csi1));
_e[1] = _mm_mulhi_pi16(_e[1], _mm_cvtps_pi16(_csi2));
_e[2] = _mm_mulhi_pi16(_e[2], _mm_cvtps_pi16(_csi3));
_e += 3;
}
break;
case SRSLTE_MOD_BPSK:
case SRSLTE_MOD_LAST:
/* Do nothing */
break;
}
i /= qm;
#endif /* LV_HAVE_SSE */
for (; i < nbits->nof_bits / qm; i++) {
const float csi = q->csi[codeword_idx][i] / csi_max;
for (int k = 0; k < qm; k++) {
e[qm * i + k] = (int16_t) ((float) e[qm * i + k] * csi);
e_s[qm * i + k] = (int16_t) ((float) e_s[qm * i + k] * csi);
}
}
}
}
static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, srslte_sch_t *dl_sch,
srslte_softbuffer_rx_t *softbuffer, uint16_t rnti, uint8_t *data,
uint32_t codeword_idx, uint32_t tb_idx, bool *ack) {
srslte_ra_nbits_t *nbits = &cfg->nbits[tb_idx];
srslte_ra_mcs_t *mcs = &cfg->grant.mcs[tb_idx];
uint32_t rv = cfg->rv[tb_idx];
int ret = SRSLTE_ERROR_INVALID_INPUTS;
if (softbuffer && data && ack && nbits->nof_bits && nbits->nof_re) {
INFO("Decoding PDSCH SF: %d (CW%d -> TB%d), Mod %s, NofBits: %d, NofSymbols: %d, NofBitsE: %d, rv_idx: %d\n",
cfg->sf_idx, codeword_idx, tb_idx, srslte_mod_string(mcs->mod), mcs->tbs,
nbits->nof_re, nbits->nof_bits, rv);
/* Demodulate symbols.
* The MAX-log-MAP algorithm used in turbo decoding is insensitive to SNR estimation,
* thus we don't need to set it in the LLR normalization
*/
if (q->llr_is_8bit) {
srslte_demod_soft_demodulate_b(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re);
} else {
srslte_demod_soft_demodulate_s(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re);
}
/* Select scrambling sequence */
srslte_sequence_t *seq = get_user_sequence(q, rnti, codeword_idx, cfg->sf_idx, nbits->nof_bits);
/* Bit scrambling */
if (q->llr_is_8bit) {
srslte_scrambling_sb_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
} else {
srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
}
if (q->csi_enabled) {
csi_correction(q, cfg, codeword_idx, tb_idx, q->e[codeword_idx]);
}
/* Return */

@ -654,7 +654,15 @@ int srslte_prach_detect_offset(srslte_prach_t *p,
peak_to_avg[*n_indices] = p->peak_values[j] / corr_ave;
}
if (t_offsets) {
t_offsets[*n_indices] = (float) p->peak_offsets[j] * p->T_seq / p->N_zc;
float corr = 1.8;
if (p->peak_offsets[j] > 30) {
corr = 1.9;
}
if (p->peak_offsets[j] > 250) {
corr = 1.91;
}
t_offsets[*n_indices] = corr*p->peak_offsets[j]/(DELTA_F_RA * p->N_zc);
}
(*n_indices)++;
}

@ -416,7 +416,7 @@ void srslte_pucch_set_threshold(srslte_pucch_t *q, float format1_threshold) {
}
/** Initializes the PUCCH transmitter and receiver */
int srslte_pucch_init(srslte_pucch_t *q) {
int srslte_pucch_init_(srslte_pucch_t *q, bool is_ue) {
int ret = SRSLTE_ERROR_INVALID_INPUTS;
if (q != NULL) {
ret = SRSLTE_ERROR;
@ -426,17 +426,26 @@ int srslte_pucch_init(srslte_pucch_t *q) {
return SRSLTE_ERROR;
}
q->users = calloc(sizeof(srslte_pucch_user_t*), 1+SRSLTE_SIRNTI);
q->is_ue = is_ue;
q->users = calloc(sizeof(srslte_pucch_user_t*), q->is_ue?1:(1+SRSLTE_SIRNTI));
if (!q->users) {
perror("malloc");
goto clean_exit;
}
if (srslte_sequence_init(&q->tmp_seq, 20)) {
goto clean_exit;
}
srslte_uci_cqi_pucch_init(&q->cqi);
q->z = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS);
q->z_tmp = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS);
if (!q->is_ue) {
q->ce = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_PUCCH_MAX_SYMBOLS);
}
q->threshold_format1 = 0.8;
@ -449,13 +458,28 @@ clean_exit:
return ret;
}
/* Initializes a PUCCH object in UE mode.
 * Forwards to srslte_pucch_init_() with is_ue = true and returns its result unchanged.
 */
int srslte_pucch_init_ue(srslte_pucch_t *q) {
return srslte_pucch_init_(q, true);
}
/* Initializes a PUCCH object in eNodeB mode.
 * Forwards to srslte_pucch_init_() with is_ue = false and returns its result unchanged.
 */
int srslte_pucch_init_enb(srslte_pucch_t *q) {
return srslte_pucch_init_(q, false);
}
void srslte_pucch_free(srslte_pucch_t *q) {
if (q->users) {
if (q->is_ue) {
srslte_pucch_clear_rnti(q, 0);
} else {
for (int rnti = 0; rnti <= SRSLTE_SIRNTI; rnti++) {
srslte_pucch_clear_rnti(q, rnti);
}
}
free(q->users);
}
srslte_sequence_free(&q->tmp_seq);
srslte_uci_cqi_pucch_free(&q->cqi);
if (q->z) {
free(q->z);
@ -497,29 +521,43 @@ int srslte_pucch_set_cell(srslte_pucch_t *q, srslte_cell_t cell) {
void srslte_pucch_clear_rnti(srslte_pucch_t *q, uint16_t rnti) {
if (q->users[rnti]) {
uint32_t rnti_idx = q->is_ue?0:rnti;
if (q->users[rnti_idx]) {
for (int i = 0; i < SRSLTE_NSUBFRAMES_X_FRAME; i++) {
srslte_sequence_free(&q->users[rnti]->seq_f2[i]);
srslte_sequence_free(&q->users[rnti_idx]->seq_f2[i]);
}
free(q->users[rnti]);
q->users[rnti] = NULL;
free(q->users[rnti_idx]);
q->users[rnti_idx] = NULL;
q->ue_rnti = 0;
}
}
int srslte_pucch_set_crnti(srslte_pucch_t *q, uint16_t rnti) {
if (!q->users[rnti]) {
q->users[rnti] = calloc(1, sizeof(srslte_pucch_user_t));
if (q->users[rnti]) {
uint32_t rnti_idx = q->is_ue?0:rnti;
if (!q->users[rnti_idx] || q->is_ue) {
if (!q->users[rnti_idx]) {
q->users[rnti_idx] = calloc(1, sizeof(srslte_pucch_user_t));
if (!q->users[rnti_idx]) {
perror("calloc");
return -1;
}
}
q->users[rnti_idx]->sequence_generated = false;
for (uint32_t sf_idx=0;sf_idx<SRSLTE_NSUBFRAMES_X_FRAME;sf_idx++) {
// Precompute scrambling sequence for pucch format 2
if (srslte_sequence_pucch(&q->users[rnti]->seq_f2[sf_idx], rnti, 2*sf_idx, q->cell.id)) {
if (srslte_sequence_pucch(&q->users[rnti_idx]->seq_f2[sf_idx], rnti, 2*sf_idx, q->cell.id)) {
fprintf(stderr, "Error computing PUCCH Format 2 scrambling sequence\n");
srslte_pucch_clear_rnti(q, rnti);
return SRSLTE_ERROR;
}
}
q->users[rnti]->sequence_generated = true;
}
q->ue_rnti = rnti;
q->users[rnti_idx]->cell_id = q->cell.id;
q->users[rnti_idx]->sequence_generated = true;
} else {
fprintf(stderr, "Error generating PUSCH sequence: rnti=0x%x already generated\n", rnti);
}
return SRSLTE_SUCCESS;
}
@ -592,11 +630,36 @@ int srslte_pucch_format2ab_mod_bits(srslte_pucch_format_t format, uint8_t bits[2
}
}
/* Returns the PUCCH Format 2 scrambling sequence to use for (rnti, sf_idx).
 *
 * The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only
 * for the C-RNTI in the UE. When a pregenerated sequence matching the current
 * cell id (and, in the UE, the configured RNTI) is available it is returned
 * directly; otherwise the sequence is computed into q->tmp_seq and a pointer to
 * that scratch member is returned (so it is overwritten by the next such call).
 *
 * Returns NULL if rnti lies outside [SRSLTE_CRNTI_START, SRSLTE_CRNTI_END) or if
 * the sequence could not be generated.
 */
static srslte_sequence_t *get_user_sequence(srslte_pucch_t *q, uint16_t rnti, uint32_t sf_idx)
{
  /* Only C-RNTI values are accepted */
  if (rnti < SRSLTE_CRNTI_START || rnti >= SRSLTE_CRNTI_END) {
    fprintf(stderr, "Invalid RNTI=0x%x\n", rnti);
    return NULL;
  }

  /* The UE keeps a single user entry at index 0; the eNodeB indexes by RNTI */
  const uint32_t user_slot = q->is_ue ? 0 : rnti;
  srslte_pucch_user_t *user = q->users[user_slot];

  /* Fast path: reuse the pregenerated sequence when it is still valid */
  if (user != NULL &&
      user->sequence_generated &&
      user->cell_id == q->cell.id &&
      (!q->is_ue || q->ue_rnti == rnti)) {
    return &user->seq_f2[sf_idx];
  }

  /* Slow path: generate the sequence on the fly into the scratch buffer */
  if (srslte_sequence_pucch(&q->tmp_seq, rnti, 2 * sf_idx, q->cell.id)) {
    fprintf(stderr, "Error computing PUCCH Format 2 scrambling sequence\n");
    return NULL;
  }
  return &q->tmp_seq;
}
/* Encode PUCCH bits according to Table 5.4.1-1 in Section 5.4.1 of 36.211 */
static int uci_mod_bits(srslte_pucch_t *q, srslte_pucch_format_t format, uint8_t bits[SRSLTE_PUCCH_MAX_BITS], uint32_t sf_idx, uint16_t rnti)
{
uint8_t tmp[2];
srslte_sequence_t *seq;
switch(format) {
case SRSLTE_PUCCH_FORMAT_1:
q->d[0] = uci_encode_format1();
@ -612,12 +675,13 @@ static int uci_mod_bits(srslte_pucch_t *q, srslte_pucch_format_t format, uint8_t
case SRSLTE_PUCCH_FORMAT_2:
case SRSLTE_PUCCH_FORMAT_2A:
case SRSLTE_PUCCH_FORMAT_2B:
if (q->users[rnti] && q->users[rnti]->sequence_generated) {
seq = get_user_sequence(q, rnti, sf_idx);
if (seq) {
memcpy(q->bits_scram, bits, SRSLTE_PUCCH2_NOF_BITS*sizeof(uint8_t));
srslte_scrambling_b(&q->users[rnti]->seq_f2[sf_idx], q->bits_scram);
srslte_scrambling_b(seq, q->bits_scram);
srslte_mod_modulate(&q->mod, q->bits_scram, q->d, SRSLTE_PUCCH2_NOF_BITS);
} else {
fprintf(stderr, "Error modulating PUCCH2 bits: rnti not set\n");
fprintf(stderr, "Error modulating PUCCH2 bits: could not generate sequence\n");
return -1;
}
break;
@ -745,6 +809,8 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format,
uint8_t bits[SRSLTE_PUCCH_MAX_BITS], uint32_t nof_bits)
{
int ret = SRSLTE_ERROR_INVALID_INPUTS;
srslte_sequence_t *seq;
if (q != NULL &&
ce != NULL &&
sf_symbols != NULL)
@ -843,7 +909,8 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format,
case SRSLTE_PUCCH_FORMAT_2:
case SRSLTE_PUCCH_FORMAT_2A:
case SRSLTE_PUCCH_FORMAT_2B:
if (q->users[rnti] && q->users[rnti]->sequence_generated) {
seq = get_user_sequence(q, rnti, sf_idx);
if (seq) {
pucch_encode_(q, format, n_pucch, sf_idx, rnti, NULL, ref, true);
srslte_vec_prod_conj_ccc(q->z, ref, q->z_tmp, SRSLTE_PUCCH_MAX_SYMBOLS);
for (int i=0;i<SRSLTE_PUCCH2_NOF_BITS/2;i++) {
@ -853,11 +920,11 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format,
}
}
srslte_demod_soft_demodulate_s(SRSLTE_MOD_QPSK, q->z, llr_pucch2, SRSLTE_PUCCH2_NOF_BITS/2);
srslte_scrambling_s(&q->users[rnti]->seq_f2[sf_idx], llr_pucch2);
srslte_scrambling_s(seq, llr_pucch2);
q->last_corr = (float) srslte_uci_decode_cqi_pucch(&q->cqi, llr_pucch2, bits, nof_bits)/2000;
ret = 1;
} else {
fprintf(stderr, "Decoding PUCCH2: rnti not set\n");
fprintf(stderr, "Decoding PUCCH2: could not generate sequence\n");
return -1;
}
break;

@ -481,17 +481,25 @@ static srslte_sequence_t *get_user_sequence(srslte_pusch_t *q, uint16_t rnti, ui
{
uint32_t rnti_idx = q->is_ue?0:rnti;
if (rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) {
// The scrambling sequence is pregenerated for all RNTIs in the eNodeB but only for C-RNTI in the UE
if (q->users[rnti_idx] && q->users[rnti_idx]->sequence_generated &&
if (q->users[rnti_idx] &&
q->users[rnti_idx]->sequence_generated &&
q->users[rnti_idx]->cell_id == q->cell.id &&
q->ue_rnti == rnti &&
((rnti >= SRSLTE_CRNTI_START && rnti < SRSLTE_CRNTI_END) || !q->is_ue))
(!q->is_ue || q->ue_rnti == rnti))
{
return &q->users[rnti_idx]->seq[sf_idx];
} else {
srslte_sequence_pusch(&q->tmp_seq, rnti, 2 * sf_idx, q->cell.id, len);
if (srslte_sequence_pusch(&q->tmp_seq, rnti, 2 * sf_idx, q->cell.id, len)) {
fprintf(stderr, "Error generating temporal scrambling sequence\n");
return NULL;
}
return &q->tmp_seq;
}
} else {
fprintf(stderr, "Invalid RNTI=0x%x\n", rnti);
return NULL;
}
}
/** Converts the PUSCH data bits to symbols mapped to the slot ready for transmission
@ -603,7 +611,11 @@ int srslte_pusch_decode(srslte_pusch_t *q,
srslte_dft_precoding(&q->dft_precoding, q->z, q->d, cfg->grant.L_prb, cfg->nbits.nof_symb);
// Soft demodulation
if (q->llr_is_8bit) {
srslte_demod_soft_demodulate_b(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re);
} else {
srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re);
}
// Generate scrambling sequence if not pre-generated
srslte_sequence_t *seq = get_user_sequence(q, rnti, cfg->sf_idx, cfg->nbits.nof_bits);
@ -632,7 +644,11 @@ int srslte_pusch_decode(srslte_pusch_t *q,
}
// Descrambling
if (q->llr_is_8bit) {
srslte_scrambling_sb_offset(seq, q->q, 0, cfg->nbits.nof_bits);
} else {
srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits);
}
// Decode
ret = srslte_ulsch_uci_decode(&q->ul_sch, cfg, softbuffer, q->q, q->g, data, uci_data);

@ -32,12 +32,18 @@
#include <stdbool.h>
#include <assert.h>
#include <math.h>
#include <srslte/phy/phch/sch.h>
#include "srslte/phy/phch/pdsch.h"
#include "srslte/phy/utils/bit.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"
#define SRSLTE_PDSCH_MAX_TDEC_ITERS 4
#define SRSLTE_PDSCH_MAX_TDEC_ITERS 10
#ifdef LV_HAVE_SSE
#include <immintrin.h>
#endif /* LV_HAVE_SSE */
/* 36.213 Table 8.6.3-1: Mapping of HARQ-ACK offset values and the index signalled by higher layers */
float beta_harq_offset[16] = {2.0, 2.5, 3.125, 4.0, 5.0, 6.250, 8.0, 10.0,
@ -184,8 +190,6 @@ static int encode_tb_off(srslte_sch_t *q,
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
uint8_t *data, uint8_t *e_bits, uint32_t w_offset)
{
uint8_t parity[3] = {0, 0, 0};
uint32_t par;
uint32_t i;
uint32_t cb_len=0, rp=0, wp=0, rlen=0, n_e=0;
int ret = SRSLTE_ERROR_INVALID_INPUTS;
@ -213,16 +217,8 @@ static int encode_tb_off(srslte_sch_t *q,
gamma = Gp%cb_segm->C;
}
if (data) {
/* Compute transport block CRC */
par = srslte_crc_checksum_byte(&q->crc_tb, data, cb_segm->tbs);
/* parity bits will be appended later */
parity[0] = (par&(0xff<<16))>>16;
parity[1] = (par&(0xff<<8))>>8;
parity[2] = par&0xff;
}
/* Reset TB CRC */
srslte_crc_set_init(&q->crc_tb, 0);
wp = 0;
rp = 0;
@ -252,6 +248,7 @@ static int encode_tb_off(srslte_sch_t *q,
cb_len, rlen, wp, rp, n_e);
if (data) {
bool last_cb = false;
/* Copy data to another buffer, making space for the Codeblock CRC */
if (i < cb_segm->C - 1) {
@ -263,13 +260,19 @@ static int encode_tb_off(srslte_sch_t *q,
/* Append Transport Block parity bits to the last CB */
memcpy(q->cb_in, &data[rp/8], (rlen - 24) * sizeof(uint8_t)/8);
memcpy(&q->cb_in[(rlen - 24)/8], parity, 3 * sizeof(uint8_t));
last_cb = true;
}
/* Turbo Encoding
* If Codeblock CRC is required it is given the CRC instance pointer, otherwise CRC pointer shall be NULL
*/
srslte_tcod_encode_lut(&q->encoder, (cb_segm->C > 1) ? &q->crc_cb : NULL, q->cb_in, q->parity_bits, cblen_idx);
srslte_tcod_encode_lut(&q->encoder,
&q->crc_tb,
(cb_segm->C > 1) ? &q->crc_cb : NULL,
q->cb_in,
q->parity_bits,
cblen_idx,
last_cb);
}
DEBUG("RM cblen_idx=%d, n_e=%d, wp=%d, nof_e_bits=%d\n",cblen_idx, n_e, wp, nof_e_bits);
@ -304,88 +307,65 @@ static int encode_tb(srslte_sch_t *q,
bool decode_tb_cb(srslte_sch_t *q,
srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm,
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
int16_t *e_bits, uint8_t *data,
uint32_t cb_size_group)
void *e_bits, uint8_t *data)
{
bool cb_map[SRSLTE_MAX_CODEBLOCKS];
uint32_t cb_idx[SRSLTE_TDEC_MAX_NPAR];
int16_t *decoder_input[SRSLTE_TDEC_MAX_NPAR];
int8_t *e_bits_b = e_bits;
int16_t *e_bits_s = e_bits;
uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1;
uint32_t first_cb = cb_size_group?cb_segm->C1:0;
uint32_t cb_len = cb_size_group?cb_segm->K2:cb_segm->K1;
uint32_t cb_len_idx = cb_size_group?cb_segm->K2_idx:cb_segm->K1_idx;
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
uint32_t Gp = nof_e_bits / Qm;
uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp;
uint32_t n_e = Qm * (Gp/cb_segm->C);
if (nof_cb > SRSLTE_MAX_CODEBLOCKS) {
if (cb_segm->C > SRSLTE_MAX_CODEBLOCKS) {
fprintf(stderr, "Error SRSLTE_MAX_CODEBLOCKS=%d\n", SRSLTE_MAX_CODEBLOCKS);
return false;
}
for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
cb_idx[i] = i+first_cb;
decoder_input[i] = NULL;
}
uint32_t remaining_cb = 0;
for (int i=0;i<nof_cb;i++) {
/* Do not process blocks with CRC Ok */
cb_map[i] = softbuffer->cb_crc[i];
if (softbuffer->cb_crc[i] == false) {
remaining_cb ++;
}
}
srslte_tdec_reset(&q->decoder, cb_len);
q->nof_iterations = 0;
while(remaining_cb>0) {
for (int cb_idx=0;cb_idx<cb_segm->C;cb_idx++)
{
/* Do not process blocks with CRC Ok */
if (softbuffer->cb_crc[cb_idx] == false) {
// Unratematch the codeblocks left to decode
for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
uint32_t cb_len = cb_idx<cb_segm->C1?cb_segm->K1:cb_segm->K2;
uint32_t cb_len_idx = cb_idx<cb_segm->C1?cb_segm->K1_idx:cb_segm->K2_idx;
if (!decoder_input[i] && remaining_cb > 0) {
// Find an unprocessed CB
cb_idx[i]=first_cb;
while(cb_idx[i]<first_cb+nof_cb-1 && cb_map[cb_idx[i]]) {
cb_idx[i]++;
}
if (cb_map[cb_idx[i]] == false) {
cb_map[cb_idx[i]] = true;
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
uint32_t Gp = nof_e_bits / Qm;
uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp;
uint32_t n_e = Qm * (Gp/cb_segm->C);
uint32_t rp = cb_idx[i]*n_e;
uint32_t rp = cb_idx*n_e;
uint32_t n_e2 = n_e;
if (cb_idx[i] > cb_segm->C - gamma) {
if (cb_idx > cb_segm->C - gamma) {
n_e2 = n_e+Qm;
rp = (cb_segm->C - gamma)*n_e + (cb_idx[i]-(cb_segm->C - gamma))*n_e2;
rp = (cb_segm->C - gamma)*n_e + (cb_idx-(cb_segm->C - gamma))*n_e2;
}
INFO("CB %d: rp=%d, n_e=%d, i=%d\n", cb_idx[i], rp, n_e2, i);
if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[cb_idx[i]], n_e2, cb_len_idx, rv)) {
if (q->llr_is_8bit) {
if (srslte_rm_turbo_rx_lut_8bit(&e_bits_b[rp], (int8_t*) softbuffer->buffer_f[cb_idx], n_e2, cb_len_idx, rv)) {
fprintf(stderr, "Error in rate matching\n");
return SRSLTE_ERROR;
}
decoder_input[i] = softbuffer->buffer_f[cb_idx[i]];
}
} else {
if (srslte_rm_turbo_rx_lut(&e_bits_s[rp], softbuffer->buffer_f[cb_idx], n_e2, cb_len_idx, rv)) {
fprintf(stderr, "Error in rate matching\n");
return SRSLTE_ERROR;
}
}
// Run 1 iteration for the codeblocks in queue
srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len);
srslte_tdec_new_cb(&q->decoder, cb_len);
// Decide output bits and compute CRC
for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
if (decoder_input[i]) {
srslte_tdec_decision_byte_par_cb(&q->decoder, q->cb_in, i, cb_len);
// Run iterations and use CRC for early stopping
bool early_stop = false;
uint32_t cb_noi = 0;
do {
if (q->llr_is_8bit) {
srslte_tdec_iteration_8bit(&q->decoder, (int8_t*) softbuffer->buffer_f[cb_idx], &data[cb_idx*rlen/8]);
} else {
srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[cb_idx], &data[cb_idx*rlen/8]);
}
q->nof_iterations++;
cb_noi++;
uint32_t len_crc;
srslte_crc_t *crc_ptr;
@ -399,47 +379,45 @@ bool decode_tb_cb(srslte_sch_t *q,
}
// CRC is OK
if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) {
if (!srslte_crc_checksum_byte(crc_ptr, &data[cb_idx*rlen/8], len_crc)) {
memcpy(softbuffer->data[cb_idx[i]], q->cb_in, rlen/8 * sizeof(uint8_t));
softbuffer->cb_crc[cb_idx[i]] = true;
q->nof_iterations += srslte_tdec_get_nof_iterations_cb(&q->decoder, i);
// Reset number of iterations for that CB in the decoder
srslte_tdec_reset_cb(&q->decoder, i);
remaining_cb--;
decoder_input[i] = NULL;
cb_idx[i] = 0;
softbuffer->cb_crc[cb_idx] = true;
early_stop = true;
// CRC is error and exceeded maximum iterations for this CB.
// Early stop the whole transport block.
} else if (srslte_tdec_get_nof_iterations_cb(&q->decoder, i) >= q->max_iterations) {
INFO("CB %d: Error. CB is erroneous. remaining_cb=%d, i=%d, first_cb=%d, nof_cb=%d\n",
cb_idx[i], remaining_cb, i, first_cb, nof_cb);
q->nof_iterations += q->max_iterations;
srslte_tdec_reset_cb(&q->decoder, i);
remaining_cb--;
decoder_input[i] = NULL;
cb_idx[i] = 0;
}
}
} while (cb_noi < q->max_iterations && !early_stop);
INFO("CB %d: rp=%d, n_e=%d, cb_len=%d, CRC=%s, rlen=%d, iterations=%d/%d\n",
cb_idx, rp, n_e2, cb_len, early_stop?"OK":"KO", rlen, cb_noi, q->max_iterations);
} else {
// Copy decoded data from previous transmissions
uint32_t cb_len = cb_idx<cb_segm->C1?cb_segm->K1:cb_segm->K2;
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
memcpy(&data[cb_idx*rlen/8], softbuffer->data[cb_idx], rlen/8 * sizeof(uint8_t));
}
}
softbuffer->tb_crc = true;
for (int i = 0; i < nof_cb && softbuffer->tb_crc; i++) {
for (int i = 0; i < cb_segm->C && softbuffer->tb_crc; i++) {
/* If one CB failed return false */
softbuffer->tb_crc = softbuffer->cb_crc[i];
}
if (softbuffer->tb_crc) {
for (int i = 0; i < nof_cb; i++) {
memcpy(&data[i * rlen / 8], softbuffer->data[i], rlen/8 * sizeof(uint8_t));
// If TB CRC failed, save correct CB for next retransmission
if (!softbuffer->tb_crc) {
for (int i = 0; i < cb_segm->C; i++) {
if (softbuffer->cb_crc[i]) {
uint32_t cb_len = i<cb_segm->C1?cb_segm->K1:cb_segm->K2;
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
memcpy(softbuffer->data[i], &data[i * rlen / 8], rlen/8 * sizeof(uint8_t));
}
}
}
q->nof_iterations /= nof_cb;
q->nof_iterations /= cb_segm->C;
return softbuffer->tb_crc;
}
@ -485,16 +463,12 @@ static int decode_tb(srslte_sch_t *q,
bool crc_ok = true;
uint32_t nof_cb_groups = cb_segm->C2>0?2:1;
data[cb_segm->tbs/8+0] = 0;
data[cb_segm->tbs/8+1] = 0;
data[cb_segm->tbs/8+2] = 0;
// Process Codeblocks in groups of equal CB size to parallelize according to SRSLTE_TDEC_MAX_NPAR
for (uint32_t i=0;i<nof_cb_groups && crc_ok;i++) {
crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data, i);
}
// Process Codeblocks
crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data);
if (crc_ok) {
@ -593,22 +567,286 @@ static void ulsch_interleave_gen(uint32_t H_prime_total, uint32_t N_pusch_symbs,
}
}
/* OR-s the 2-bit symbol found at bit offset src_bit of g_bits into bit offset
 * dst_bit of q_bits. Bits are numbered MSB first; both offsets are even. */
static void ulsch_qm2_copy_symbol(const uint8_t *g_bits, uint8_t *q_bits, uint32_t src_bit, uint32_t dst_bit)
{
  uint8_t symbol = (g_bits[src_bit / 8] >> (6 - src_bit % 8)) & (uint8_t) 0x03;
  q_bits[dst_bit / 8] |= symbol << (6 - dst_bit % 8);
}

/* Interleaves packed 2-bit (Qm = 2) symbols from g_bits into q_bits.
 *
 * Symbols are consumed sequentially from g_bits and written for row j, column i
 * at bit offset (i * rows + j) * 2 of q_bits. Values are OR-ed into q_bits, so
 * the destination is expected to be zeroed by the caller.
 *
 * g_bits      packed input bits, MSB first
 * rows, cols  dimensions of the interleaver matrix
 * q_bits      packed output bits
 * ri_min_row  rows below this index are copied without consulting ri_present
 * ri_present  indexed by output bit offset; a non-zero entry marks a position
 *             that is skipped (no input symbol is consumed for it)
 */
static void ulsch_interleave_qm2(const uint8_t *g_bits, uint32_t rows, uint32_t cols, uint8_t *q_bits, uint32_t ri_min_row, const uint8_t *ri_present) {
  uint32_t next_src_bit = 0;
  uint32_t row;
  uint32_t col;

  /* Rows without reserved positions: straight copy */
  for (row = 0; row < ri_min_row; row++) {
    for (col = 0; col < cols; col++) {
      ulsch_qm2_copy_symbol(g_bits, q_bits, next_src_bit, (col * rows + row) * 2);
      next_src_bit += 2;
    }
  }

  /* Remaining rows: leave the positions flagged in ri_present untouched */
  for (row = ri_min_row; row < rows; row++) {
    for (col = 0; col < cols; col++) {
      uint32_t dst_bit = (col * rows + row) * 2;
      if (ri_present[dst_bit]) {
        continue;
      }
      ulsch_qm2_copy_symbol(g_bits, q_bits, next_src_bit, dst_bit);
      next_src_bit += 2;
    }
  }
}
/* OR-s the 4-bit symbol found at bit offset src_bit of g_bits into bit offset
 * dst_bit of q_bits. Bits are numbered MSB first; both offsets are multiples of 4. */
static void ulsch_qm4_copy_symbol(const uint8_t *g_bits, uint8_t *q_bits, uint32_t src_bit, uint32_t dst_bit)
{
  uint8_t w = (g_bits[src_bit / 8] >> (4 - src_bit % 8)) & (uint8_t) 0x0f;
  q_bits[dst_bit / 8] |= w << (4 - dst_bit % 8);
}

/* Interleaves packed 4-bit (Qm = 4) symbols from g_bits into q_bits.
 *
 * Symbols are consumed sequentially from g_bits and written for row j, column i
 * at bit offset (i * rows + j) * 4 of q_bits. Values are OR-ed into q_bits, so
 * the destination is expected to be zeroed by the caller.
 *
 * g_bits      packed input bits, MSB first
 * rows, cols  dimensions of the interleaver matrix
 * q_bits      packed output bits
 * ri_min_row  rows below this index are copied without consulting ri_present
 * ri_present  indexed by output bit offset; a non-zero entry marks a position
 *             that is skipped (no input symbol is consumed for it)
 *
 * NOTE: a vectorised variant used to live here behind `#ifndef LV_HAVE_SSE`.
 * It could never work: it was only compiled when SSE was *not* available, it
 * referenced constants that were never declared (_counter0, _rows, _inc, _7,
 * _4), and its unaligned branch loaded the same input byte twice. It has been
 * removed; the scalar path below is what SSE builds were already executing.
 */
static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols, uint8_t *q_bits, uint32_t ri_min_row, const uint8_t *ri_present) {
  uint32_t bit_read_idx = 0;

  /* First rows, no RI: straight copy */
  for (uint32_t j = 0; j < ri_min_row; j++) {
    for (uint32_t i = 0; i < cols; i++) {
      ulsch_qm4_copy_symbol(g_bits, q_bits, bit_read_idx, (i * rows + j) * 4);
      bit_read_idx += 4;
    }
  }

  /* Do rows containing RI: positions flagged in ri_present are left untouched */
  for (uint32_t j = ri_min_row; j < rows; j++) {
    for (uint32_t i = 0; i < cols; i++) {
      uint32_t k = (i * rows + j) * 4;
      if (ri_present[k]) {
        continue;
      }
      ulsch_qm4_copy_symbol(g_bits, q_bits, bit_read_idx, k);
      bit_read_idx += 4;
    }
  }
}
/* Extracts the 6-bit symbol starting at bit offset bit_pos of g_bits (MSB first).
 * Offsets that are not even within the byte yield 0. The following byte is only
 * read when the symbol actually straddles a byte boundary. */
static uint8_t ulsch_qm6_load(const uint8_t *g_bits, uint32_t bit_pos)
{
  const uint8_t *src = &g_bits[bit_pos / 8];

  switch (bit_pos % 8) {
    case 0:
      return src[0] >> 2;
    case 2:
      return src[0] & (uint8_t) 0x3f;
    case 4:
      return ((src[0] << 2) | (src[1] >> 6)) & (uint8_t) 0x3f;
    case 6:
      return ((src[0] << 4) | (src[1] >> 4)) & (uint8_t) 0x3f;
    default:
      return 0;
  }
}

/* OR-s the 6-bit symbol sym into q_bits at bit offset bit_pos (MSB first).
 * Offsets that are not even within the byte are ignored. The following byte is
 * only written when the symbol straddles a byte boundary. */
static void ulsch_qm6_store(uint8_t *q_bits, uint32_t bit_pos, uint8_t sym)
{
  uint8_t *dst = &q_bits[bit_pos / 8];

  switch (bit_pos % 8) {
    case 0:
      dst[0] |= sym << 2;
      break;
    case 2:
      dst[0] |= sym;
      break;
    case 4:
      dst[0] |= sym >> 2;
      dst[1] |= sym << 6;
      break;
    case 6:
      dst[0] |= sym >> 4;
      dst[1] |= sym << 4;
      break;
    default:
      /* Do nothing */;
  }
}

/* Interleaves packed 6-bit (Qm = 6) symbols from g_bits into q_bits.
 *
 * Symbols are consumed sequentially from g_bits and written for row j, column i
 * at bit offset (i * rows + j) * 6 of q_bits. Values are OR-ed into q_bits, so
 * the destination is expected to be zeroed by the caller.
 *
 * g_bits      packed input bits, MSB first
 * rows, cols  dimensions of the interleaver matrix
 * q_bits      packed output bits
 * ri_min_row  rows below this index are copied without consulting ri_present
 * ri_present  indexed by output bit offset; a non-zero entry marks a position
 *             that is skipped (no input symbol is consumed for it)
 */
static void ulsch_interleave_qm6(const uint8_t *g_bits,
                                 uint32_t rows,
                                 uint32_t cols,
                                 uint8_t *q_bits,
                                 uint32_t ri_min_row,
                                 const uint8_t *ri_present) {
  uint32_t next_src_bit = 0;
  uint32_t row;
  uint32_t col;

  /* Rows without reserved positions: straight copy */
  for (row = 0; row < ri_min_row; row++) {
    for (col = 0; col < cols; col++) {
      uint32_t dst_bit = (col * rows + row) * 6;
      uint8_t sym = ulsch_qm6_load(g_bits, next_src_bit);
      ulsch_qm6_store(q_bits, dst_bit, sym);
      next_src_bit += 6;
    }
  }

  /* Remaining rows: leave the positions flagged in ri_present untouched */
  for (row = ri_min_row; row < rows; row++) {
    for (col = 0; col < cols; col++) {
      uint32_t dst_bit = (col * rows + row) * 6;
      if (ri_present[dst_bit]) {
        continue;
      }
      uint8_t sym = ulsch_qm6_load(g_bits, next_src_bit);
      ulsch_qm6_store(q_bits, dst_bit, sym);
      next_src_bit += 6;
    }
  }
}
/* UL-SCH channel interleaver according to 5.2.2.8 of 36.212 */
void ulsch_interleave(uint8_t *g_bits, uint32_t Qm, uint32_t H_prime_total,
uint32_t N_pusch_symbs, uint8_t *q_bits, srslte_uci_bit_t *ri_bits, uint32_t nof_ri_bits,
uint8_t *ri_present, uint32_t *inteleaver_lut)
{
const uint32_t nof_bits = H_prime_total * Qm;
uint32_t rows = H_prime_total / N_pusch_symbs;
uint32_t cols = N_pusch_symbs;
uint32_t ri_min_row = rows;
// Prepare ri_bits for fast search using temp_buffer
if (nof_ri_bits > 0) {
for (uint32_t i=0;i<nof_ri_bits;i++) {
uint32_t ri_row = (ri_bits[i].position / Qm) % rows;
if (ri_row < ri_min_row) {
ri_min_row = ri_row;
}
ri_present[ri_bits[i].position] = 1;
}
}
#if 1
bzero(q_bits, nof_bits / 8);
switch (Qm) {
case 2:
ulsch_interleave_qm2(g_bits, rows, cols, q_bits, ri_min_row, ri_present);
break;
case 4:
ulsch_interleave_qm4(g_bits, rows, cols, q_bits, ri_min_row, ri_present);
break;
case 6:
ulsch_interleave_qm6(g_bits, rows, cols, q_bits, ri_min_row, ri_present);
break;
default:
/* This line should never be reached */
fprintf(stderr, "Wrong Qm (%d)\n", Qm);
}
#else
// Generate interleaver table and interleave bits
ulsch_interleave_gen(H_prime_total, N_pusch_symbs, Qm, ri_present, inteleaver_lut);
srslte_bit_interleave_i(g_bits, q_bits, inteleaver_lut, H_prime_total*Qm);
#endif
// Reset temp_buffer because will be reused next time
if (nof_ri_bits > 0) {

@ -63,18 +63,23 @@ bool tb_cw_swap = false;
bool enable_coworker = false;
uint32_t pmi = 0;
char *input_file = NULL;
int M=1;
bool use_8_bit = false;
void usage(char *prog) {
printf("Usage: %s [fmMcsrtRFpnwav] \n", prog);
printf("Usage: %s [fmMbcsrtRFpnwav] \n", prog);
printf("\t-f read signal from file [Default generate it with pdsch_encode()]\n");
printf("\t-m MCS [Default %d]\n", mcs[0]);
printf("\t-M MCS2 [Default %d]\n", mcs[1]);
printf("\t-c cell id [Default %d]\n", cell.id);
printf("\t-b Use 8-bit LLR [Default 16-bit]\n");
printf("\t-s subframe [Default %d]\n", subframe);
printf("\t-r rv_idx [Default %d]\n", rv_idx[0]);
printf("\t-t rv_idx2 [Default %d]\n", rv_idx[1]);
printf("\t-R rnti [Default %d]\n", rnti);
printf("\t-F cfi [Default %d]\n", cfi);
printf("\t-X Number of repetitions for time measurement [Default %d]\n", M);
printf("\t-x Transmission mode [single|diversity|cdd|multiplex] [Default %s]\n", mimo_type_str);
printf("\t-n cell.nof_prb [Default %d]\n", cell.nof_prb);
printf("\t-a nof_rx_antennas [Default %d]\n", nof_rx_antennas);
@ -86,7 +91,7 @@ void usage(char *prog) {
void parse_args(int argc, char **argv) {
int opt;
while ((opt = getopt(argc, argv, "fmMcsrtRFpnawvxj")) != -1) {
while ((opt = getopt(argc, argv, "fmMcsbrtRFpnawvXxj")) != -1) {
switch(opt) {
case 'f':
input_file = argv[optind];
@ -94,12 +99,18 @@ void parse_args(int argc, char **argv) {
case 'm':
mcs[0] = (uint32_t) atoi(argv[optind]);
break;
case 'b':
use_8_bit = true;
break;
case 'M':
mcs[1] = (uint32_t) atoi(argv[optind]);
break;
case 's':
subframe = atoi(argv[optind]);
break;
case 'X':
M = (uint32_t) atoi(argv[optind]);
break;
case 'r':
rv_idx[0] = (uint32_t) atoi(argv[optind]);
break;
@ -166,7 +177,6 @@ int main(int argc, char **argv) {
int ret = -1;
struct timeval t[3];
srslte_softbuffer_tx_t *softbuffers_tx[SRSLTE_MAX_CODEWORDS];
int M=1;
bool acks[SRSLTE_MAX_CODEWORDS] = {false};
parse_args(argc,argv);
@ -319,6 +329,9 @@ int main(int argc, char **argv) {
goto quit;
}
pdsch_rx.llr_is_8bit = use_8_bit;
pdsch_rx.dl_sch.llr_is_8bit = use_8_bit;
srslte_pdsch_set_rnti(&pdsch_rx, rnti);
for (i = 0; i < SRSLTE_MAX_CODEWORDS; i++) {
@ -517,8 +530,8 @@ int main(int argc, char **argv) {
for (int byte = 0; byte < grant.mcs[tb].tbs / 8; byte++) {
if (data_tx[tb][byte] != data_rx[tb][byte]) {
ERROR("Found BYTE (%d) error in TB %d (%02X != %02X), quiting...", byte, tb, data_tx[tb][byte], data_rx[tb][byte]);
printf("Tx: "); srslte_vec_fprint_byte(stdout, data_tx[tb], grant.mcs[tb].tbs / 8);
printf("Rx: "); srslte_vec_fprint_byte(stdout, data_rx[tb], grant.mcs[tb].tbs / 8);
//printf("Tx: "); srslte_vec_fprint_byte(stdout, data_tx[tb], grant.mcs[tb].tbs / 8);
//printf("Rx: "); srslte_vec_fprint_byte(stdout, data_rx[tb], grant.mcs[tb].tbs / 8);
ret = SRSLTE_ERROR;
goto quit;
}

@ -65,7 +65,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
return;
}
srslte_pucch_t pucch;
if (srslte_pucch_init(&pucch, cell)) {
if (srslte_pucch_init_ue(&pucch, cell)) {
mexErrMsgTxt("Error initiating PUSCH\n");
return;
}

@ -149,7 +149,7 @@ int main(int argc, char **argv) {
return test_uci_cqi_pucch();
}
if (srslte_pucch_init(&pucch)) {
if (srslte_pucch_init_ue(&pucch)) {
fprintf(stderr, "Error creating PDSCH object\n");
exit(-1);
}

@ -67,7 +67,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
return;
}
srslte_pucch_t pucch;
if (srslte_pucch_init(&pucch, cell)) {
if (srslte_pucch_init_ue(&pucch, cell)) {
mexErrMsgTxt("Error initiating PUSCH\n");
return;
}

@ -25,22 +25,19 @@
*/
#include <libbladeRF.h>
#include <sys/time.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include "srslte/srslte.h"
#include "rf_blade_imp.h"
#include "srslte/phy/rf/rf.h"
#define CONVERT_BUFFER_SIZE 240*1024
#define CONVERT_BUFFER_SIZE (240*1024)
typedef struct {
struct bladerf *dev;
uint32_t rx_rate;
uint32_t tx_rate;
bladerf_sample_rate rx_rate;
bladerf_sample_rate tx_rate;
int16_t rx_buffer[CONVERT_BUFFER_SIZE];
int16_t tx_buffer[CONVERT_BUFFER_SIZE];
bool rx_stream_enabled;
@ -83,7 +80,7 @@ int rf_blade_start_tx_stream(void *h)
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
status = bladerf_sync_config(handler->dev,
BLADERF_MODULE_TX,
BLADERF_TX_X1,
BLADERF_FORMAT_SC16_Q11_META,
num_buffers,
buffer_size_tx,
@ -112,7 +109,7 @@ int rf_blade_start_rx_stream(void *h, bool now)
uint32_t buffer_size_rx = ms_buffer_size_rx*(handler->rx_rate/1000/1024);
status = bladerf_sync_config(handler->dev,
BLADERF_MODULE_RX,
BLADERF_RX_X1,
BLADERF_FORMAT_SC16_Q11_META,
num_buffers,
buffer_size_rx,
@ -123,7 +120,7 @@ int rf_blade_start_rx_stream(void *h, bool now)
return status;
}
status = bladerf_sync_config(handler->dev,
BLADERF_MODULE_TX,
BLADERF_TX_X1,
BLADERF_FORMAT_SC16_Q11_META,
num_buffers,
buffer_size_tx,
@ -186,6 +183,8 @@ int rf_blade_open_multi(char *args, void **h, uint32_t nof_channels)
int rf_blade_open(char *args, void **h)
{
const struct bladerf_range *range_tx = NULL;
const struct bladerf_range *range_rx = NULL;
*h = NULL;
rf_blade_handler_t *handler = (rf_blade_handler_t*) malloc(sizeof(rf_blade_handler_t));
@ -202,32 +201,44 @@ int rf_blade_open(char *args, void **h)
return status;
}
status = bladerf_set_gain_mode(handler->dev, BLADERF_MODULE_RX, BLADERF_GAIN_MGC);
if (status) {
fprintf(stderr, "Unable to open device: %s\n", bladerf_strerror(status));
return status;
}
//bladerf_log_set_verbosity(BLADERF_LOG_LEVEL_VERBOSE);
/* Configure the gains of the RX LNA and RX VGA1*/
status = bladerf_set_lna_gain(handler->dev, BLADERF_LNA_GAIN_MAX);
if (status != 0) {
fprintf(stderr, "Failed to set RX LNA gain: %s\n", bladerf_strerror(status));
/* Get Gain ranges and set Rx to maximum */
status = bladerf_get_gain_range(handler->dev, BLADERF_MODULE_RX, &range_rx);
if ((status != 0) | (range_rx == NULL)) {
fprintf(stderr, "Failed to get RX gain range: %s\n", bladerf_strerror(status));
return status;
}
status = bladerf_set_rxvga1(handler->dev, 27);
if (status != 0) {
fprintf(stderr, "Failed to set RX VGA1 gain: %s\n", bladerf_strerror(status));
bladerf_get_gain_range(handler->dev, BLADERF_MODULE_RX, &range_tx);
if ((status != 0) | (range_tx == NULL)) {
fprintf(stderr, "Failed to get TX gain range: %s\n", bladerf_strerror(status));
return status;
}
status = bladerf_set_txvga1(handler->dev, BLADERF_TXVGA1_GAIN_MAX);
status = bladerf_set_gain(handler->dev, BLADERF_MODULE_RX, (bladerf_gain) range_rx->max);
if (status != 0) {
fprintf(stderr, "Failed to set TX VGA1 gain: %s\n", bladerf_strerror(status));
fprintf(stderr, "Failed to set RX LNA gain: %s\n", bladerf_strerror(status));
return status;
}
handler->rx_stream_enabled = false;
handler->tx_stream_enabled = false;
/* Set default sampling rates */
rf_blade_set_tx_srate(handler, 1.92e6);
rf_blade_set_rx_srate(handler, 1.92e6);
/* Set info structure */
handler->info.min_tx_gain = BLADERF_TXVGA2_GAIN_MIN;
handler->info.max_tx_gain = BLADERF_TXVGA2_GAIN_MAX;
handler->info.min_rx_gain = BLADERF_RXVGA2_GAIN_MIN;
handler->info.max_rx_gain = BLADERF_RXVGA2_GAIN_MAX;
handler->info.min_tx_gain = range_tx->min;
handler->info.max_tx_gain = range_tx->max;
handler->info.min_rx_gain = range_rx->min;
handler->info.max_rx_gain = range_rx->max;
return 0;
}
@ -265,7 +276,7 @@ double rf_blade_set_rx_srate(void *h, double freq)
return -1;
}
} else {
status = bladerf_set_bandwidth(handler->dev, BLADERF_MODULE_RX, handler->rx_rate*0.8, &bw);
status = bladerf_set_bandwidth(handler->dev, BLADERF_MODULE_RX, (bladerf_bandwidth) (handler->rx_rate * 0.8), &bw);
if (status != 0) {
fprintf(stderr, "Failed to set bandwidth = %u: %s\n", handler->rx_rate, bladerf_strerror(status));
return -1;
@ -296,9 +307,9 @@ double rf_blade_set_rx_gain(void *h, double gain)
{
int status;
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
status = bladerf_set_rxvga2(handler->dev, (int) gain);
status = bladerf_set_gain(handler->dev, BLADERF_MODULE_RX, (bladerf_gain) gain);
if (status != 0) {
fprintf(stderr, "Failed to set RX VGA2 gain: %s\n", bladerf_strerror(status));
fprintf(stderr, "Failed to set RX gain: %s\n", bladerf_strerror(status));
return -1;
}
return rf_blade_get_rx_gain(h);
@ -308,9 +319,9 @@ double rf_blade_set_tx_gain(void *h, double gain)
{
int status;
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
status = bladerf_set_txvga2(handler->dev, (int) gain);
status = bladerf_set_gain(handler->dev, BLADERF_MODULE_TX, (bladerf_gain) gain);
if (status != 0) {
fprintf(stderr, "Failed to set TX VGA2 gain: %s\n", bladerf_strerror(status));
fprintf(stderr, "Failed to set TX gain: %s\n", bladerf_strerror(status));
return -1;
}
return rf_blade_get_tx_gain(h);
@ -319,29 +330,29 @@ double rf_blade_set_tx_gain(void *h, double gain)
double rf_blade_get_rx_gain(void *h)
{
int status;
int gain;
bladerf_gain gain = 0;
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
status = bladerf_get_rxvga2(handler->dev, &gain);
status = bladerf_get_gain(handler->dev, BLADERF_MODULE_RX, &gain);
if (status != 0) {
fprintf(stderr, "Failed to get RX VGA2 gain: %s\n",
fprintf(stderr, "Failed to get RX gain: %s\n",
bladerf_strerror(status));
return -1;
}
return gain; // Add rxvga1 and LNA
return gain;
}
double rf_blade_get_tx_gain(void *h)
{
int status;
int gain;
bladerf_gain gain = 0;
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
status = bladerf_get_txvga2(handler->dev, &gain);
status = bladerf_get_gain(handler->dev, BLADERF_MODULE_TX, &gain);
if (status != 0) {
fprintf(stderr, "Failed to get TX VGA2 gain: %s\n",
fprintf(stderr, "Failed to get TX gain: %s\n",
bladerf_strerror(status));
return -1;
}
return gain; // Add txvga1
return gain;
}
srslte_rf_info_t *rf_blade_get_info(void *h)
@ -360,7 +371,7 @@ srslte_rf_info_t *rf_blade_get_info(void *h)
double rf_blade_set_rx_freq(void *h, double freq)
{
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
uint32_t f_int = (uint32_t) round(freq);
bladerf_frequency f_int = (uint32_t) round(freq);
int status = bladerf_set_frequency(handler->dev, BLADERF_MODULE_RX, f_int);
if (status != 0) {
fprintf(stderr, "Failed to set samplerate = %u: %s\n",
@ -369,7 +380,7 @@ double rf_blade_set_rx_freq(void *h, double freq)
}
f_int=0;
bladerf_get_frequency(handler->dev, BLADERF_MODULE_RX, &f_int);
printf("set RX frequency to %u\n", f_int);
printf("set RX frequency to %lu\n", f_int);
return freq;
}
@ -377,7 +388,7 @@ double rf_blade_set_rx_freq(void *h, double freq)
double rf_blade_set_tx_freq(void *h, double freq)
{
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
uint32_t f_int = (uint32_t) round(freq);
bladerf_frequency f_int = (uint32_t) round(freq);
int status = bladerf_set_frequency(handler->dev, BLADERF_MODULE_TX, f_int);
if (status != 0) {
fprintf(stderr, "Failed to set samplerate = %u: %s\n",
@ -387,22 +398,22 @@ double rf_blade_set_tx_freq(void *h, double freq)
f_int=0;
bladerf_get_frequency(handler->dev, BLADERF_MODULE_TX, &f_int);
printf("set TX frequency to %u\n", f_int);
printf("set TX frequency to %lu\n", f_int);
return freq;
}
void rf_blade_set_tx_cal(void *h, srslte_rf_cal_t *cal) {
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_PHASE, cal->dc_gain);
bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_GAIN, cal->dc_phase);
bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_PHASE, cal->dc_phase);
bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_FPGA_GAIN, cal->dc_gain);
bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_LMS_DCOFF_I, cal->iq_i);
bladerf_set_correction(handler->dev, BLADERF_MODULE_TX, BLADERF_CORR_LMS_DCOFF_Q, cal->iq_q);
}
void rf_blade_set_rx_cal(void *h, srslte_rf_cal_t *cal) {
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_PHASE, cal->dc_gain);
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_GAIN, cal->dc_phase);
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_PHASE, cal->dc_phase);
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_FPGA_GAIN, cal->dc_gain);
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_LMS_DCOFF_I, cal->iq_i);
bladerf_set_correction(handler->dev, BLADERF_MODULE_RX, BLADERF_CORR_LMS_DCOFF_Q, cal->iq_q);
}
@ -431,7 +442,7 @@ void rf_blade_get_time(void *h, time_t *secs, double *frac_secs)
rf_blade_handler_t *handler = (rf_blade_handler_t*) h;
struct bladerf_metadata meta;
int status = bladerf_get_timestamp(handler->dev, BLADERF_MODULE_RX, &meta.timestamp);
int status = bladerf_get_timestamp(handler->dev, BLADERF_RX, &meta.timestamp);
if (status != 0) {
fprintf(stderr, "Failed to get current RX timestamp: %s\n",
bladerf_strerror(status));
@ -479,8 +490,8 @@ int rf_blade_recv_with_time(void *h,
error.type = SRSLTE_RF_ERROR_OVERFLOW;
blade_error_handler(error);
} else {
fprintf(stderr, "Overrun detected in scheduled RX. "
"%u valid samples were read.\n\n", meta.actual_count);
/*fprintf(stderr, "Overrun detected in scheduled RX. "
"%u valid samples were read.\n\n", meta.actual_count);*/
}
}

@ -228,7 +228,7 @@ int rf_soapy_start_rx_stream(void *h, bool now)
{
rf_soapy_handler_t *handler = (rf_soapy_handler_t*) h;
if(handler->rx_stream_active == false){
if(SoapySDRDevice_activateStream(handler->device, handler->rxStream, 0, 0, 0) != 0)
if(SoapySDRDevice_activateStream(handler->device, handler->rxStream, SOAPY_SDR_HAS_TIME | SOAPY_SDR_END_BURST, 0, 0) != 0)
return SRSLTE_ERROR;
handler->rx_stream_active = true;
}
@ -339,9 +339,19 @@ int rf_soapy_open_multi(char *args, void **h, uint32_t nof_rx_antennas)
rf_soapy_set_rx_srate(handler, 1.92e6);
rf_soapy_set_tx_srate(handler, 1.92e6);
if(SoapySDRDevice_getNumChannels(handler->device, SOAPY_SDR_RX) > 0){
size_t channels = SoapySDRDevice_getNumChannels(handler->device, SOAPY_SDR_RX);
if((channels > 0) && (nof_rx_antennas > 0)){
printf("Setting up RX stream\n");
if(SoapySDRDevice_setupStream(handler->device, &(handler->rxStream), SOAPY_SDR_RX, SOAPY_SDR_CF32, NULL, 0, NULL) != 0) {
size_t numChannels = channels;
if (channels > nof_rx_antennas) {
numChannels = nof_rx_antennas;
}
size_t rx_channels[numChannels];
for(int i = 0 ; i < numChannels ; i++) {
rx_channels[i] = i;
}
if(SoapySDRDevice_setupStream(handler->device, &(handler->rxStream), SOAPY_SDR_RX, SOAPY_SDR_CF32, rx_channels, numChannels, NULL) != 0) {
printf("Rx setupStream fail: %s\n", SoapySDRDevice_lastError());
return SRSLTE_ERROR;
}
@ -439,10 +449,12 @@ int rf_soapy_open_multi(char *args, void **h, uint32_t nof_rx_antennas)
#if HAVE_ASYNC_THREAD
bool start_async_thread = true;
if (args) {
if (strstr(args, "silent")) {
REMOVE_SUBSTRING_WITHCOMAS(args, "silent");
start_async_thread = false;
}
}
#endif
// receive one subframe to allow for transceiver calibration
@ -839,7 +851,7 @@ int rf_soapy_send_timed_multi(void *h,
// Convert initial tx time
if (has_time_spec) {
timeNs = secs * 1000000000;
timeNs = (long long)secs * 1000000000;
timeNs = timeNs + (frac_secs * 1000000000);
}

@ -788,17 +788,17 @@ int rf_uhd_recv_with_time_multi(void *h,
rf_uhd_handler_t *handler = (rf_uhd_handler_t*) h;
uhd_rx_metadata_handle *md = &handler->rx_md_first;
size_t rxd_samples = 0;
size_t rxd_samples_total = 0;
int trials = 0;
if (blocking) {
int n = 0;
while (n < nsamples && trials < 100) {
while (rxd_samples_total < nsamples && trials < 100) {
void *buffs_ptr[4];
for (int i=0;i<handler->nof_rx_channels;i++) {
cf_t *data_c = (cf_t*) data[i];
buffs_ptr[i] = &data_c[n];
buffs_ptr[i] = &data_c[rxd_samples_total];
}
size_t num_samps_left = nsamples - n;
size_t num_samps_left = nsamples - rxd_samples_total;
size_t num_rx_samples = (num_samps_left > handler->rx_nof_samples) ? handler->rx_nof_samples : num_samps_left;
rxd_samples = 0;
@ -814,7 +814,7 @@ int rf_uhd_recv_with_time_multi(void *h,
uhd_rx_metadata_error_code(*md, &error_code);
md = &handler->rx_md;
n += rxd_samples;
rxd_samples_total += rxd_samples;
trials++;
if (error_code == UHD_RX_METADATA_ERROR_CODE_OVERFLOW) {
@ -830,6 +830,7 @@ int rf_uhd_recv_with_time_multi(void *h,
}
} else {
uhd_error error = uhd_rx_streamer_recv(handler->rx_stream, data, nsamples, md, 0.0, false, &rxd_samples);
rxd_samples_total = rxd_samples;
if (error) {
fprintf(stderr, "Error receiving from UHD: %d\n", error);
log_rx_error(handler);
@ -839,7 +840,7 @@ int rf_uhd_recv_with_time_multi(void *h,
if (secs && frac_secs) {
uhd_rx_metadata_time_spec(handler->rx_md_first, secs, frac_secs);
}
return nsamples;
return rxd_samples_total;
}
int rf_uhd_send_timed(void *h,

@ -47,7 +47,12 @@ void srslte_scrambling_s(srslte_sequence_t *s, short *data) {
void srslte_scrambling_s_offset(srslte_sequence_t *s, short *data, int offset, int len) {
assert (len + offset <= s->cur_len);
srslte_vec_prod_sss(data, &s->c_short[offset], data, len);
srslte_vec_neg_sss(data, &s->c_short[offset], data, len);
}
/* Scrambles `len` int8_t samples of `data` in place, using the entries of
 * s->c_char starting at index `offset`. The work is delegated to
 * srslte_vec_neg_bbb with `data` as both the input and the output vector. */
void srslte_scrambling_sb_offset(srslte_sequence_t *s, int8_t *data, int offset, int len) {
// The requested window [offset, offset + len) must fit inside the sequence.
assert (len + offset <= s->cur_len);
srslte_vec_neg_bbb(data, &s->c_char[offset], data, len);
}
void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data) {

@ -177,6 +177,83 @@ int main(int argc, char **argv) {
free(input_f);
free(scrambled_f);
int16_t *input_s, *scrambled_s;
// Scramble also shorts
input_s= malloc(sizeof(int16_t) * seq.cur_len);
if (!input_s) {
perror("malloc");
exit(-1);
}
scrambled_s = malloc(sizeof(int16_t) * seq.cur_len);
if (!scrambled_s) {
perror("malloc");
exit(-1);
}
for (i=0;i<seq.cur_len;i++) {
input_s[i] = 100*(rand()/RAND_MAX)-50;
scrambled_s[i] = input_s[i];
}
gettimeofday(&t[1], NULL);
srslte_scrambling_s(&seq, scrambled_s);
gettimeofday(&t[2], NULL);
srslte_scrambling_s(&seq, scrambled_s);
get_time_interval(t);
printf("Texec short=%ld us for %d bits\n", t[0].tv_usec, seq.cur_len);
for (i=0;i<seq.cur_len;i++) {
if (scrambled_s[i] != input_s[i]) {
printf("Error in %d\n", i);
exit(-1);
}
}
free(input_s);
free(scrambled_s);
int8_t *input_b, *scrambled_b;
// Scramble also bytes
input_b= malloc(sizeof(int8_t) * seq.cur_len);
if (!input_b) {
perror("malloc");
exit(-1);
}
scrambled_b = malloc(sizeof(int8_t) * seq.cur_len);
if (!scrambled_b) {
perror("malloc");
exit(-1);
}
for (i=0;i<seq.cur_len;i++) {
input_b[i] = 100*(rand()/RAND_MAX)-50;
scrambled_b[i] = input_b[i];
}
gettimeofday(&t[1], NULL);
srslte_scrambling_sb_offset(&seq, scrambled_b, 0, seq.cur_len);
gettimeofday(&t[2], NULL);
srslte_scrambling_sb_offset(&seq, scrambled_b, 0, seq.cur_len);
get_time_interval(t);
printf("Texec char=%ld us for %d bits\n", t[0].tv_usec, seq.cur_len);
for (i=0;i<seq.cur_len;i++) {
if (scrambled_b[i] != input_b[i]) {
printf("Error in %d\n", i);
exit(-1);
}
}
free(input_b);
free(scrambled_b);
}
printf("Ok\n");
srslte_sequence_free(&seq);

@ -78,7 +78,7 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q,
fprintf(stderr, "Error creating PUSCH object\n");
goto clean_exit;
}
if (srslte_pucch_init(&q->pucch)) {
if (srslte_pucch_init_ue(&q->pucch)) {
fprintf(stderr, "Error creating PUSCH object\n");
goto clean_exit;
}

@ -60,6 +60,8 @@ bool verbose = false;
#define TEST(X, CODE) static bool test_##X (char *func_name, double *timing, uint32_t block_size) {\
struct timeval start, end;\
bzero(&start, sizeof(start));\
bzero(&end, sizeof(end));\
float mse = 0.0f;\
bool passed;\
strncpy(func_name, #X, 32);\
@ -449,7 +451,10 @@ TEST(srslte_vec_convert_fi,
for (int i = 0; i < block_size; i++) {
gold = (short) ((x[i] * scale));
mse += cabsf((float)gold - (float) z[i]);
double err = cabsf((float)gold - (float) z[i]);
if (err > mse) {
mse = err;
}
}
free(x);
@ -471,7 +476,10 @@ TEST(srslte_vec_convert_if,
for (int i = 0; i < block_size; i++) {
gold = ((float)x[i]) * k;
mse += fabsf(gold - z[i]);
double err = cabsf((float)gold - (float) z[i]);
if (err > mse) {
mse = err;
}
}
free(x);
@ -775,7 +783,8 @@ TEST(srslte_vec_apply_cfo,
)
TEST(srslte_cfo_correct,
srslte_cfo_t srslte_cfo = {0};
srslte_cfo_t srslte_cfo;
bzero(&srslte_cfo, sizeof(srslte_cfo));
MALLOC(cf_t, x);
MALLOC(cf_t, z);
@ -801,7 +810,8 @@ TEST(srslte_cfo_correct,
)
TEST(srslte_cfo_correct_change,
srslte_cfo_t srslte_cfo = {0};
srslte_cfo_t srslte_cfo;
bzero(&srslte_cfo, sizeof(srslte_cfo));
MALLOC(cf_t, x);
MALLOC(cf_t, z);

@ -58,6 +58,10 @@ void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const ui
srslte_vec_sub_sss_simd(x, y, z, len);
}
// Subtraction of int8_t vectors (x and y are inputs, z is the output, len elements).
// Thin wrapper that forwards to srslte_vec_sub_bbb_simd.
void srslte_vec_sub_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len) {
srslte_vec_sub_bbb_simd(x, y, z, len);
}
// Noise estimation in chest_dl, interpolation
void srslte_vec_sub_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len) {
return srslte_vec_sub_fff((const float*) x,(const float*) y,(float*) z, 2*len);
@ -100,10 +104,18 @@ void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const
srslte_vec_convert_fi_simd(x, z, scale, len);
}
// Converts len floats from x into int8_t values in z, applying `scale`.
// Thin wrapper around srslte_vec_convert_fb_simd; note that the SIMD routine
// takes its arguments in a different order (x, z, scale, len).
void srslte_vec_convert_fb(const float *x, const float scale, int8_t *z, const uint32_t len) {
srslte_vec_convert_fb_simd(x, z, scale, len);
}
void srslte_vec_lut_sss(const short *x, const unsigned short *lut, short *y, const uint32_t len) {
srslte_vec_lut_sss_simd(x, lut, y, len);
}
// Look-up-table operation on int8_t samples (x input, lut indices, y output, len elements).
// Thin wrapper that forwards to srslte_vec_lut_bbb_simd.
void srslte_vec_lut_bbb(const int8_t *x, const unsigned short *lut, int8_t *y, const uint32_t len) {
srslte_vec_lut_bbb_simd(x, lut, y, len);
}
void srslte_vec_lut_sis(const short *x, const unsigned int *lut, short *y, const uint32_t len) {
for (int i=0; i < len; i++) {
y[lut[i]] = x[i];
@ -163,6 +175,15 @@ void srslte_vec_fprint_b(FILE *stream, uint8_t *x, const uint32_t len) {
fprintf(stream, "];\n");
}
/* Prints a vector of signed bytes to `stream` as "[v0, v1, ..., ];" followed
 * by a newline. Every value is right-aligned in a field of width 4. */
void srslte_vec_fprint_bs(FILE *stream, int8_t *x, const uint32_t len) {
  fprintf(stream, "[");
  for (uint32_t idx = 0; idx < len; idx++) {
    fprintf(stream, "%4d, ", x[idx]);
  }
  fprintf(stream, "];\n");
}
void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, const uint32_t len) {
int i;
fprintf(stream, "[");
@ -185,7 +206,7 @@ void srslte_vec_fprint_s(FILE *stream, short *x, const uint32_t len) {
int i;
fprintf(stream, "[");
for (i=0;i<len;i++) {
fprintf(stream, "%d, ", x[i]);
fprintf(stream, "%4d, ", x[i]);
}
fprintf(stream, "];\n");
}
@ -271,11 +292,18 @@ void srslte_vec_prod_fff(const float *x, const float *y, float *z, const uint32_
srslte_vec_prod_fff_simd(x, y, z, len);
}
// Scrambling Short
void srslte_vec_prod_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len) {
srslte_vec_prod_sss_simd(x,y,z,len);
}
// Scrambling
// Conditional negation of int16_t samples (x input, y control vector, z output, len elements).
// Thin wrapper that forwards to srslte_vec_neg_sss_simd.
void srslte_vec_neg_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len) {
srslte_vec_neg_sss_simd(x,y,z,len);
}
// Conditional negation of int8_t samples (x input, y control vector, z output, len elements).
// Thin wrapper that forwards to srslte_vec_neg_bbb_simd.
void srslte_vec_neg_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len) {
srslte_vec_neg_bbb_simd(x,y,z,len);
}
// CFO and OFDM processing
void srslte_vec_prod_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len) {
srslte_vec_prod_ccc_simd(x,y,z,len);

@ -162,6 +162,35 @@ void srslte_vec_sub_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
}
}
/**
 * Element-wise subtraction of two int8_t vectors: z[i] = x[i] - y[i].
 *
 * When SRSLTE_SIMD_B_SIZE is non-zero, the bulk of the vector is processed
 * with the srslte_simd_b_* primitives, advancing SRSLTE_SIMD_B_SIZE elements
 * per iteration. Aligned loads/stores are used only if all three pointers
 * pass SRSLTE_IS_ALIGNED; otherwise the unaligned variants are used. Any
 * remaining elements are handled by the scalar loop at the end.
 *
 * @param x    minuend vector (len elements)
 * @param y    subtrahend vector (len elements)
 * @param z    output vector (len elements)
 * @param len  number of elements to process
 */
void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
  int i = 0;

#if SRSLTE_SIMD_B_SIZE
  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
      simd_s_t a = srslte_simd_b_load(&x[i]);
      simd_s_t b = srslte_simd_b_load(&y[i]);
      simd_s_t r = srslte_simd_b_sub(a, b);
      srslte_simd_b_store(&z[i], r);
    }
  } else {
    /* The loop bound and stride must be the byte-vector width, exactly as in
     * the aligned branch: each iteration loads and stores a full byte vector. */
    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
      simd_s_t a = srslte_simd_b_loadu(&x[i]);
      simd_s_t b = srslte_simd_b_loadu(&y[i]);
      simd_s_t r = srslte_simd_b_sub(a, b);
      srslte_simd_b_storeu(&z[i], r);
    }
  }
#endif /* SRSLTE_SIMD_B_SIZE */

  /* Scalar tail (or the whole vector when no byte SIMD is available). */
  for (; i < len; i++) {
    z[i] = x[i] - y[i];
  }
}
void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
int i = 0;
#if SRSLTE_SIMD_S_SIZE
@ -191,12 +220,69 @@ void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, co
}
}
/* Conditional negation of int16_t samples.
 *
 * Scalar definition (tail loop below): z[i] = -x[i] when y[i] is negative,
 * otherwise z[i] = x[i].
 *
 * When SRSLTE_SIMD_S_SIZE is non-zero, blocks of SRSLTE_SIMD_S_SIZE elements
 * are processed with srslte_simd_s_neg, using aligned loads/stores only when
 * x, y and z all pass SRSLTE_IS_ALIGNED. The remaining elements fall through
 * to the scalar loop.
 *
 * NOTE(review): srslte_simd_s_neg is defined elsewhere; it is presumed to
 * match the scalar rule above, including for y[i] == 0 -- confirm. */
void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
int i = 0;
#if SRSLTE_SIMD_S_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
// Aligned path
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
simd_s_t a = srslte_simd_s_load(&x[i]);
simd_s_t b = srslte_simd_s_load(&y[i]);
simd_s_t r = srslte_simd_s_neg(a, b);
srslte_simd_s_store(&z[i], r);
}
} else {
// Unaligned path: same operation with unaligned loads/stores
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
simd_s_t a = srslte_simd_s_loadu(&x[i]);
simd_s_t b = srslte_simd_s_loadu(&y[i]);
simd_s_t r = srslte_simd_s_neg(a, b);
srslte_simd_s_storeu(&z[i], r);
}
}
#endif /* SRSLTE_SIMD_S_SIZE */
// Scalar tail: handles whatever the SIMD loops left over
for(; i < len; i++){
z[i] = y[i]<0?-x[i]:x[i];
}
}
/* Conditional negation of int8_t samples.
 *
 * Scalar definition (tail loop below): z[i] = -x[i] when y[i] is negative,
 * otherwise z[i] = x[i].
 *
 * When SRSLTE_SIMD_B_SIZE is non-zero, blocks of SRSLTE_SIMD_B_SIZE elements
 * are processed with srslte_simd_b_neg, using aligned loads/stores only when
 * x, y and z all pass SRSLTE_IS_ALIGNED. The remaining elements fall through
 * to the scalar loop.
 *
 * NOTE(review): the byte vectors are held in simd_s_t variables; confirm this
 * matches the type returned by srslte_simd_b_load on every supported target.
 * NOTE(review): srslte_simd_b_neg is defined elsewhere; it is presumed to
 * match the scalar rule above, including for y[i] == 0 -- confirm. */
void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
int i = 0;
#if SRSLTE_SIMD_B_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
// Aligned path
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_s_t a = srslte_simd_b_load(&x[i]);
simd_s_t b = srslte_simd_b_load(&y[i]);
simd_s_t r = srslte_simd_b_neg(a, b);
srslte_simd_b_store(&z[i], r);
}
} else {
// Unaligned path: same operation with unaligned loads/stores
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_s_t a = srslte_simd_b_loadu(&x[i]);
simd_s_t b = srslte_simd_b_loadu(&y[i]);
simd_s_t r = srslte_simd_b_neg(a, b);
srslte_simd_b_storeu(&z[i], r);
}
}
#endif /* SRSLTE_SIMD_B_SIZE */
// Scalar tail: handles whatever the SIMD loops left over
for(; i < len; i++){
z[i] = y[i]<0?-x[i]:x[i];
}
}
/* No improvement with AVX */
void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y, const int len) {
int i = 0;
#ifdef LV_HAVE_SSE
#if CMAKE_BUILD_TYPE!=Debug
#ifndef DEBUG_MODE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(lut)) {
for (; i < len - 7; i += 8) {
__m128i xVal = _mm_load_si128((__m128i *) &x[i]);
@ -228,6 +314,53 @@ void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y
}
}
/* Scatter of int8_t samples through a look-up table: y[lut[i]] = x[i] for
 * every i in [0, len).
 *
 * With LV_HAVE_SSE defined and DEBUG_MODE not defined, the input is consumed
 * 16 samples at a time: one 128-bit load fetches 16 bytes of x and two
 * 128-bit loads fetch the 16 matching 16-bit indices. The individual lanes
 * are then extracted and written to y one by one. Aligned loads are used
 * only when both x and lut pass SRSLTE_IS_ALIGNED. Elements not covered by
 * the 16-wide loops are handled by the scalar loop at the end.
 *
 * NOTE(review): the lane index passed to _mm_extract_epi8/_mm_extract_epi16
 * is the loop variable k; presumably this only compiles when the k-loops are
 * fully unrolled, which would explain the DEBUG_MODE guard -- confirm.
 * NOTE(review): the local `x` inside the k-loops shadows the parameter `x`. */
void srslte_vec_lut_bbb_simd(const int8_t *x, const unsigned short *lut, int8_t *y, const int len) {
int i = 0;
#ifdef LV_HAVE_SSE
#ifndef DEBUG_MODE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(lut)) {
for (; i < len - 15; i += 16) {
__m128i xVal = _mm_load_si128((__m128i *) &x[i]);
__m128i lutVal1 = _mm_load_si128((__m128i *) &lut[i]);
__m128i lutVal2 = _mm_load_si128((__m128i *) &lut[i+8]);
// Samples 0..7 use the first index vector
for (int k = 0; k < 8; k++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, k);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, k);
y[l] = (char) x;
}
// Samples 8..15 use the second index vector
for (int k = 0; k < 8; k++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, k+8);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, k);
y[l] = (char) x;
}
}
} else {
// Same processing with unaligned loads
for (; i < len - 15; i += 16) {
__m128i xVal = _mm_loadu_si128((__m128i *) &x[i]);
__m128i lutVal1 = _mm_loadu_si128((__m128i *) &lut[i]);
__m128i lutVal2 = _mm_loadu_si128((__m128i *) &lut[i+8]);
for (int k = 0; k < 8; k++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, k);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, k);
y[l] = (char) x;
}
for (int k = 0; k < 8; k++) {
int8_t x = (int8_t) _mm_extract_epi8(xVal, k+8);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, k);
y[l] = (char) x;
}
}
}
#endif
#endif
// Scalar tail (or the whole vector when the SSE path is compiled out)
for (; i < len; i++) {
y[lut[i]] = x[i];
}
}
void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len) {
int i = 0;
const float gain = 1.0f / scale;
@ -295,6 +428,73 @@ void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, c
}
}
/* True when PTR is aligned to a 16-byte (128-bit SSE register) boundary. */
#define SRSLTE_IS_ALIGNED_SSE(PTR) (((size_t)(PTR) & 0x0F) == 0)

/**
 * Converts a float vector to int8_t after scaling: z[i] = (int8_t)(x[i] * scale).
 *
 * With LV_HAVE_SSE defined, 16 samples are converted per iteration: four
 * 128-bit float vectors are multiplied by `scale`, truncated to 32-bit
 * integers, and narrowed to 16-bit and then 8-bit lanes with the packs
 * intrinsics before a single 128-bit store. Aligned loads/stores are used
 * only when both x and z are 16-byte aligned; otherwise the unaligned
 * variants are used for both. Elements not covered by the 16-wide loops are
 * converted by the scalar loop at the end.
 *
 * NOTE(review): the pack intrinsics saturate while the scalar tail uses a
 * plain cast, so out-of-range values may differ between the two paths.
 *
 * @param x      input samples (len elements)
 * @param z      output samples (len elements)
 * @param scale  factor applied to every sample before conversion
 * @param len    number of elements to convert
 */
void srslte_vec_convert_fb_simd(const float *x, int8_t *z, const float scale, const int len) {
  int i = 0;

  // Force the use of SSE here instead of AVX since the implementations requires too many permutes across 128-bit boundaries
#ifdef LV_HAVE_SSE
  __m128 s = _mm_set1_ps(scale);
  if (SRSLTE_IS_ALIGNED_SSE(x) && SRSLTE_IS_ALIGNED_SSE(z)) {
    for (; i < len - 16 + 1; i += 16) {
      __m128 a = _mm_load_ps(&x[i]);
      __m128 b = _mm_load_ps(&x[i + 1*4]);
      __m128 c = _mm_load_ps(&x[i + 2*4]);
      __m128 d = _mm_load_ps(&x[i + 3*4]);

      __m128 sa = _mm_mul_ps(a, s);
      __m128 sb = _mm_mul_ps(b, s);
      __m128 sc = _mm_mul_ps(c, s);
      __m128 sd = _mm_mul_ps(d, s);

      __m128i ai = _mm_cvttps_epi32(sa);
      __m128i bi = _mm_cvttps_epi32(sb);
      __m128i ci = _mm_cvttps_epi32(sc);
      __m128i di = _mm_cvttps_epi32(sd);

      __m128i ab = _mm_packs_epi32(ai, bi);
      __m128i cd = _mm_packs_epi32(ci, di);
      __m128i i8 = _mm_packs_epi16(ab, cd);

      _mm_store_si128((__m128i*)&z[i], i8);
    }
  } else {
    for (; i < len - 16 + 1; i += 16) {
      /* x is not guaranteed to be 16-byte aligned in this branch, so the
       * unaligned load must be used (the aligned form faults otherwise). */
      __m128 a = _mm_loadu_ps(&x[i]);
      __m128 b = _mm_loadu_ps(&x[i + 1*4]);
      __m128 c = _mm_loadu_ps(&x[i + 2*4]);
      __m128 d = _mm_loadu_ps(&x[i + 3*4]);

      __m128 sa = _mm_mul_ps(a, s);
      __m128 sb = _mm_mul_ps(b, s);
      __m128 sc = _mm_mul_ps(c, s);
      __m128 sd = _mm_mul_ps(d, s);

      __m128i ai = _mm_cvttps_epi32(sa);
      __m128i bi = _mm_cvttps_epi32(sb);
      __m128i ci = _mm_cvttps_epi32(sc);
      __m128i di = _mm_cvttps_epi32(sd);

      __m128i ab = _mm_packs_epi32(ai, bi);
      __m128i cd = _mm_packs_epi32(ci, di);
      __m128i i8 = _mm_packs_epi16(ab, cd);

      _mm_storeu_si128((__m128i*)&z[i], i8);
    }
  }
#endif

#ifdef HAVE_NEON
#warning srslte_vec_convert_fb_simd not implemented in neon
#endif /* HAVE_NEON */

  /* Scalar tail (or the whole vector when the SSE path is compiled out). */
  for (; i < len; i++) {
    z[i] = (int8_t) (x[i] * scale);
  }
}
float srslte_vec_acc_ff_simd(const float *x, const int len) {
int i = 0;
float acc_sum = 0.0f;

@ -358,8 +358,8 @@ void radio::set_tx_srate(double srate)
cur_tx_srate = srslte_rf_set_tx_srate(&rf_device, srate);
burst_preamble_samples = (uint32_t) (cur_tx_srate * burst_preamble_sec);
if (burst_preamble_samples > burst_preamble_max_samples) {
fprintf(stderr, "Error setting TX srate %.1f MHz. Maximum burst preamble samples: %d, requested: %d\n", srate*1e-6, burst_preamble_max_samples, burst_preamble_samples );
burst_preamble_samples = burst_preamble_max_samples;
fprintf(stderr, "Error setting TX srate %.1f MHz. Maximum frequency for zero prepadding is 30.72 MHz\n", srate*1e-6);
}
burst_preamble_time_rounded = (double) burst_preamble_samples/cur_tx_srate;

@ -129,7 +129,9 @@ int main(int argc, char **argv)
{
int ret = SRSLTE_ERROR;
srslte::radio_multi *radio_h = NULL;
srslte_timestamp_t ts_rx = {}, ts_tx = {};
srslte_timestamp_t ts_rx, ts_tx;
bzero(&ts_rx, sizeof(ts_rx));
bzero(&ts_tx, sizeof(ts_tx));
signal(SIGINT, sig_int_handler);

@ -22,3 +22,8 @@ file(GLOB SOURCES "*.cc")
# Static library srslte_upper, built from the files listed in SOURCES.
add_library(srslte_upper STATIC ${SOURCES})
# Link the library against srslte_common and srslte_asn1.
target_link_libraries(srslte_upper srslte_common srslte_asn1)
# Install the library into LIBRARY_DIR.
install(TARGETS srslte_upper DESTINATION ${LIBRARY_DIR})
# Run clang-tidy if available
# NOTE(review): CLANG_TIDY_BIN and DO_CLANG_TIDY are not set in this file;
# presumably they are defined by a parent CMakeLists -- confirm.
if(CLANG_TIDY_BIN)
set_target_properties(srslte_upper PROPERTIES CXX_CLANG_TIDY "${DO_CLANG_TIDY}")
endif()

@ -187,6 +187,28 @@ void pdcp::del_bearer(uint32_t lcid)
pthread_rwlock_unlock(&rwlock);
}
void pdcp::change_lcid(uint32_t old_lcid, uint32_t new_lcid)
{
pthread_rwlock_wrlock(&rwlock);
// make sure old LCID exists and new LCID is still free
if (valid_lcid(old_lcid) && not valid_lcid(new_lcid)) {
// insert old PDCP entity into new LCID
pdcp_map_t::iterator it = pdcp_array.find(old_lcid);
pdcp_entity_interface *pdcp_entity = it->second;
if (not pdcp_array.insert(pdcp_map_pair_t(new_lcid, pdcp_entity)).second) {
pdcp_log->error("Error inserting PDCP entity into array\n.");
goto exit;
}
// erase from old position
pdcp_array.erase(it);
pdcp_log->warning("Changed LCID of PDCP bearer from %d to %d\n", old_lcid, new_lcid);
} else {
pdcp_log->error("Can't change PDCP of bearer %s from %d to %d. Bearer doesn't exist or new LCID already occupied.\n", rrc->get_rb_name(old_lcid).c_str(), old_lcid, new_lcid);
}
exit:
pthread_rwlock_unlock(&rwlock);
}
void pdcp::config_security(uint32_t lcid,
uint8_t *k_enc,

@ -218,7 +218,7 @@ void pdcp_entity::write_pdu(byte_buffer_t *pdu)
rx_count,
pdu->N_bytes - 4,
&(pdu->msg[pdu->N_bytes - 4]))) {
log->error_hex(pdu->msg, pdu->N_bytes, "RX %s PDU SN: %d", rrc->get_rb_name(lcid).c_str(), sn);
log->error_hex(pdu->msg, pdu->N_bytes, "RX %s PDU SN: %d (Dropping PDU)", rrc->get_rb_name(lcid).c_str(), sn);
goto exit;
}
}

@ -42,6 +42,7 @@ rlc::rlc()
mac_timers = NULL;
ue = NULL;
default_lcid = 0;
buffer_size = 0;
bzero(metrics_time, sizeof(metrics_time));
pthread_rwlock_init(&rwlock, NULL);
}
@ -118,24 +119,21 @@ void rlc::get_metrics(rlc_metrics_t &m)
get_time_interval(metrics_time);
double secs = (double)metrics_time[0].tv_sec + metrics_time[0].tv_usec*1e-6;
m.dl_tput_mbps = 0;
m.ul_tput_mbps = 0;
for (rlc_map_t::iterator it = rlc_array.begin(); it != rlc_array.end(); ++it) {
m.dl_tput_mbps += (it->second->get_num_rx_bytes()*8/(double)1e6)/secs;
m.ul_tput_mbps += (it->second->get_num_tx_bytes()*8/(double)1e6)/secs;
m.dl_tput_mbps[it->first] = (it->second->get_num_rx_bytes()*8/static_cast<double>(1e6))/secs;
m.ul_tput_mbps[it->first] = (it->second->get_num_tx_bytes()*8/static_cast<double>(1e6))/secs;
rlc_log->info("LCID=%d, RX throughput: %4.6f Mbps. TX throughput: %4.6f Mbps.\n",
it->first,
(it->second->get_num_rx_bytes()*8/(double)1e6)/secs,
(it->second->get_num_tx_bytes()*8/(double)1e6)/secs);
(it->second->get_num_rx_bytes()*8/static_cast<double>(1e6))/secs,
(it->second->get_num_tx_bytes()*8/static_cast<double>(1e6))/secs);
}
// Add multicast metrics
for (rlc_map_t::iterator it = rlc_array_mrb.begin(); it != rlc_array_mrb.end(); ++it) {
m.dl_tput_mbps += (it->second->get_num_rx_bytes()*8/(double)1e6)/secs;
m.dl_tput_mbps[it->first] = (it->second->get_num_rx_bytes()*8/static_cast<double>(1e6))/secs;
rlc_log->info("MCH_LCID=%d, RX throughput: %4.6f Mbps\n",
it->first,
(it->second->get_num_rx_bytes()*8/(double)1e6)/secs);
(it->second->get_num_rx_bytes()*8/static_cast<double>(1e6))/secs);
}
memcpy(&metrics_time[1], &metrics_time[2], sizeof(struct timeval));
@ -144,7 +142,7 @@ void rlc::get_metrics(rlc_metrics_t &m)
pthread_rwlock_unlock(&rwlock);
}
// A call to reestablish stops all lcids but does not delete the instances. The mapping lcid to rlc mode can not change
// Reestablish all RLC bearer
void rlc::reestablish()
{
pthread_rwlock_rdlock(&rwlock);
@ -160,6 +158,18 @@ void rlc::reestablish()
pthread_rwlock_unlock(&rwlock);
}
// Reestablish a specific RLC bearer
//
// Calls reestablish() on the RLC entity stored under `lcid` in rlc_array while
// holding the read lock. If `lcid` is not a valid LCID, only a warning is
// logged and nothing else happens.
void rlc::reestablish(uint32_t lcid)
{
  pthread_rwlock_rdlock(&rwlock);
  if (valid_lcid(lcid)) {
    rlc_array.at(lcid)->reestablish();
  } else {
    // No SDU is involved in this path, so the message only reports the missing LCID
    rlc_log->warning("RLC LCID %d doesn't exist.\n", lcid);
  }
  pthread_rwlock_unlock(&rwlock);
}
// Resetting the RLC layer returns the object to the state after the call to init():
// All LCIDs are removed, except SRB0
void rlc::reset()
@ -270,7 +280,7 @@ uint32_t rlc::get_total_mch_buffer_state(uint32_t lcid)
uint32_t ret = 0;
pthread_rwlock_rdlock(&rwlock);
if (valid_lcid(lcid)) {
if (valid_lcid_mrb(lcid)) {
ret = rlc_array_mrb.at(lcid)->get_total_buffer_state();
}
pthread_rwlock_unlock(&rwlock);
@ -296,7 +306,7 @@ int rlc::read_pdu_mch(uint32_t lcid, uint8_t *payload, uint32_t nof_bytes)
uint32_t ret = 0;
pthread_rwlock_rdlock(&rwlock);
if (valid_lcid(lcid)) {
if (valid_lcid_mrb(lcid)) {
ret = rlc_array_mrb.at(lcid)->read_pdu(payload, nof_bytes);
}
pthread_rwlock_unlock(&rwlock);
@ -318,7 +328,7 @@ void rlc::write_pdu_bcch_bch(uint8_t *payload, uint32_t nof_bytes)
{
rlc_log->info_hex(payload, nof_bytes, "BCCH BCH message received.");
byte_buffer_t *buf = pool_allocate;
if (buf) {
if (buf != NULL) {
memcpy(buf->msg, payload, nof_bytes);
buf->N_bytes = nof_bytes;
buf->set_timestamp();
@ -333,7 +343,7 @@ void rlc::write_pdu_bcch_dlsch(uint8_t *payload, uint32_t nof_bytes)
{
rlc_log->info_hex(payload, nof_bytes, "BCCH TXSCH message received.");
byte_buffer_t *buf = pool_allocate;
if (buf) {
if (buf != NULL) {
memcpy(buf->msg, payload, nof_bytes);
buf->N_bytes = nof_bytes;
buf->set_timestamp();
@ -348,7 +358,7 @@ void rlc::write_pdu_pcch(uint8_t *payload, uint32_t nof_bytes)
{
rlc_log->info_hex(payload, nof_bytes, "PCCH message received.");
byte_buffer_t *buf = pool_allocate;
if (buf) {
if (buf != NULL) {
memcpy(buf->msg, payload, nof_bytes);
buf->N_bytes = nof_bytes;
buf->set_timestamp();
@ -385,7 +395,7 @@ void rlc::add_bearer(uint32_t lcid)
add_bearer(lcid, srslte_rlc_config_t());
} else {
// SRB1 and SRB2 are AM
LIBLTE_RRC_RLC_CONFIG_STRUCT cnfg;
LIBLTE_RRC_RLC_CONFIG_STRUCT cnfg = {};
cnfg.rlc_mode = LIBLTE_RRC_RLC_MODE_AM;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS45;
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_INFINITY;
@ -440,7 +450,7 @@ void rlc::add_bearer(uint32_t lcid, srslte_rlc_config_t cnfg)
rlc_log->error("Error instantiating RLC\n");
goto delete_and_exit;
}
rlc_log->warning("Added radio bearer %s with mode %s\n", rrc->get_rb_name(lcid).c_str(), liblte_rrc_rlc_mode_text[cnfg.rlc_mode]);
rlc_log->warning("Added radio bearer %s in %s\n", rrc->get_rb_name(lcid).c_str(), rlc_mode_text[cnfg.rlc_mode]);
goto unlock_and_exit;
} else {
rlc_log->warning("Bearer %s already created.\n", rrc->get_rb_name(lcid).c_str());
@ -463,10 +473,10 @@ void rlc::add_bearer_mrb(uint32_t lcid)
if (not valid_lcid_mrb(lcid)) {
rlc_entity = new rlc_um();
if (rlc_entity) {
if (rlc_entity != NULL) {
// configure and add to array
rlc_entity->init(rlc_log, lcid, pdcp, rrc, mac_timers);
if (rlc_entity->configure(srslte_rlc_config_t::mch_config()) == false) {
if (not rlc_entity->configure(srslte_rlc_config_t::mch_config())) {
rlc_log->error("Error configuring RLC entity\n.");
goto delete_and_exit;
}
@ -485,7 +495,7 @@ void rlc::add_bearer_mrb(uint32_t lcid)
}
delete_and_exit:
if (rlc_entity) {
if (rlc_entity != NULL) {
delete(rlc_entity);
}
@ -498,20 +508,62 @@ void rlc::del_bearer(uint32_t lcid)
{
pthread_rwlock_wrlock(&rwlock);
if (valid_lcid_mrb(lcid)) {
if (valid_lcid(lcid)) {
rlc_map_t::iterator it = rlc_array.find(lcid);
it->second->stop();
delete(it->second);
rlc_array.erase(it);
rlc_log->warning("Deleted RLC bearer %s\n", rrc->get_rb_name(lcid).c_str());
} else {
rlc_log->warning("Can't delete bearer %s. Bearer doesn't exist.\n", rrc->get_rb_name(lcid).c_str());
rlc_log->error("Can't delete bearer %s. Bearer doesn't exist.\n", rrc->get_rb_name(lcid).c_str());
}
pthread_rwlock_unlock(&rwlock);
}
void rlc::del_bearer_mrb(uint32_t lcid)
{
pthread_rwlock_wrlock(&rwlock);
if (valid_lcid_mrb(lcid)) {
rlc_map_t::iterator it = rlc_array_mrb.find(lcid);
it->second->stop();
delete(it->second);
rlc_array_mrb.erase(it);
rlc_log->warning("Deleted RLC MRB bearer %s\n", rrc->get_rb_name(lcid).c_str());
} else {
rlc_log->error("Can't delete bearer %s. Bearer doesn't exist.\n", rrc->get_rb_name(lcid).c_str());
}
pthread_rwlock_unlock(&rwlock);
}
void rlc::change_lcid(uint32_t old_lcid, uint32_t new_lcid)
{
pthread_rwlock_wrlock(&rwlock);
// make sure old LCID exists and new LCID is still free
if (valid_lcid(old_lcid) && not valid_lcid(new_lcid)) {
// insert old rlc entity into new LCID
rlc_map_t::iterator it = rlc_array.find(old_lcid);
rlc_common *rlc_entity = it->second;
if (not rlc_array.insert(rlc_map_pair_t(new_lcid, rlc_entity)).second) {
rlc_log->error("Error inserting RLC entity into array\n.");
goto exit;
}
// erase from old position
rlc_array.erase(it);
rlc_log->warning("Changed LCID of RLC bearer from %d to %d\n", old_lcid, new_lcid);
} else {
rlc_log->error("Can't change LCID of bearer %s from %d to %d. Bearer doesn't exist or new LCID already occupied.\n", rrc->get_rb_name(old_lcid).c_str(), old_lcid, new_lcid);
}
exit:
pthread_rwlock_unlock(&rwlock);
}
/*******************************************************************************
Helpers (Lock must be held when calling those)
*******************************************************************************/
@ -544,4 +596,4 @@ bool rlc::valid_lcid_mrb(uint32_t lcid)
return true;
}
} // namespace srsue
} // namespace srslte

File diff suppressed because it is too large Load Diff

@ -28,8 +28,9 @@
#include "srslte/upper/rlc_um.h"
#include <sstream>
#include <srslte/upper/rlc_interface.h>
#include <srslte/upper/rlc_common.h>
#define RX_MOD_BASE(x) (x-vr_uh-cfg.rx_window_size)%cfg.rx_mod
#define RX_MOD_BASE(x) (((x)-vr_uh-cfg.rx_window_size)%cfg.rx_mod)
namespace srslte {
@ -75,13 +76,13 @@ bool rlc_um::configure(srslte_rlc_config_t cnfg_)
return false;
}
log->warning("%s configured in %s mode: t_reordering=%d ms, rx_sn_field_length=%u bits, tx_sn_field_length=%u bits\n",
rb_name.c_str(), rlc_mode_text[cnfg_.rlc_mode],
cfg.t_reordering, rlc_umd_sn_size_num[cfg.rx_sn_field_length], rlc_umd_sn_size_num[cfg.rx_sn_field_length]);
// store config
cfg = cnfg_.um;
log->warning("%s configured in %s mode: ft_reordering=%d ms, rx_sn_field_length=%u bits, tx_sn_field_length=%u bits\n",
rb_name.c_str(), rlc_mode_text[cnfg_.rlc_mode],
cfg.t_reordering, rlc_umd_sn_size_num[cfg.rx_sn_field_length], rlc_umd_sn_size_num[cfg.rx_sn_field_length]);
return true;
}
@ -95,6 +96,11 @@ bool rlc_um::rlc_um_rx::configure(srslte_rlc_config_t cnfg_, std::string rb_name
return false;
}
// set reordering timer
if (reordering_timer != NULL) {
reordering_timer->set(this, cfg.t_reordering);
}
rb_name = rb_name_;
rx_enabled = true;
@ -149,7 +155,6 @@ void rlc_um::write_sdu(byte_buffer_t *sdu, bool blocking)
} else {
tx.try_write_sdu(sdu);
}
}
/****************************************************************************
@ -465,12 +470,11 @@ int rlc_um::rlc_um_tx::build_data_pdu(uint8_t *payload, uint32_t nof_bytes)
vt_us = (vt_us + 1)%cfg.tx_mod;
// Add header and TX
log->debug("%s packing PDU with length %d\n", get_rb_name(), pdu->N_bytes);
rlc_um_write_data_pdu_header(&header, pdu);
memcpy(payload, pdu->msg, pdu->N_bytes);
uint32_t ret = pdu->N_bytes;
log->debug("%s returning length %d\n", get_rb_name(), pdu->N_bytes);
log->info("%s Transmitting PDU SN=%d (%d B)\n", get_rb_name(), header.sn, pdu->N_bytes);
pool->deallocate(pdu);
debug_state();
@ -546,9 +550,7 @@ void rlc_um::rlc_um_rx::reestablish()
void rlc_um::rlc_um_rx::stop()
{
pthread_mutex_lock(&mutex);
if(reordering_timer) {
reordering_timer->stop();
}
vr_ur = 0;
vr_ux = 0;
vr_uh = 0;
@ -560,7 +562,8 @@ void rlc_um::rlc_um_rx::stop()
rx_sdu = NULL;
}
if (mac_timers && reordering_timer) {
if (mac_timers != NULL && reordering_timer != NULL) {
reordering_timer->stop();
mac_timers->timer_release_id(reordering_timer_id);
reordering_timer = NULL;
}
@ -643,7 +646,7 @@ void rlc_um::rlc_um_rx::handle_data_pdu(uint8_t *payload, uint32_t nof_bytes)
}
if(!reordering_timer->is_running()) {
if(RX_MOD_BASE(vr_uh) > RX_MOD_BASE(vr_ur)) {
reordering_timer->set(this, cfg.t_reordering);
reordering_timer->reset();
reordering_timer->run();
vr_ux = vr_uh;
}
@ -715,7 +718,7 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus()
// Handle last segment
if (rx_sdu->N_bytes > 0 || rlc_um_start_aligned(rx_window[vr_ur].header.fi)) {
log->debug("Writing last segment in SDU buffer. Lower edge vr_ur=%d, Buffer size=%d, segment size=%d\n",
log->info("Writing last segment in SDU buffer. Lower edge vr_ur=%d, Buffer size=%d, segment size=%d\n",
vr_ur, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes);
memcpy(&rx_sdu->msg[rx_sdu->N_bytes], rx_window[vr_ur].buf->msg, rx_window[vr_ur].buf->N_bytes);
@ -754,18 +757,36 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus()
// Now update vr_ur until we reach an SN we haven't yet received
while(rx_window.end() != rx_window.find(vr_ur)) {
log->debug("Reassemble loop for vr_ur=%d\n", vr_ur);
if ((vr_ur_in_rx_sdu+1)%cfg.rx_mod != vr_ur) {
log->warning("PDU SN=%d lost, dropping remainder of %d\n", vr_ur_in_rx_sdu+1, vr_ur);
rx_sdu->reset();
}
// Handle any SDU segments
for(uint32_t i=0; i<rx_window[vr_ur].header.N_li; i++) {
int len = rx_window[vr_ur].header.li[i];
// Check if the first part of the PDU is a middle or end segment
if (rx_sdu->N_bytes == 0 && i == 0 && !rlc_um_start_aligned(rx_window[vr_ur].header.fi)) {
log->warning("Dropping PDU %d due to lost start segment\n", vr_ur);
log->warning_hex(rx_window[vr_ur].buf->msg, len, "Dropping first part of SN %d due to lost start segment\n", vr_ur);
// Advance data pointers and continue with next segment
rx_window[vr_ur].buf->msg += len;
rx_window[vr_ur].buf->N_bytes -= len;
rx_sdu->reset();
break;
// beginning of next SDU?
if (rx_window[vr_ur].header.fi == RLC_FI_FIELD_NOT_START_OR_END_ALIGNED) {
len = rx_window[vr_ur].buf->N_bytes;
log->info_hex(rx_window[vr_ur].buf->msg, len, "Copying first %d bytes of new SDU\n", len);
memcpy(rx_sdu->msg, rx_window[vr_ur].buf->msg, len);
rx_sdu->N_bytes = len;
rx_window[vr_ur].buf->msg += len;
rx_window[vr_ur].buf->N_bytes -= len;
log->info("Updating vr_ur_in_rx_sdu. old=%d, new=%d\n", vr_ur_in_rx_sdu, vr_ur);
vr_ur_in_rx_sdu = vr_ur;
goto clean_up_rx_window;
}
}
// Check available space in SDU
@ -775,7 +796,7 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus()
goto clean_up_rx_window;
}
log->debug("Concatenating %d bytes in to current length %d. rx_window remaining bytes=%d, vr_ur_in_rx_sdu=%d, vr_ur=%d, rx_mod=%d, last_mod=%d\n",
log->info_hex(rx_window[vr_ur].buf->msg, len, "Concatenating %d bytes in to current length %d. rx_window remaining bytes=%d, vr_ur_in_rx_sdu=%d, vr_ur=%d, rx_mod=%d, last_mod=%d\n",
len, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes, vr_ur_in_rx_sdu, vr_ur, cfg.rx_mod, (vr_ur_in_rx_sdu+1)%cfg.rx_mod);
memcpy(&rx_sdu->msg[rx_sdu->N_bytes], rx_window[vr_ur].buf->msg, len);
rx_sdu->N_bytes += len;
@ -812,11 +833,10 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus()
rx_window[vr_ur].buf->N_bytes < SRSLTE_MAX_BUFFER_SIZE_BYTES &&
rx_window[vr_ur].buf->N_bytes + rx_sdu->N_bytes < SRSLTE_MAX_BUFFER_SIZE_BYTES)
{
log->info_hex(rx_window[vr_ur].buf->msg, rx_window[vr_ur].buf->N_bytes, "Writing last segment in SDU buffer. Updating vr_ur=%d, Buffer size=%d, segment size=%d\n",
vr_ur, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes);
memcpy(&rx_sdu->msg[rx_sdu->N_bytes], rx_window[vr_ur].buf->msg, rx_window[vr_ur].buf->N_bytes);
rx_sdu->N_bytes += rx_window[vr_ur].buf->N_bytes;
log->debug("Writing last segment in SDU buffer. Updating vr_ur=%d, Buffer size=%d, segment size=%d\n",
vr_ur, rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes);
} else {
log->error("Out of bounds while reassembling SDU buffer in UM: sdu_len=%d, window_buffer_len=%d, vr_ur=%d\n",
rx_sdu->N_bytes, rx_window[vr_ur].buf->N_bytes, vr_ur);
@ -844,7 +864,6 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus()
}
clean_up_rx_window:
// Clean up rx_window
pool->deallocate(rx_window[vr_ur].buf);
rx_window.erase(vr_ur);
@ -854,14 +873,13 @@ void rlc_um::rlc_um_rx::reassemble_rx_sdus()
}
// Only called when lock is held
// 36.322 Section 5.1.2.2.1
bool rlc_um::rlc_um_rx::inside_reordering_window(uint16_t sn)
{
if(cfg.rx_window_size == 0) {
if (cfg.rx_window_size == 0 || rx_window.empty()) {
return true;
}
if(RX_MOD_BASE(sn) >= RX_MOD_BASE(vr_uh-cfg.rx_window_size) &&
RX_MOD_BASE(sn) < RX_MOD_BASE(vr_uh))
{
if (RX_MOD_BASE(vr_uh-cfg.rx_window_size) <= RX_MOD_BASE(sn) && RX_MOD_BASE(sn) < RX_MOD_BASE(vr_uh)) {
return true;
} else {
return false;
@ -889,8 +907,7 @@ void rlc_um::rlc_um_rx::reset_metrics()
void rlc_um::rlc_um_rx::timer_expired(uint32_t timeout_id)
{
if(reordering_timer_id == timeout_id)
{
if (reordering_timer_id == timeout_id) {
pthread_mutex_lock(&mutex);
// 36.322 v10 Section 5.1.2.2.4
@ -898,19 +915,19 @@ void rlc_um::rlc_um_rx::timer_expired(uint32_t timeout_id)
get_rb_name());
log->warning("Lost PDU SN: %d\n", vr_ur);
pdu_lost = true;
rx_sdu->reset();
while(RX_MOD_BASE(vr_ur) < RX_MOD_BASE(vr_ux))
{
while(RX_MOD_BASE(vr_ur) < RX_MOD_BASE(vr_ux)) {
vr_ur = (vr_ur + 1)%cfg.rx_mod;
log->debug("Entering Reassemble from timeout id=%d\n", timeout_id);
reassemble_rx_sdus();
log->debug("Finished reassemble from timeout id=%d\n", timeout_id);
}
reordering_timer->stop();
if(RX_MOD_BASE(vr_uh) > RX_MOD_BASE(vr_ur))
{
reordering_timer->set(this, cfg.t_reordering);
if (RX_MOD_BASE(vr_uh) > RX_MOD_BASE(vr_ur)) {
reordering_timer->reset();
reordering_timer->run();
vr_ux = vr_uh;
}

@ -190,8 +190,8 @@ uint32_t prbset_to_bitmask() {
int main(int argc, char **argv) {
struct timeval t[3] = {};
size_t tx_nof_bits = 0, rx_nof_bits = 0;
srslte_enb_dl_t enb_dl = {};
srslte_ue_dl_t ue_dl = {};
srslte_enb_dl_t enb_dl;
srslte_ue_dl_t ue_dl;
srslte_softbuffer_tx_t *softbuffer_tx[SRSLTE_MAX_TB] = {};
srslte_softbuffer_rx_t *softbuffer_rx[SRSLTE_MAX_TB] = {};
uint8_t *data_tx[SRSLTE_MAX_TB] = {};
@ -206,6 +206,9 @@ int main(int argc, char **argv) {
cf_t *signal_buffer[SRSLTE_MAX_PORTS] = {NULL};
bzero(&enb_dl, sizeof(enb_dl));
bzero(&ue_dl, sizeof(ue_dl));
/*
* Allocate Memory
*/
@ -304,9 +307,12 @@ int main(int argc, char **argv) {
/*
* Run eNodeB
*/
srslte_ra_dl_dci_t dci = {};
srslte_ra_dl_dci_t dci;
srslte_dci_format_t dci_format = SRSLTE_DCI_FORMAT1A;
srslte_ra_dl_grant_t grant = {};
srslte_ra_dl_grant_t grant;
bzero(&dci, sizeof(dci));
bzero(&grant, sizeof(grant));
prbset_num = (int) ceilf((float) cell.nof_prb / srslte_ra_type0_P(cell.nof_prb));
last_prbset_num = prbset_num;

@ -36,6 +36,11 @@ add_test(rlc_am_stress_test rlc_stress_test --mode=AM --loglevel 1 --sdu_gen_del
add_test(rlc_um_stress_test rlc_stress_test --mode=UM --loglevel 1)
add_test(rlc_tm_stress_test rlc_stress_test --mode=TM --loglevel 1 --opp_sdu_ratio=1.0)
# Run clang-tidy on rlc_stress_test if available.
# Setting the CXX_CLANG_TIDY target property makes CMake run the given command
# line alongside the compiler for the C++ sources of that target.
# NOTE(review): CLANG_TIDY_BIN and DO_CLANG_TIDY are not defined in the visible
# part of this file; presumably a parent CMakeLists.txt locates the binary and
# assembles the command line -- confirm before relying on it.
if(CLANG_TIDY_BIN)
set_target_properties(rlc_stress_test PROPERTIES CXX_CLANG_TIDY "${DO_CLANG_TIDY}")
endif()
add_executable(rlc_um_data_test rlc_um_data_test.cc)
target_link_libraries(rlc_um_data_test srslte_upper srslte_phy srslte_common)
add_test(rlc_um_data_test rlc_um_data_test)

@ -42,15 +42,21 @@ class mac_dummy_timers
:public srslte::mac_interface_timers
{
public:
srslte::timers::timer* timer_get(uint32_t timer_id)
{
return &t;
mac_dummy_timers() : timers(8) {}
srslte::timers::timer* timer_get(uint32_t timer_id) {
return timers.get(timer_id);
}
void timer_release_id(uint32_t timer_id) {
timers.release_id(timer_id);
}
uint32_t timer_get_unique_id() {
return timers.get_unique_id();
}
void step_all() {
timers.step_all();
}
uint32_t timer_get_unique_id(){return 0;}
void timer_release_id(uint32_t id){}
private:
srslte::timers::timer t;
srslte::timers timers;
};
class rlc_am_tester
@ -135,7 +141,7 @@ private:
bool running;
};
void basic_test()
bool basic_test()
{
srslte::log_filter log1("RLC_AM_1");
srslte::log_filter log2("RLC_AM_2");
@ -166,8 +172,13 @@ void basic_test()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -215,11 +226,17 @@ void basic_test()
}
// Check statistics
assert(rlc1.get_num_tx_bytes() == rlc2.get_num_rx_bytes());
assert(rlc2.get_num_tx_bytes() == rlc1.get_num_rx_bytes());
if (rlc1.get_num_tx_bytes() != rlc2.get_num_rx_bytes()) {
return -1;
}
if (rlc2.get_num_tx_bytes() != rlc1.get_num_rx_bytes()) {
return -1;
}
return 0;
}
void concat_test()
bool concat_test()
{
srslte::log_filter log1("RLC_AM_1");
srslte::log_filter log2("RLC_AM_2");
@ -250,8 +267,13 @@ void concat_test()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -284,11 +306,17 @@ void concat_test()
}
// check statistics
assert(rlc1.get_num_tx_bytes() == rlc2.get_num_rx_bytes());
assert(rlc2.get_num_tx_bytes() == rlc1.get_num_rx_bytes());
if (rlc1.get_num_tx_bytes() != rlc2.get_num_rx_bytes()) {
return -1;
}
if (rlc2.get_num_tx_bytes() != rlc1.get_num_rx_bytes()) {
return -1;
}
void segment_test()
return 0;
}
bool segment_test()
{
srslte::log_filter log1("RLC_AM_1");
srslte::log_filter log2("RLC_AM_2");
@ -319,8 +347,13 @@ void segment_test()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -370,11 +403,17 @@ void segment_test()
assert(tester.sdus[i]->msg[j] == j);
}
assert(rlc1.get_num_tx_bytes() == rlc2.get_num_rx_bytes());
assert(rlc2.get_num_tx_bytes() == rlc1.get_num_rx_bytes());
if (rlc1.get_num_tx_bytes() != rlc2.get_num_rx_bytes()) {
return -1;
}
if (rlc2.get_num_tx_bytes() != rlc1.get_num_rx_bytes()) {
return -1;
}
return 0;
}
void retx_test()
bool retx_test()
{
srslte::log_filter log1("RLC_AM_1");
srslte::log_filter log2("RLC_AM_2");
@ -405,8 +444,13 @@ void retx_test()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -436,8 +480,11 @@ void retx_test()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -462,12 +509,14 @@ void retx_test()
assert(tester.n_sdus == 5);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == 1);
assert(*(tester.sdus[i]->msg) == i);
if (tester.sdus[i]->N_bytes != 1) return -1;
if (*(tester.sdus[i]->msg) != i) return -1;
}
return 0;
}
void resegment_test_1()
bool resegment_test_1()
{
// SDUs: | 10 | 10 | 10 | 10 | 10 |
// PDUs: | 10 | 10 | 10 | 10 | 10 |
@ -502,8 +551,13 @@ void resegment_test_1()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -534,8 +588,11 @@ void resegment_test_1()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -570,13 +627,15 @@ void resegment_test_1()
assert(tester.n_sdus == 5);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == 10);
if (tester.sdus[i]->N_bytes != 10) return -1;
for(int j=0;j<10;j++)
assert(tester.sdus[i]->msg[j] == j);
if (tester.sdus[i]->msg[j] != j) return -1;
}
return 0;
}
void resegment_test_2()
bool resegment_test_2()
{
// SDUs: | 10 | 10 | 10 | 10 | 10 |
@ -612,8 +671,13 @@ void resegment_test_2()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -644,8 +708,11 @@ void resegment_test_2()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -677,13 +744,15 @@ void resegment_test_2()
assert(tester.n_sdus == 5);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == 10);
if (tester.sdus[i]->N_bytes != 10) return -1;
for(int j=0;j<10;j++)
assert(tester.sdus[i]->msg[j] == j);
if (tester.sdus[i]->msg[j] != j) return -1;
}
return 0;
}
void resegment_test_3()
bool resegment_test_3()
{
// SDUs: | 10 | 10 | 10 | 10 | 10 |
@ -719,8 +788,13 @@ void resegment_test_3()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -751,8 +825,11 @@ void resegment_test_3()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -780,15 +857,16 @@ void resegment_test_3()
assert(tester.n_sdus == 5);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == 10);
if (tester.sdus[i]->N_bytes != 10) return -1;
for(int j=0;j<10;j++)
assert(tester.sdus[i]->msg[j] == j);
if (tester.sdus[i]->msg[j] != j) return -1;
}
return 0;
}
void resegment_test_4()
bool resegment_test_4()
{
// SDUs: | 10 | 10 | 10 | 10 | 10 |
// PDUs: | 5 | 5| 30 | 5 | 5|
// Retx PDU segments: | 15 | 15 |
@ -822,8 +900,13 @@ void resegment_test_4()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -854,8 +937,11 @@ void resegment_test_4()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -883,15 +969,16 @@ void resegment_test_4()
assert(tester.n_sdus == 5);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == 10);
if (tester.sdus[i]->N_bytes != 10) return -1;
for(int j=0;j<10;j++)
assert(tester.sdus[i]->msg[j] == j);
if (tester.sdus[i]->msg[j] != j) return -1;
}
return 0;
}
void resegment_test_5()
bool resegment_test_5()
{
// SDUs: | 10 | 10 | 10 | 10 | 10 |
// PDUs: |2|3| 40 |3|2|
// Retx PDU segments: | 20 | 20 |
@ -908,8 +995,6 @@ void resegment_test_5()
rlc_am rlc1;
rlc_am rlc2;
int len;
log1.set_level(srslte::LOG_LEVEL_DEBUG);
log2.set_level(srslte::LOG_LEVEL_DEBUG);
@ -925,8 +1010,13 @@ void resegment_test_5()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 5 SDUs into RLC1
byte_buffer_t sdu_bufs[NBUFS];
@ -957,8 +1047,11 @@ void resegment_test_5()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -986,13 +1079,15 @@ void resegment_test_5()
assert(tester.n_sdus == 5);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == 10);
if (tester.sdus[i]->N_bytes != 10) return -1;
for(int j=0;j<10;j++)
assert(tester.sdus[i]->msg[j] == j);
if (tester.sdus[i]->msg[j] != j) return -1;
}
return 0;
}
void resegment_test_6()
bool resegment_test_6()
{
// SDUs: |10|10|10| 54 | 54 | 54 | 54 | 54 | 54 |
// PDUs: |10|10|10| 270 | 54 |
@ -1027,8 +1122,13 @@ void resegment_test_6()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push SDUs into RLC1
byte_buffer_t sdu_bufs[9];
@ -1069,8 +1169,11 @@ void resegment_test_6()
rlc2.write_pdu(pdu_bufs[i].msg, pdu_bufs[i].N_bytes);
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(4 == rlc2.get_buffer_state());
@ -1111,14 +1214,17 @@ void resegment_test_6()
}
for(int i=3;i<9;i++)
{
assert(tester.sdus[i]->N_bytes == 54);
for(int j=0;j<54;j++)
assert(tester.sdus[i]->msg[j] == j);
if(tester.sdus[i]->N_bytes != 54) return -1;
for(int j=0;j<54;j++) {
if (tester.sdus[i]->msg[j] != j) return -1;
}
}
return 0;
}
// Retransmission of PDU segments of the same size
void resegment_test_7()
bool resegment_test_7()
{
// SDUs: | 30 | 30 |
// PDUs: | 13 | 13 | 11 | 13 | 10 |
@ -1148,8 +1254,6 @@ void resegment_test_7()
rlc_am rlc1;
rlc_am rlc2;
int len;
log1.set_level(srslte::LOG_LEVEL_DEBUG);
log2.set_level(srslte::LOG_LEVEL_DEBUG);
@ -1165,8 +1269,13 @@ void resegment_test_7()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 2 SDUs into RLC1
byte_buffer_t sdu_bufs[N_SDU_BUFS];
@ -1189,7 +1298,14 @@ void resegment_test_7()
assert(pdu_bufs[i].N_bytes);
}
assert(0 == rlc1.get_buffer_state());
// Step timers until poll_retx timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
// RLC should try to retx a random PDU because it needs to request a status from the receiver
assert(0 != rlc1.get_buffer_state());
// Skip PDU with SN 2
for(uint32_t i=0;i<N_PDU_BUFS;i++) {
@ -1201,15 +1317,19 @@ void resegment_test_7()
}
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(12 == rlc1.get_buffer_state());
// RLC should try to retransmit a random PDU because it needs to re-request a status PDU from the receiver
assert(0 != rlc1.get_buffer_state());
// first round of retx, forcing resegmentation
byte_buffer_t retx[4];
for (uint32_t i = 0; i < 4; i++) {
assert(rlc1.get_buffer_state());
assert(0 != rlc1.get_buffer_state());
retx[i].N_bytes = rlc1.read_pdu(retx[i].msg, 7);
assert(retx[i].N_bytes);
@ -1222,8 +1342,6 @@ void resegment_test_7()
}
}
usleep(10000);
// Read status PDU from RLC2
assert(rlc2.get_buffer_state());
byte_buffer_t status_buf;
@ -1237,7 +1355,6 @@ void resegment_test_7()
assert(15 == rlc1.get_buffer_state());
// second round of retx, forcing resegmentation
byte_buffer_t retx2[4];
for (uint32_t i = 0; i < 4; i++) {
@ -1253,26 +1370,47 @@ void resegment_test_7()
// check buffer states
assert(0 == rlc1.get_buffer_state());
// Step timers until poll_retx timeout expires
cnt = 5;
while (cnt--) {
timers.step_all();
}
// Read status PDU from RLC2
assert(rlc2.get_buffer_state());
status_buf.N_bytes = rlc2.read_pdu(status_buf.msg, 10); // 10 bytes is enough to hold the status
// Write status PDU to RLC1
rlc1.write_pdu(status_buf.msg, status_buf.N_bytes);
#if HAVE_PCAP
pcap.write_ul_am_ccch(status_buf.msg, status_buf.N_bytes);
#endif
// check status again
assert(0 == rlc1.get_buffer_state());
assert(0 == rlc2.get_buffer_state());
// Check number of SDUs and their content
assert(tester.n_sdus == N_SDU_BUFS);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == sdu_size);
if (tester.sdus[i]->N_bytes != sdu_size) return -1;
for(uint32_t j=0;j<N_SDU_BUFS;j++) {
assert(tester.sdus[i]->msg[j] == i);
if (tester.sdus[i]->msg[j] != i) return -1;
}
}
#if HAVE_PCAP
pcap.close();
#endif
return 0;
}
// Retransmission of PDU segments with different size
void resegment_test_8()
bool resegment_test_8()
{
// SDUs: | 30 | 30 |
// PDUs: | 15 | 15 | 15 | 15 | 15 |
@ -1318,8 +1456,13 @@ void resegment_test_8()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
rlc2.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
if (not rlc2.configure(&cnfg)) {
return -1;
}
// Push 2 SDUs into RLC1
byte_buffer_t sdu_bufs[N_SDU_BUFS];
@ -1354,10 +1497,14 @@ void resegment_test_8()
}
}
// Sleep to let reordering timeout expire
usleep(10000);
// Step timers until reordering timeout expires
int cnt = 5;
while (cnt--) {
timers.step_all();
}
assert(12 == rlc1.get_buffer_state());
// what PDU to retransmit is random but it must not be zero
assert(0 != rlc1.get_buffer_state());
// first round of retx, forcing resegmentation
byte_buffer_t retx[4];
@ -1375,7 +1522,11 @@ void resegment_test_8()
}
}
usleep(20000);
// Step timers until reordering timeout expires
cnt = 7;
while (cnt--) {
timers.step_all();
}
// Read status PDU from RLC2
assert(rlc2.get_buffer_state());
@ -1402,27 +1553,41 @@ void resegment_test_8()
#endif
}
// get BSR from RLC2
status_buf.N_bytes = rlc2.read_pdu(status_buf.msg, 10); // 10 bytes is enough to hold the status
// Write status PDU to RLC1
rlc1.write_pdu(status_buf.msg, status_buf.N_bytes);
#if HAVE_PCAP
pcap.write_ul_am_ccch(status_buf.msg, status_buf.N_bytes);
#endif
// check buffer states
assert(0 == rlc1.get_buffer_state());
assert(0 == rlc2.get_buffer_state());
if (rlc1.get_buffer_state() != 0) {
return -1;
};
if (rlc2.get_buffer_state() != 0) {
return -1;
};
// Check number of SDUs and their content
assert(tester.n_sdus == N_SDU_BUFS);
for(int i=0; i<tester.n_sdus; i++)
{
assert(tester.sdus[i]->N_bytes == sdu_size);
for(int i=0; i<tester.n_sdus; i++) {
if (tester.sdus[i]->N_bytes != sdu_size) return -1;
for(uint32_t j=0;j<N_SDU_BUFS;j++) {
assert(tester.sdus[i]->msg[j] == i);
if (tester.sdus[i]->msg[j] != i) return -1;
}
}
#if HAVE_PCAP
pcap.close();
#endif
return 0;
}
void reset_test()
bool reset_test()
{
srslte::log_filter log1("RLC_AM_1");
srslte::log_filter log2("RLC_AM_2");
@ -1449,7 +1614,9 @@ void reset_test()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
// Push 1 SDU of size 10 into RLC1
byte_buffer_t sdu_buf;
@ -1473,10 +1640,14 @@ void reset_test()
len = rlc1.read_pdu(pdu_bufs.msg, 100);
pdu_bufs.N_bytes = len;
assert(0 == rlc1.get_buffer_state());
if (0 != rlc1.get_buffer_state()) {
return -1;
}
void stop_test()
return 0;
}
bool stop_test()
{
srslte::log_filter log1("RLC_AM_1");
log1.set_level(srslte::LOG_LEVEL_DEBUG);
@ -1499,7 +1670,9 @@ void stop_test()
cnfg.ul_am_rlc.poll_pdu = LIBLTE_RRC_POLL_PDU_P4;
cnfg.ul_am_rlc.t_poll_retx = LIBLTE_RRC_T_POLL_RETRANSMIT_MS5;
rlc1.configure(&cnfg);
if (not rlc1.configure(&cnfg)) {
return -1;
}
// start thread reading
ul_writer writer(&rlc1);
@ -1510,48 +1683,93 @@ void stop_test()
// stop RLC1
rlc1.stop();
return 0;
}
int main(int argc, char **argv) {
basic_test();
int main(int argc, char **argv)
{
if (basic_test()) {
printf("basic_test failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
concat_test();
if (concat_test()) {
printf("concat_test failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
segment_test();
if (segment_test()) {
printf("segment_test failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
retx_test();
if (retx_test()) {
printf("retx_test failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_1();
if (resegment_test_1()) {
printf("resegment_test_1 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_2();
if (resegment_test_2()) {
printf("resegment_test_2 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_3();
if (resegment_test_3()) {
printf("resegment_test_3 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_4();
if (resegment_test_4()) {
printf("resegment_test_4 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_5();
if (resegment_test_5()) {
printf("resegment_test_5 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_6();
if (resegment_test_6()) {
printf("resegment_test_6 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_7();
if (resegment_test_7()) {
printf("resegment_test_7 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
resegment_test_8();
if (resegment_test_8()) {
printf("resegment_test_8 failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
reset_test();
if (reset_test()) {
printf("reset_test failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
stop_test();
if (stop_test()) {
printf("stop_test failed\n");
exit(-1);
};
byte_buffer_pool::get_instance()->cleanup();
}

@ -25,7 +25,7 @@
*/
#include <iostream>
#include <stdlib.h>
#include <cstdlib>
#include <pthread.h>
#include "srslte/common/log_filter.h"
#include "srslte/common/logger_stdout.h"
@ -34,10 +34,10 @@
#include "srslte/upper/rlc.h"
#include <boost/program_options.hpp>
#include <boost/program_options/parsers.hpp>
#include <assert.h>
#include <cassert>
#include <srslte/upper/rlc_interface.h>
#define SDU_SIZE 1500
#define LOG_HEX_LIMIT (-1)
using namespace std;
using namespace srsue;
@ -46,6 +46,7 @@ namespace bpo = boost::program_options;
typedef struct {
std::string mode;
uint32_t sdu_size;
uint32_t test_duration_sec;
float error_rate;
uint32_t sdu_gen_delay_usec;
@ -55,6 +56,8 @@ typedef struct {
bool single_tx;
bool write_pcap;
float opp_sdu_ratio;
bool zero_seed;
bool pedantic;
} stress_test_args_t;
void parse_args(stress_test_args_t *args, int argc, char *argv[]) {
@ -71,6 +74,7 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) {
common.add_options()
("mode", bpo::value<std::string>(&args->mode)->default_value("AM"), "Whether to test RLC acknowledged or unacknowledged mode (AM/UM)")
("duration", bpo::value<uint32_t>(&args->test_duration_sec)->default_value(5), "Duration (sec)")
("sdu_size", bpo::value<uint32_t>(&args->sdu_size)->default_value(1500), "Size of SDUs")
("sdu_gen_delay", bpo::value<uint32_t>(&args->sdu_gen_delay_usec)->default_value(0), "SDU generation delay (usec)")
("pdu_tx_delay", bpo::value<uint32_t>(&args->pdu_tx_delay_usec)->default_value(0), "Delay in MAC for transfering PDU from tx'ing RLC to rx'ing RLC (usec)")
("error_rate", bpo::value<float>(&args->error_rate)->default_value(0.1), "Rate at which RLC PDUs are dropped")
@ -78,7 +82,9 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) {
("reestablish", bpo::value<bool>(&args->reestablish)->default_value(false), "Mimic RLC reestablish during execution")
("loglevel", bpo::value<uint32_t>(&args->log_level)->default_value(srslte::LOG_LEVEL_DEBUG), "Log level (1=Error,2=Warning,3=Info,4=Debug)")
("singletx", bpo::value<bool>(&args->single_tx)->default_value(false), "If set to true, only one node is generating data")
("pcap", bpo::value<bool>(&args->write_pcap)->default_value(false), "Whether to write all RLC PDU to PCAP file");
("pcap", bpo::value<bool>(&args->write_pcap)->default_value(false), "Whether to write all RLC PDU to PCAP file")
("zeroseed", bpo::value<bool>(&args->zero_seed)->default_value(false), "Whether to initialize random seed to zero")
("pedantic", bpo::value<bool>(&args->pedantic)->default_value(true), "Whether to perform strict SDU size checking at receiver");
// these options are allowed on the command line
bpo::options_description cmdline_options;
@ -90,7 +96,7 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) {
bpo::notify(vm);
// help option was given - print usage and exit
if (vm.count("help")) {
if (vm.count("help") > 0) {
cout << "Usage: " << argv[0] << " [OPTIONS] config_file" << endl << endl;
cout << common << endl << general << endl;
exit(0);
@ -102,21 +108,23 @@ void parse_args(stress_test_args_t *args, int argc, char *argv[]) {
}
}
class mac_reader
:public thread
class mac_dummy
:public srslte::mac_interface_timers
,public thread
{
public:
mac_reader(rlc_interface_mac *rlc1_, rlc_interface_mac *rlc2_, float fail_rate_, float opp_sdu_ratio_, uint32_t pdu_tx_delay_usec_, rlc_pcap *pcap_, uint32_t lcid_, bool is_dl_ = true)
mac_dummy(rlc_interface_mac *rlc1_, rlc_interface_mac *rlc2_, stress_test_args_t args_, uint32_t lcid_, rlc_pcap* pcap_ = NULL)
:timers(8)
,run_enable(true)
,rlc1(rlc1_)
,rlc2(rlc2_)
,args(args_)
,pcap(pcap_)
,lcid(lcid_)
,log("MAC ")
{
rlc1 = rlc1_;
rlc2 = rlc2_;
fail_rate = fail_rate_;
opp_sdu_ratio = opp_sdu_ratio_;
run_enable = true;
pdu_tx_delay_usec = pdu_tx_delay_usec_;
pcap = pcap_;
is_dl = is_dl_;
lcid = lcid_;
log.set_level(static_cast<LOG_LEVEL_ENUM>(args.log_level));
log.set_hex_limit(LOG_HEX_LIMIT);
}
void stop()
@ -125,99 +133,94 @@ public:
wait_thread_finish();
}
srslte::timers::timer* timer_get(uint32_t timer_id)
{
return timers.get(timer_id);
}
uint32_t timer_get_unique_id() {
return timers.get_unique_id();
}
void timer_release_id(uint32_t timer_id) {
timers.release_id(timer_id);
}
void step_timer() {
timers.step_all();
}
private:
void run_thread()
void run_tti(rlc_interface_mac *tx_rlc, rlc_interface_mac *rx_rlc, bool is_dl)
{
byte_buffer_t *pdu = byte_buffer_pool::get_instance()->allocate("mac_reader::run_thread");
byte_buffer_t *pdu = byte_buffer_pool::get_instance()->allocate(__PRETTY_FUNCTION__);
if (!pdu) {
printf("Fatal Error: Could not allocate PDU in mac_reader::run_thread\n");
exit(-1);
}
while(run_enable) {
// generate MAC opportunities of random size or with fixed ratio
float r = opp_sdu_ratio ? opp_sdu_ratio : (float)rand()/RAND_MAX;
int opp_size = r*SDU_SIZE;
uint32_t buf_state = rlc1->get_buffer_state(lcid);
if (buf_state) {
int read = rlc1->read_pdu(lcid, pdu->msg, opp_size);
if (pdu_tx_delay_usec) usleep(pdu_tx_delay_usec);
if(((float)rand()/RAND_MAX > fail_rate) && read>0) {
float r = args.opp_sdu_ratio ? args.opp_sdu_ratio : static_cast<float>(rand())/RAND_MAX;
int opp_size = r*args.sdu_size;
uint32_t buf_state = tx_rlc->get_buffer_state(lcid);
if (buf_state > 0) {
int read = tx_rlc->read_pdu(lcid, pdu->msg, opp_size);
pdu->N_bytes = read;
rlc2->write_pdu(lcid, pdu->msg, pdu->N_bytes);
if (args.pdu_tx_delay_usec > 0) {
usleep(args.pdu_tx_delay_usec);
}
if(((float)rand()/RAND_MAX > args.error_rate) && read>0) {
rx_rlc->write_pdu(lcid, pdu->msg, pdu->N_bytes);
if (is_dl) {
pcap->write_dl_am_ccch(pdu->msg, pdu->N_bytes);
} else {
pcap->write_ul_am_ccch(pdu->msg, pdu->N_bytes);
}
}
} else {
log.warning_hex(pdu->msg, pdu->N_bytes, "Dropping RLC PDU (%d B)\n", pdu->N_bytes);
}
}
byte_buffer_pool::get_instance()->deallocate(pdu);
}
rlc_interface_mac *rlc1;
rlc_interface_mac *rlc2;
float fail_rate;
float opp_sdu_ratio;
uint32_t pdu_tx_delay_usec;
rlc_pcap *pcap;
uint32_t lcid;
bool is_dl;
bool run_enable;
};
class mac_dummy
:public srslte::mac_interface_timers
{
public:
mac_dummy(rlc_interface_mac *rlc1_, rlc_interface_mac *rlc2_, float fail_rate_, float opp_sdu_ratio_, int32_t pdu_tx_delay, uint32_t lcid, rlc_pcap* pcap = NULL)
:r1(rlc1_, rlc2_, fail_rate_, opp_sdu_ratio_, pdu_tx_delay, pcap, lcid, true)
,r2(rlc2_, rlc1_, fail_rate_, opp_sdu_ratio_, pdu_tx_delay, pcap, lcid, false)
void run_thread()
{
}
while (run_enable) {
// Downlink direction first (RLC1->RLC2)
run_tti(rlc1, rlc2, true);
void start()
{
r1.start(7);
r2.start(7);
}
// UL direction (RLC2->RLC1)
run_tti(rlc2, rlc1, false);
void stop()
{
r1.stop();
r2.stop();
// step timer
step_timer();
}
srslte::timers::timer* timer_get(uint32_t timer_id)
{
return &t;
}
uint32_t timer_get_unique_id(){return 0;}
void timer_release_id(uint32_t id){}
private:
srslte::timers::timer t;
mac_reader r1;
mac_reader r2;
rlc_interface_mac *rlc1;
rlc_interface_mac *rlc2;
srslte::timers timers;
bool run_enable;
stress_test_args_t args;
rlc_pcap *pcap;
uint32_t lcid;
srslte::log_filter log;
};
class rlc_tester
:public pdcp_interface_rlc
,public rrc_interface_rlc
,public thread
{
public:
rlc_tester(rlc_interface_pdcp *rlc_, std::string name_, uint32_t sdu_gen_delay_usec_, uint32_t lcid_){
rlc = rlc_;
run_enable = true;
rx_pdus = 0;
name = name_;
sdu_gen_delay_usec = sdu_gen_delay_usec_;
lcid = lcid_;
rlc_tester(rlc_interface_pdcp *rlc_, std::string name_, stress_test_args_t args_, uint32_t lcid_)
:log("Testr")
,rlc(rlc_)
,run_enable(true)
,rx_pdus()
,name(name_)
,args(args_)
,lcid(lcid_)
{
log.set_level(srslte::LOG_LEVEL_ERROR);
log.set_hex_limit(LOG_HEX_LIMIT);
}
void stop()
@ -230,10 +233,13 @@ public:
void write_pdu(uint32_t rx_lcid, byte_buffer_t *sdu)
{
assert(rx_lcid == lcid);
if (sdu->N_bytes != SDU_SIZE) {
printf("Received PDU with size %d, expected %d. Exiting.\n", sdu->N_bytes, SDU_SIZE);
if (sdu->N_bytes != args.sdu_size) {
log.error_hex(sdu->msg, sdu->N_bytes, "Received SDU with size %d, expected %d.\n", sdu->N_bytes, args.sdu_size);
// exit if in pedantic mode or SDU is not a multiple of the expected size
if (args.pedantic || sdu->N_bytes % args.sdu_size != 0) {
exit(-1);
}
}
byte_buffer_pool::get_instance()->deallocate(sdu);
rx_pdus++;
@ -245,39 +251,41 @@ public:
// RRC interface
void max_retx_attempted(){}
std::string get_rb_name(uint32_t rx_lcid) { return std::string(""); }
std::string get_rb_name(uint32_t rx_lcid) { return std::string("DRB1"); }
int get_nof_rx_pdus() { return rx_pdus; }
private:
void run_thread()
{
void run_thread() {
uint8_t sn = 0;
while(run_enable) {
byte_buffer_t *pdu = byte_buffer_pool::get_instance()->allocate("rlc_tester::run_thread");
if (!pdu) {
if (pdu == NULL) {
printf("Error: Could not allocate PDU in rlc_tester::run_thread\n\n\n");
// backoff for a bit
usleep(1000);
continue;
}
for (uint32_t i = 0; i < SDU_SIZE; i++) {
for (uint32_t i = 0; i < args.sdu_size; i++) {
pdu->msg[i] = sn;
}
sn++;
pdu->N_bytes = SDU_SIZE;
pdu->N_bytes = args.sdu_size;
rlc->write_sdu(lcid, pdu);
if (sdu_gen_delay_usec) usleep(sdu_gen_delay_usec);
if (args.sdu_gen_delay_usec > 0) {
usleep(args.sdu_gen_delay_usec);
}
}
}
bool run_enable;
long rx_pdus;
uint64_t rx_pdus;
uint32_t lcid;
srslte::log_filter log;
std::string name;
uint32_t sdu_gen_delay_usec;
stress_test_args_t args;
rlc_interface_pdcp *rlc;
};
@ -286,10 +294,10 @@ void stress_test(stress_test_args_t args)
{
srslte::log_filter log1("RLC_1");
srslte::log_filter log2("RLC_2");
log1.set_level((LOG_LEVEL_ENUM)args.log_level);
log2.set_level((LOG_LEVEL_ENUM)args.log_level);
log1.set_hex_limit(-1);
log2.set_hex_limit(-1);
log1.set_level(static_cast<LOG_LEVEL_ENUM>(args.log_level));
log2.set_level(static_cast<LOG_LEVEL_ENUM>(args.log_level));
log1.set_hex_limit(LOG_HEX_LIMIT);
log2.set_hex_limit(LOG_HEX_LIMIT);
rlc_pcap pcap;
uint32_t lcid = 1;
@ -327,9 +335,9 @@ void stress_test(stress_test_args_t args)
rlc rlc1;
rlc rlc2;
rlc_tester tester1(&rlc1, "tester1", args.sdu_gen_delay_usec, lcid);
rlc_tester tester2(&rlc2, "tester2", args.sdu_gen_delay_usec, lcid);
mac_dummy mac(&rlc1, &rlc2, args.error_rate, args.opp_sdu_ratio, args.pdu_tx_delay_usec, lcid, &pcap);
rlc_tester tester1(&rlc1, "tester1", args, lcid);
rlc_tester tester2(&rlc2, "tester2", args, lcid);
mac_dummy mac(&rlc1, &rlc2, args, lcid, &pcap);
ue_interface ue;
rlc1.init(&tester1, &tester1, &ue, &log1, &mac, 0);
@ -347,6 +355,10 @@ void stress_test(stress_test_args_t args)
}
mac.start();
if (args.test_duration_sec < 1) {
args.test_duration_sec = 1;
}
for (uint32_t i = 0; i < args.test_duration_sec; i++) {
// if enabled, mimic reestablishment every second
if (args.reestablish) {
@ -356,41 +368,55 @@ void stress_test(stress_test_args_t args)
usleep(1e6);
}
printf("Test finished, tearing down ..\n");
// Stop RLC instances first to release blocking writers
rlc1.stop();
rlc2.stop();
printf("RLC entities stopped.\n");
// Stop upper layer writers
tester1.stop();
tester2.stop();
printf("Writers stopped.\n");
mac.stop();
if (args.write_pcap) {
pcap.close();
}
rlc_metrics_t metrics;
rlc_metrics_t metrics = {};
rlc1.get_metrics(metrics);
printf("RLC1 received %d SDUs in %ds (%.2f PDU/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n",
printf("RLC1 received %d SDUs in %ds (%.2f/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n",
tester1.get_nof_rx_pdus(),
args.test_duration_sec,
(float)tester1.get_nof_rx_pdus()/args.test_duration_sec,
metrics.dl_tput_mbps,
metrics.ul_tput_mbps);
static_cast<double>(tester1.get_nof_rx_pdus()/args.test_duration_sec),
metrics.dl_tput_mbps[lcid],
metrics.ul_tput_mbps[lcid]);
rlc2.get_metrics(metrics);
printf("RLC2 received %d SDUs in %ds (%.2f PDU/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n",
printf("RLC2 received %d SDUs in %ds (%.2f/s), Throughput: DL=%4.2f Mbps, UL=%4.2f Mbps\n",
tester2.get_nof_rx_pdus(),
args.test_duration_sec,
(float)tester2.get_nof_rx_pdus()/args.test_duration_sec,
metrics.dl_tput_mbps,
metrics.ul_tput_mbps);
static_cast<double>(tester2.get_nof_rx_pdus()/args.test_duration_sec),
metrics.dl_tput_mbps[lcid],
metrics.ul_tput_mbps[lcid]);
}
int main(int argc, char **argv) {
stress_test_args_t args;
stress_test_args_t args = {};
parse_args(&args, argc, argv);
if (args.zero_seed) {
srand(0);
} else {
srand(time(NULL));
}
stress_test(args);
byte_buffer_pool::get_instance()->cleanup();

@ -141,7 +141,8 @@ nof_ctrl_symbols = 3
#####################################################################
# Expert configuration options
#
# pdsch_max_its: Maximum number of turbo decoder iterations (Default 4)
# pusch_max_its: Maximum number of turbo decoder iterations, counted in half iterations (Default 8)
# pusch_8bit_decoder: Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)
# nof_phy_threads: Selects the number of PHY threads (maximum 4, minimum 1, default 2)
# metrics_period_secs: Sets the period at which metrics are requested from the UE.
# pregenerate_signals: Pregenerate uplink signals after attach. Improves CPU performance.
@ -152,7 +153,8 @@ nof_ctrl_symbols = 3
#
#####################################################################
[expert]
#pdsch_max_its = 4
#pusch_max_its = 8 # These are half iterations
#pusch_8bit_decoder = false
#nof_phy_threads = 2
#pregenerate_signals = false
#tx_amplitude = 0.6

@ -28,6 +28,7 @@
#define SRSENB_PHCH_COMMON_H
#include <map>
#include <semaphore.h>
#include "srslte/interfaces/enb_interfaces.h"
#include "srslte/interfaces/enb_metrics_interface.h"
#include "srslte/common/gen_mch_tables.h"
@ -36,11 +37,13 @@
#include "srslte/common/thread_pool.h"
#include "srslte/radio/radio.h"
#include <string.h>
namespace srsenb {
typedef struct {
float max_prach_offset_us;
int pusch_max_its;
bool pusch_8bit_decoder;
float tx_amplitude;
int nof_phy_threads;
std::string equalizer_mode;
@ -73,28 +76,15 @@ class phch_common
public:
phch_common(uint32_t max_mutex_) : tx_mutex(max_mutex_) {
nof_mutex = 0;
max_mutex = max_mutex_;
params.max_prach_offset_us = 20;
radio = NULL;
mac = NULL;
is_first_tx = false;
is_first_of_burst = false;
pdsch_p_b = 0;
nof_workers = 0;
bzero(&pusch_cfg, sizeof(pusch_cfg));
bzero(&hopping_cfg, sizeof(hopping_cfg));
bzero(&pucch_cfg, sizeof(pucch_cfg));
bzero(&ul_grants, sizeof(ul_grants));
}
phch_common(uint32_t nof_workers);
~phch_common();
void set_nof_workers(uint32_t nof_workers);
bool init(srslte_cell_t *cell, srslte::radio *radio_handler, mac_interface_phy *mac);
void reset();
void stop();
void set_nof_mutex(uint32_t nof_mutex);
void worker_end(uint32_t tx_mutex_cnt, cf_t *buffer[SRSLTE_MAX_PORTS], uint32_t nof_samples, srslte_timestamp_t tx_time);
// Common objects
@ -147,13 +137,12 @@ public:
private:
std::vector<pthread_mutex_t> tx_mutex;
std::vector<sem_t> tx_sem;
bool is_first_tx;
bool is_first_of_burst;
uint32_t nof_workers;
uint32_t nof_mutex;
uint32_t max_mutex;
uint32_t max_workers;
pthread_mutex_t user_mutex;

@ -46,7 +46,7 @@ public:
void reset();
cf_t *get_buffer_rx(uint32_t antenna_idx);
void set_time(uint32_t tti, uint32_t tx_mutex_cnt, srslte_timestamp_t tx_time);
void set_time(uint32_t tti, uint32_t tx_worker_cnt, srslte_timestamp_t tx_time);
int add_rnti(uint16_t rnti);
void rem_rnti(uint16_t rnti);
@ -93,8 +93,9 @@ private:
cf_t *signal_buffer_rx[SRSLTE_MAX_PORTS];
cf_t *signal_buffer_tx[SRSLTE_MAX_PORTS];
uint32_t tti_rx, tti_tx_dl, tti_tx_ul;
uint32_t sf_rx, sf_tx, tx_mutex_cnt;
uint32_t sf_rx, sf_tx;
uint32_t t_rx, t_tx_dl, t_tx_ul;
uint32_t tx_worker_cnt;
srslte_enb_dl_t enb_dl;
srslte_enb_ul_t enb_ul;
srslte_softbuffer_tx_t temp_mbsfn_softbuffer;

@ -83,9 +83,9 @@ private:
const static int MAX_WORKERS = 4;
const static int DEFAULT_WORKERS = 2;
const static int PRACH_WORKER_THREAD_PRIO = 80;
const static int PRACH_WORKER_THREAD_PRIO = 3;
const static int SF_RECV_THREAD_PRIO = 1;
const static int WORKERS_THREAD_PRIO = 0;
const static int WORKERS_THREAD_PRIO = 2;
srslte::radio *radio_handler;
srslte::log *log_h;

@ -30,25 +30,24 @@
#include "srslte/interfaces/enb_interfaces.h"
#include "srslte/common/log.h"
#include "srslte/common/threads.h"
#include "srslte/common/block_queue.h"
#include "srslte/common/buffer_pool.h"
namespace srsenb {
class prach_worker : thread
{
public:
prach_worker() : initiated(false), prach_nof_det(0), max_prach_offset_us(0), pending_tti(0), processed_tti(0),
prach_worker() : initiated(false), prach_nof_det(0), max_prach_offset_us(0), buffer_pool(8),
running(false), nof_sf(0), sf_cnt(0) {
log_h = NULL;
mac = NULL;
signal_buffer_rx = NULL;
bzero(&prach, sizeof(srslte_prach_t));
bzero(&prach_indices, sizeof(prach_indices));
bzero(&prach_offsets, sizeof(prach_offsets));
bzero(&prach_p2avg, sizeof(prach_p2avg));
bzero(&cell, sizeof(cell));
bzero(&prach_cfg, sizeof(prach_cfg));
bzero(&mutex, sizeof(mutex));
bzero(&cvar, sizeof(cvar));
}
int init(srslte_cell_t *cell, srslte_prach_cfg_t *prach_cfg, mac_interface_phy *mac, srslte::log *log_h, int priority);
@ -57,9 +56,6 @@ public:
void stop();
private:
void run_thread();
int run_tti(uint32_t tti);
uint32_t prach_nof_det;
uint32_t prach_indices[165];
float prach_offsets[165];
@ -69,20 +65,32 @@ private:
srslte_prach_cfg_t prach_cfg;
srslte_prach_t prach;
pthread_mutex_t mutex;
pthread_cond_t cvar;
cf_t *signal_buffer_rx;
const static int sf_buffer_sz = 128*1024;
class sf_buffer {
public:
sf_buffer() { nof_samples = 0; tti = 0; }
void reset() { nof_samples = 0; tti = 0; }
cf_t samples[sf_buffer_sz];
uint32_t nof_samples;
uint32_t tti;
char debug_name[SRSLTE_BUFFER_POOL_LOG_NAME_LEN];
};
srslte::buffer_pool<sf_buffer> buffer_pool;
srslte::block_queue<sf_buffer*> pending_buffers;
sf_buffer* current_buffer;
srslte::log* log_h;
mac_interface_phy *mac;
float max_prach_offset_us;
bool initiated;
uint32_t pending_tti;
int processed_tti;
bool running;
uint32_t nof_sf;
uint32_t sf_cnt;
void run_thread();
int run_tti(sf_buffer *b);
};
}
#endif // SRSENB_PRACH_WORKER_H

@ -50,8 +50,6 @@ public:
uint32_t prio);
void stop();
const static int MUTEX_X_WORKER = 4;
private:
void run_thread();
@ -62,12 +60,12 @@ private:
prach_worker *prach;
phch_common *worker_com;
uint32_t tx_mutex_cnt;
uint32_t nof_tx_mutex;
// Main system TTI counter
uint32_t tti;
uint32_t tx_worker_cnt;
uint32_t nof_workers;
bool running;
};

@ -36,7 +36,6 @@
namespace srsenb {
#define ENB_METRICS_MAX_USERS 64
#define SRSENB_RRC_MAX_N_PLMN_IDENTITIES 6
#define SRSENB_N_SRB 3
@ -74,111 +73,6 @@ static const char rb_id_text[RB_ID_N_ITEMS][20] = { "SRB0",
#define SRSENB_MAX_BUFFER_SIZE_BITS 102048
#define SRSENB_MAX_BUFFER_SIZE_BYTES 12756
#define SRSENB_BUFFER_HEADER_OFFSET 1024
/******************************************************************************
* Convert PLMN to BCD-coded MCC and MNC.
* Digits are represented by 4-bit nibbles. Unused nibbles are filled with 0xF.
* MNC 001 represented as 0xF001
* MNC 01 represented as 0xFF01
* PLMN encoded as per TS 36.413 sec 9.2.3.8
*****************************************************************************/
inline void s1ap_plmn_to_mccmnc(uint32_t plmn, uint16_t *mcc, uint16_t *mnc)
{
  // Unpack the low 24 bits of the PLMN into six 4-bit nibbles,
  // most significant nibble first. Bits above bit 23 are ignored.
  uint8_t nib[6];
  for (int k = 0; k < 6; k++) {
    nib[k] = static_cast<uint8_t>((plmn >> (20 - 4 * k)) & 0xF);
  }

  // MCC digits 1, 2 and 3 are stored in nibbles 1, 0 and 3 respectively.
  const uint16_t mcc_digits = static_cast<uint16_t>((nib[1] << 8) | (nib[0] << 4) | nib[3]);

  // Nibble 2 holds the third MNC digit, or the 0xF filler when the MNC
  // only has two digits.
  uint16_t mnc_digits;
  if (nib[2] != 0xF) {
    // 3-digit MNC: digits 1, 2, 3 come from nibbles 5, 4, 2.
    mnc_digits = static_cast<uint16_t>((nib[5] << 8) | (nib[4] << 4) | nib[2]);
  } else {
    // 2-digit MNC: an extra 0xF filler nibble, then nibbles 5 and 4.
    mnc_digits = static_cast<uint16_t>(0x0F00 | (nib[5] << 4) | nib[4]);
  }

  // Both outputs always carry 0xF in their top nibble.
  *mcc = 0xF000;
  *mnc = 0xF000;
  *mcc |= mcc_digits;
  *mnc |= mnc_digits;
}
/******************************************************************************
* Convert BCD-coded MCC and MNC to PLMN.
* Digits are represented by 4-bit nibbles. Unused nibbles are filled with 0xF.
* MNC 001 represented as 0xF001
* MNC 01 represented as 0xFF01
* PLMN encoded as per TS 36.413 sec 9.2.3.8
*****************************************************************************/
inline void s1ap_mccmnc_to_plmn(uint16_t mcc, uint16_t mnc, uint32_t *plmn)
{
  // Pull the individual BCD digits out of both inputs.
  const uint32_t mcc_d1 = (mcc >> 8) & 0xF;
  const uint32_t mcc_d2 = (mcc >> 4) & 0xF;
  const uint32_t mcc_d3 = mcc & 0xF;

  const uint32_t mnc_hi  = (mnc >> 8) & 0xF;
  const uint32_t mnc_mid = (mnc >> 4) & 0xF;
  const uint32_t mnc_lo  = mnc & 0xF;

  // A 2-digit MNC is recognised by 0xFF in the upper byte.
  const bool two_digit_mnc = (mnc & 0xFF00) == 0xFF00;

  // Nibbles 2, 4 and 5 of the PLMN depend on the MNC length.
  uint32_t nib2;
  uint32_t nib4;
  uint32_t nib5;
  if (two_digit_mnc) {
    nib2 = 0xF;      // filler in place of the missing digit
    nib5 = mnc_mid;
    nib4 = mnc_lo;
  } else {
    nib5 = mnc_hi;
    nib4 = mnc_mid;
    nib2 = mnc_lo;
  }

  // Assemble the 24-bit PLMN, nibble 0 being the most significant.
  *plmn = (mcc_d2 << 20) | (mcc_d1 << 16) | (nib2 << 12) |
          (mcc_d3 << 8)  | (nib4 << 4)    | nib5;
}
/******************************************************************************
* Safe conversions between byte buffers and integer types.
* Note: these don't perform endian conversion - use e.g. htonl/ntohl if required
*****************************************************************************/
inline void uint8_to_uint32(uint8_t *buf, uint32_t *i)
{
  // Fold the four bytes together; buf[0] ends up as the most significant
  // byte. No host/network byte-order conversion is applied.
  uint32_t value = 0;
  for (int k = 0; k < 4; k++) {
    value = (value << 8) | buf[k];
  }
  *i = value;
}
inline void uint32_to_uint8(uint32_t i, uint8_t *buf)
{
  // Emit the value one byte at a time, most significant byte first.
  for (int k = 0; k < 4; k++) {
    const int shift = 24 - 8 * k;
    buf[k] = static_cast<uint8_t>((i >> shift) & 0xFF);
  }
}
inline void uint8_to_uint16(uint8_t *buf, uint16_t *i)
{
  // buf[0] becomes the high byte and buf[1] the low byte.
  const uint32_t high = buf[0];
  const uint32_t low  = buf[1];
  *i = static_cast<uint16_t>((high << 8) | low);
}
inline void uint16_to_uint8(uint16_t i, uint8_t *buf)
{
  // Split the value into its high and low bytes, high byte first.
  const uint8_t high = static_cast<uint8_t>((i >> 8) & 0xFF);
  const uint8_t low  = static_cast<uint8_t>(i & 0xFF);
  buf[0] = high;
  buf[1] = low;
}
} // namespace srsenb
#endif // SRSENB_COMMON_ENB_H

@ -296,6 +296,7 @@ public:
int cqi_sched_sf_idx;
int cqi_sched_prb_idx;
int get_drbid_config(LIBLTE_RRC_DRB_TO_ADD_MOD_STRUCT *drb, int drbid);
srslte::byte_buffer_t erab_info;
};
@ -333,7 +334,6 @@ private:
srslte::byte_buffer_pool *pool;
srslte::bit_buffer_t bit_buf;
srslte::bit_buffer_t bit_buf_paging;
srslte::byte_buffer_t erab_info;
phy_interface_rrc *phy;
mac_interface_rrc *mac;

@ -211,16 +211,16 @@ int enb::parse_sib2(std::string filename, LIBLTE_RRC_SYS_INFO_BLOCK_TYPE_2_STRUC
liblte_rrc_time_alignment_timer_text, LIBLTE_RRC_TIME_ALIGNMENT_TIMER_N_ITEMS)
);
sib2.add_field(
new parser::field<uint32>
("mbsfnSubframeConfigListLength", &data->mbsfn_subfr_cnfg_list_size)
);
parser::section mbsfnSubframeConfigList("mbsfnSubframeConfigList");
sib2.add_subsection(&mbsfnSubframeConfigList);
bool mbsfn_present=false;
mbsfnSubframeConfigList.set_optional(&mbsfn_present);
if (mbsfn_present) {
data->mbsfn_subfr_cnfg_list_size = 1;
}
mbsfnSubframeConfigList.add_field(
new parser::field<uint32>
("subframeAllocation", &data->mbsfn_subfr_cnfg_list[0].subfr_alloc)

@ -489,7 +489,7 @@ int mac::rach_detected(uint32_t tti, uint32_t preamble_idx, uint32_t time_adv)
// Save RA info
pending_rars[ra_id].preamble_idx = preamble_idx;
pending_rars[ra_id].ta_cmd = 2*time_adv;
pending_rars[ra_id].ta_cmd = time_adv;
pending_rars[ra_id].temp_crnti = last_rnti;
// Add new user to the scheduler so that it can RX/TX SRB0

@ -157,9 +157,13 @@ void parse_args(all_args_t *args, int argc, char* argv[]) {
"Pregenerate uplink signals after attach. Improves CPU performance.")
("expert.pusch_max_its",
bpo::value<int>(&args->expert.phy.pusch_max_its)->default_value(4),
bpo::value<int>(&args->expert.phy.pusch_max_its)->default_value(8),
"Maximum number of turbo decoder iterations")
("expert.pusch_8bit_decoder",
bpo::value<bool>(&args->expert.phy.pusch_8bit_decoder)->default_value(false),
"Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)")
("expert.tx_amplitude",
bpo::value<float>(&args->expert.phy.tx_amplitude)->default_value(0.6),
"Transmit amplitude factor")

@ -41,9 +41,35 @@ using namespace std;
namespace srsenb {
void phch_common::set_nof_mutex(uint32_t nof_mutex_) {
nof_mutex = nof_mutex_;
assert(nof_mutex <= max_mutex);
/* Constructs the shared PHY state with room for max_workers transmit
 * semaphores. All semaphores are created in the blocked state (value 0).
 *
 * max_workers: number of entries allocated in tx_sem.
 */
phch_common::phch_common(uint32_t max_workers) : tx_sem(max_workers)
{
  // There is no constructor argument called nof_workers, so assigning
  // "nof_workers" to the member would only copy the uninitialised member
  // onto itself. Start from the pool size instead, so that any index of the
  // form tti%nof_workers stays inside tx_sem until set_nof_workers() is
  // called with the real worker count.
  this->nof_workers = max_workers;
  this->max_workers = max_workers;

  params.max_prach_offset_us = 20;
  radio             = NULL;
  mac               = NULL;
  is_first_tx       = false;
  is_first_of_burst = false;
  pdsch_p_b         = 0;

  bzero(&pusch_cfg, sizeof(pusch_cfg));
  bzero(&hopping_cfg, sizeof(hopping_cfg));
  bzero(&pucch_cfg, sizeof(pucch_cfg));
  bzero(&ul_grants, sizeof(ul_grants));

  for (uint32_t i = 0; i < max_workers; i++) {
    sem_init(&tx_sem[i], 0, 0); // All semaphores start blocked
  }
}
// Destroys the max_workers transmit semaphores held in tx_sem.
phch_common::~phch_common()
{
  uint32_t idx = 0;
  while (idx < max_workers) {
    sem_destroy(&tx_sem[idx]);
    idx++;
  }
}
// Stores the number of workers in the nof_workers member.
void phch_common::set_nof_workers(uint32_t nof_workers)
{
  // The parameter hides the member of the same name, so the member is
  // named explicitly through the class scope.
  phch_common::nof_workers = nof_workers;
}
void phch_common::reset() {
@ -61,35 +87,42 @@ bool phch_common::init(srslte_cell_t *cell_, srslte::radio* radio_h_, mac_interf
is_first_of_burst = true;
is_first_tx = true;
for (uint32_t i=0;i<max_mutex;i++) {
pthread_mutex_init(&tx_mutex[i], NULL);
}
reset();
return true;
}
void phch_common::stop() {
for (uint32_t i=0;i<nof_mutex;i++) {
pthread_mutex_trylock(&tx_mutex[i]);
pthread_mutex_unlock(&tx_mutex[i]);
for (uint32_t i=0;i<max_workers;i++) {
sem_post(&tx_sem[i]);
}
}
void phch_common::worker_end(uint32_t tx_mutex_cnt, cf_t* buffer[SRSLTE_MAX_PORTS], uint32_t nof_samples, srslte_timestamp_t tx_time)
/* The transmission of UL subframes must be in sequence. The correct sequence is guaranteed by a chain of N semaphores,
* one per TTI%nof_workers. Each thread waits on the semaphore of its own TTI and, after transmitting, posts the
* semaphore of the next TTI so that it can be transmitted
*
* Each worker uses this function to indicate that all processing is done and data is ready for transmission or
* there is no transmission at all (tx_enable). In that case, the end of burst message will be sent to the radio
*/
void phch_common::worker_end(uint32_t tti, cf_t* buffer[SRSLTE_MAX_PORTS], uint32_t nof_samples, srslte_timestamp_t tx_time)
{
// Wait previous TTIs to be transmitted
// This variable is not protected but it is very unlikely that 2 threads arrive here simultaneously since at the beginning
// there is no workload and threads are separated by 1 ms
if (is_first_tx) {
is_first_tx = false;
} else {
pthread_mutex_lock(&tx_mutex[tx_mutex_cnt%nof_mutex]);
// Allow my own transmission if I'm the first to transmit
sem_post(&tx_sem[tti%nof_workers]);
}
radio->set_tti(tx_mutex_cnt);
// Wait for the green light to transmit in the current TTI
sem_wait(&tx_sem[tti%nof_workers]);
radio->set_tti(tti);
radio->tx((void **) buffer, nof_samples, tx_time);
// Trigger next transmission
pthread_mutex_unlock(&tx_mutex[(tx_mutex_cnt+1)%nof_mutex]);
// Allow next TTI to transmit
sem_post(&tx_sem[(tti+1)%nof_workers]);
// Trigger MAC clock
mac->tti_clock();

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save