Fixed bug in SSE turbo decoder

master
Ismael Gomez 7 years ago
parent 23179262c3
commit db17c67194

@ -74,7 +74,7 @@ SRSLTE_API int srslte_tdec_reset_cb(srslte_tdec_t * h,
SRSLTE_API int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, SRSLTE_API int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h,
uint32_t cb_idx); uint32_t cb_idx);
SRSLTE_API int srslte_tdec_get_nof_parallel(srslte_tdec_t * h); SRSLTE_API uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t * h);
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
int16_t* input, int16_t* input,
@ -95,15 +95,15 @@ SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h,
int16_t* input[SRSLTE_TDEC_NPAR], int16_t* input[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h,
@ -112,8 +112,8 @@ SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h, SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h,
int16_t * input[SRSLTE_TDEC_NPAR], int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t nof_iterations,
uint32_t long_cb); uint32_t long_cb);

@ -44,7 +44,7 @@
#include "srslte/phy/fec/cbsegm.h" #include "srslte/phy/fec/cbsegm.h"
// Define maximum number of CB decoded in parallel (2 for AVX2) // Define maximum number of CB decoded in parallel (2 for AVX2)
#define SRSLTE_TDEC_NPAR 2 #define SRSLTE_TDEC_MAX_NPAR 2
#define SRSLTE_TCOD_RATE 3 #define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12 #define SRSLTE_TCOD_TOTALTAIL 12
@ -65,18 +65,18 @@ typedef struct SRSLTE_API {
map_gen_t dec; map_gen_t dec;
int16_t *app1[SRSLTE_TDEC_NPAR]; int16_t *app1[SRSLTE_TDEC_MAX_NPAR];
int16_t *app2[SRSLTE_TDEC_NPAR]; int16_t *app2[SRSLTE_TDEC_MAX_NPAR];
int16_t *ext1[SRSLTE_TDEC_NPAR]; int16_t *ext1[SRSLTE_TDEC_MAX_NPAR];
int16_t *ext2[SRSLTE_TDEC_NPAR]; int16_t *ext2[SRSLTE_TDEC_MAX_NPAR];
int16_t *syst[SRSLTE_TDEC_NPAR]; int16_t *syst[SRSLTE_TDEC_MAX_NPAR];
int16_t *parity0[SRSLTE_TDEC_NPAR]; int16_t *parity0[SRSLTE_TDEC_MAX_NPAR];
int16_t *parity1[SRSLTE_TDEC_NPAR]; int16_t *parity1[SRSLTE_TDEC_MAX_NPAR];
int cb_mask; int cb_mask;
int current_cbidx; int current_cbidx;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
int n_iter[SRSLTE_TDEC_NPAR]; int n_iter[SRSLTE_TDEC_MAX_NPAR];
} srslte_tdec_simd_t; } srslte_tdec_simd_t;
SRSLTE_API int srslte_tdec_simd_init(srslte_tdec_simd_t * h, SRSLTE_API int srslte_tdec_simd_init(srslte_tdec_simd_t * h,
@ -88,6 +88,8 @@ SRSLTE_API void srslte_tdec_simd_free(srslte_tdec_simd_t * h);
SRSLTE_API int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, SRSLTE_API int srslte_tdec_simd_reset(srslte_tdec_simd_t * h,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API
SRSLTE_API int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h, SRSLTE_API int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h,
uint32_t cb_idx); uint32_t cb_idx);
@ -95,15 +97,15 @@ SRSLTE_API int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h,
uint32_t cb_idx); uint32_t cb_idx);
SRSLTE_API void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, SRSLTE_API void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h,
int16_t * input[SRSLTE_TDEC_NPAR], int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, SRSLTE_API void srslte_tdec_simd_decision(srslte_tdec_simd_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, SRSLTE_API void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h,
@ -112,8 +114,8 @@ SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, SRSLTE_API int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h,
int16_t * input[SRSLTE_TDEC_NPAR], int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t nof_iterations,
uint32_t long_cb); uint32_t long_cb);

@ -49,9 +49,9 @@
#include "srslte/phy/fec/cbsegm.h" #include "srslte/phy/fec/cbsegm.h"
#if LV_HAVE_AVX2 #if LV_HAVE_AVX2
#define SRSLTE_TDEC_NPAR 16 #define SRSLTE_TDEC_MAX_NPAR 16
#else #else
#define SRSLTE_TDEC_NPAR 8 #define SRSLTE_TDEC_MAX_NPAR 8
#endif #endif
@ -71,7 +71,7 @@ typedef struct SRSLTE_API {
int current_cbidx; int current_cbidx;
uint32_t current_long_cb; uint32_t current_long_cb;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
int n_iter[SRSLTE_TDEC_NPAR]; int n_iter[SRSLTE_TDEC_MAX_NPAR];
} srslte_tdec_simd_inter_t; } srslte_tdec_simd_inter_t;
SRSLTE_API int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h, SRSLTE_API int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h,
@ -90,17 +90,17 @@ SRSLTE_API int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h,
uint32_t cb_idx); uint32_t cb_idx);
SRSLTE_API void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, SRSLTE_API void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h,
int16_t * input[SRSLTE_TDEC_NPAR], int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_cb, uint32_t nof_cb,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, SRSLTE_API void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_cb, uint32_t nof_cb,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, SRSLTE_API void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_cb, uint32_t nof_cb,
uint32_t long_cb); uint32_t long_cb);
@ -110,8 +110,8 @@ SRSLTE_API void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h, SRSLTE_API int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
int16_t *input[SRSLTE_TDEC_NPAR], int16_t *input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t nof_iterations,
uint32_t nof_cb, uint32_t nof_cb,
uint32_t long_cb); uint32_t long_cb);

@ -117,7 +117,7 @@ int main(int argc, char **argv) {
float *llr; float *llr;
short *llr_s; short *llr_s;
uint8_t *llr_c; uint8_t *llr_c;
uint8_t *data_tx, *data_rx, *data_rx_bytes[SRSLTE_TDEC_NPAR], *symbols; uint8_t *data_tx, *data_rx, *data_rx_bytes[SRSLTE_TDEC_MAX_NPAR], *symbols;
uint32_t i, j; uint32_t i, j;
float var[SNR_POINTS]; float var[SNR_POINTS];
uint32_t snr_points; uint32_t snr_points;
@ -159,7 +159,7 @@ int main(int argc, char **argv) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
} }
for (int cb=0;cb<SRSLTE_TDEC_NPAR;cb++) { for (int cb=0;cb<SRSLTE_TDEC_MAX_NPAR;cb++) {
data_rx_bytes[cb] = srslte_vec_malloc(frame_length * sizeof(uint8_t)); data_rx_bytes[cb] = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes[cb]) { if (!data_rx_bytes[cb]) {
perror("malloc"); perror("malloc");
@ -254,10 +254,10 @@ int main(int argc, char **argv) {
t = nof_iterations; t = nof_iterations;
} }
int16_t *input[SRSLTE_TDEC_NPAR]; int16_t *input[SRSLTE_TDEC_MAX_NPAR];
uint8_t *output[SRSLTE_TDEC_NPAR]; uint8_t *output[SRSLTE_TDEC_MAX_NPAR];
for (int n=0;n<SRSLTE_TDEC_NPAR;n++) { for (int n=0;n<SRSLTE_TDEC_MAX_NPAR;n++) {
if (n < nof_cb) { if (n < nof_cb) {
input[n] = llr_s; input[n] = llr_s;
} else { } else {

@ -43,7 +43,7 @@
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) { int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
return srslte_tdec_simd_init(&h->tdec_simd, SRSLTE_TDEC_NPAR, max_long_cb); return srslte_tdec_simd_init(&h->tdec_simd, SRSLTE_TDEC_MAX_NPAR, max_long_cb);
#else #else
h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12)); h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12));
if (!h->input_conv) { if (!h->input_conv) {
@ -91,7 +91,7 @@ int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, uint32_t cb_idx)
#endif #endif
} }
void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_NPAR], uint32_t long_cb) { void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
srslte_tdec_simd_iteration(&h->tdec_simd, input, long_cb); srslte_tdec_simd_iteration(&h->tdec_simd, input, long_cb);
#else #else
@ -101,12 +101,12 @@ void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_NPA
} }
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) { void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
int16_t *input_par[SRSLTE_TDEC_NPAR]; int16_t *input_par[SRSLTE_TDEC_MAX_NPAR];
input_par[0] = input; input_par[0] = input;
return srslte_tdec_iteration_par(h, input_par, long_cb); return srslte_tdec_iteration_par(h, input_par, long_cb);
} }
void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) { void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb); return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb);
#else #else
@ -114,13 +114,21 @@ void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPA
#endif #endif
} }
uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t *h) {
#ifdef LV_HAVE_AVX2
return 2;
#else
return 1;
#endif
}
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) { void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
uint8_t *output_par[SRSLTE_TDEC_NPAR]; uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
output_par[0] = output; output_par[0] = output;
srslte_tdec_decision_par(h, output_par, long_cb); srslte_tdec_decision_par(h, output_par, long_cb);
} }
void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) { void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
srslte_tdec_simd_decision_byte(&h->tdec_simd, output, long_cb); srslte_tdec_simd_decision_byte(&h->tdec_simd, output, long_cb);
#else #else
@ -137,13 +145,13 @@ void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, uint8_t *output, uint32
} }
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) { void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
uint8_t *output_par[SRSLTE_TDEC_NPAR]; uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
output_par[0] = output; output_par[0] = output;
srslte_tdec_decision_byte_par(h, output_par, long_cb); srslte_tdec_decision_byte_par(h, output_par, long_cb);
} }
int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_NPAR], int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t long_cb) { uint32_t nof_iterations, uint32_t long_cb) {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb); return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb);
@ -155,9 +163,9 @@ int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_NPAR]
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb) int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb)
{ {
uint8_t *output_par[SRSLTE_TDEC_NPAR]; uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
output_par[0] = output; output_par[0] = output;
int16_t *input_par[SRSLTE_TDEC_NPAR]; int16_t *input_par[SRSLTE_TDEC_MAX_NPAR];
input_par[0] = input; input_par[0] = input;
return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb); return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb);

@ -81,7 +81,7 @@ static inline int16_t hMax1(__m256i masked_value)
} }
/* Computes beta values */ /* Computes beta values */
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t long_cb) void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{ {
int k; int k;
uint32_t end = long_cb + 3; uint32_t end = long_cb + 3;

@ -54,13 +54,13 @@ void map_sse_alpha(map_gen_t * s, uint32_t long_cb);
void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb); void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb);
#ifdef LV_HAVE_AVX2 #ifdef LV_HAVE_AVX2
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t long_cb); void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb);
void map_avx_alpha(map_gen_t * s, uint32_t long_cb); void map_avx_alpha(map_gen_t * s, uint32_t long_cb);
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb); void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb);
#endif #endif
void map_simd_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) void map_simd_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{ {
if (nof_cb == 1) { if (nof_cb == 1) {
map_sse_beta(s, output[0], long_cb); map_sse_beta(s, output[0], long_cb);
@ -128,12 +128,12 @@ void map_simd_free(map_gen_t * h)
} }
/* Runs one instance of a decoder */ /* Runs one instance of a decoder */
void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_NPAR], int16_t *app[SRSLTE_TDEC_NPAR], int16_t * parity[SRSLTE_TDEC_NPAR], void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], int16_t *app[SRSLTE_TDEC_MAX_NPAR], int16_t * parity[SRSLTE_TDEC_MAX_NPAR],
int16_t *output[SRSLTE_TDEC_NPAR], uint32_t cb_mask, uint32_t long_cb) int16_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t cb_mask, uint32_t long_cb)
{ {
uint32_t nof_cb = 1; uint32_t nof_cb = 1;
int16_t *outptr[SRSLTE_TDEC_NPAR]; int16_t *outptr[SRSLTE_TDEC_MAX_NPAR];
// Compute branch metrics // Compute branch metrics
switch(cb_mask) { switch(cb_mask) {
@ -354,21 +354,21 @@ void deinterleave_input_simd(srslte_tdec_simd_t *h, int16_t *input, uint32_t cbi
} }
/* Runs 1 turbo decoder iteration */ /* Runs 1 turbo decoder iteration */
void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint32_t long_cb) void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{ {
int16_t *tmp_app[SRSLTE_TDEC_NPAR]; int16_t *tmp_app[SRSLTE_TDEC_MAX_NPAR];
if (h->current_cbidx >= 0) { if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward; uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
#if SRSLTE_TDEC_NPAR == 2 #ifndef LV_HAVE_AVX2
h->cb_mask = (input[0]?1:0) | (input[1]?2:0); input[1] = NULL;
#else
h->cb_mask = input[0]?1:0;
#endif #endif
h->cb_mask = (input[0]?1:0) | (input[1]?2:0);
for (int i=0;i<h->max_par_cb;i++) { for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] == 0 && input[i]) { if (h->n_iter[i] == 0 && input[i]) {
//printf("deinterleaveing %d\n",i); //printf("deinterleaveing %d\n",i);
@ -484,7 +484,7 @@ void tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx,
} }
} }
void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{ {
for (int i=0;i<h->max_par_cb;i++) { for (int i=0;i<h->max_par_cb;i++) {
tdec_simd_decision(h, output[i], i, long_cb); tdec_simd_decision(h, output[i], i, long_cb);
@ -510,7 +510,7 @@ void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, uint8_t *output,
} }
} }
void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
{ {
for (int i=0;i<h->max_par_cb;i++) { for (int i=0;i<h->max_par_cb;i++) {
srslte_tdec_simd_decision_byte_cb(h, output[i], i, long_cb); srslte_tdec_simd_decision_byte_cb(h, output[i], i, long_cb);
@ -519,7 +519,7 @@ void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSL
/* Runs nof_iterations iterations and decides the output bits */ /* Runs nof_iterations iterations and decides the output bits */
int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR], int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t long_cb) uint32_t nof_iterations, uint32_t long_cb)
{ {
if (srslte_tdec_simd_reset(h, long_cb)) { if (srslte_tdec_simd_reset(h, long_cb)) {

@ -172,7 +172,7 @@ void extract_input(srslte_tdec_simd_inter_t *h, int16_t *input, uint32_t cbidx,
} }
} }
void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, int16_t *input[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{ {
if (h->current_cbidx >= 0) { if (h->current_cbidx >= 0) {
@ -239,7 +239,7 @@ void srslte_tdec_simd_inter_decision_cb(srslte_tdec_simd_inter_t * h, uint8_t *o
} }
} }
void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{ {
for (int i=0;i<nof_cb;i++) { for (int i=0;i<nof_cb;i++) {
srslte_tdec_simd_inter_decision_cb(h, output[i], i, long_cb); srslte_tdec_simd_inter_decision_cb(h, output[i], i, long_cb);
@ -269,7 +269,7 @@ void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h, uint8
} }
} }
void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
{ {
for (int i=0;i<nof_cb;i++) { for (int i=0;i<nof_cb;i++) {
srslte_tdec_simd_inter_decision_byte_cb(h, output[i], i, long_cb); srslte_tdec_simd_inter_decision_byte_cb(h, output[i], i, long_cb);
@ -277,7 +277,7 @@ void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t
} }
int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h, int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
int16_t *input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR], int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb) uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb)
{ {
uint32_t iter = 0; uint32_t iter = 0;

@ -600,6 +600,8 @@ static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c
/* Bit scrambling */ /* Bit scrambling */
srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits); srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
printf("e: "); srslte_vec_fprint_s(stdout, q->e[codeword_idx], 10);
/* Return */ /* Return */
ret = srslte_dlsch_decode2(&q->dl_sch, cfg, softbuffer, q->e[codeword_idx], data, codeword_idx); ret = srslte_dlsch_decode2(&q->dl_sch, cfg, softbuffer, q->e[codeword_idx], data, codeword_idx);
@ -685,7 +687,6 @@ int srslte_pdsch_decode(srslte_pdsch_t *q,
srslte_layerdemap_type(x, q->d, cfg->nof_layers, nof_tb, srslte_layerdemap_type(x, q->d, cfg->nof_layers, nof_tb,
nof_symbols[0], nof_symbols, cfg->mimo_type); nof_symbols[0], nof_symbols, cfg->mimo_type);
} }
// Codeword decoding // Codeword decoding
for (uint32_t tb = 0; tb < SRSLTE_MAX_CODEWORDS; tb ++) { for (uint32_t tb = 0; tb < SRSLTE_MAX_CODEWORDS; tb ++) {
/* Decode only if transport block is enabled and the default ACK is not true */ /* Decode only if transport block is enabled and the default ACK is not true */

@ -310,8 +310,8 @@ bool decode_tb_cb(srslte_sch_t *q,
bool cb_map[SRSLTE_MAX_CODEBLOCKS]; bool cb_map[SRSLTE_MAX_CODEBLOCKS];
uint32_t cb_idx[SRSLTE_TDEC_NPAR]; uint32_t cb_idx[SRSLTE_TDEC_MAX_NPAR];
int16_t *decoder_input[SRSLTE_TDEC_NPAR]; int16_t *decoder_input[SRSLTE_TDEC_MAX_NPAR];
uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1; uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1;
uint32_t first_cb = cb_size_group?cb_segm->C1:0; uint32_t first_cb = cb_size_group?cb_segm->C1:0;
@ -328,7 +328,7 @@ bool decode_tb_cb(srslte_sch_t *q,
return false; return false;
} }
for (int i=0;i<SRSLTE_TDEC_NPAR;i++) { for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
cb_idx[i] = i+first_cb; cb_idx[i] = i+first_cb;
decoder_input[i] = NULL; decoder_input[i] = NULL;
} }
@ -346,7 +346,7 @@ bool decode_tb_cb(srslte_sch_t *q,
while(remaining_cb>0) { while(remaining_cb>0) {
// Unratematch the codeblocks left to decode // Unratematch the codeblocks left to decode
for (int i=0;i<SRSLTE_TDEC_NPAR;i++) { for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
if (!decoder_input[i] && remaining_cb > 0) { if (!decoder_input[i] && remaining_cb > 0) {
// Find an unprocessed CB // Find an unprocessed CB
@ -372,6 +372,9 @@ bool decode_tb_cb(srslte_sch_t *q,
} }
decoder_input[i] = softbuffer->buffer_f[cb_idx[i]]; decoder_input[i] = softbuffer->buffer_f[cb_idx[i]];
printf("input: "); srslte_vec_fprint_s(stdout, decoder_input[i], 10);
} }
} }
} }
@ -380,7 +383,7 @@ bool decode_tb_cb(srslte_sch_t *q,
srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len); srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len);
// Decide output bits and compute CRC // Decide output bits and compute CRC
for (int i=0;i<SRSLTE_TDEC_NPAR;i++) { for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
if (decoder_input[i]) { if (decoder_input[i]) {
srslte_tdec_decision_byte_par_cb(&q->decoder, q->cb_in, i, cb_len); srslte_tdec_decision_byte_par_cb(&q->decoder, q->cb_in, i, cb_len);
@ -395,6 +398,8 @@ bool decode_tb_cb(srslte_sch_t *q,
crc_ptr = &q->crc_tb; crc_ptr = &q->crc_tb;
} }
printf("output: %d", i); srslte_vec_fprint_b(stdout, q->cb_in, 10);
// CRC is OK // CRC is OK
if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) { if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) {
@ -474,7 +479,7 @@ static int decode_tb(srslte_sch_t *q,
data[cb_segm->tbs/8+1] = 0; data[cb_segm->tbs/8+1] = 0;
data[cb_segm->tbs/8+2] = 0; data[cb_segm->tbs/8+2] = 0;
// Process Codeblocks in groups of equal CB size to parallelize according to SRSLTE_TDEC_NPAR // Process Codeblocks in groups of equal CB size to parallelize according to SRSLTE_TDEC_MAX_NPAR
for (uint32_t i=0;i<nof_cb_groups && crc_ok;i++) { for (uint32_t i=0;i<nof_cb_groups && crc_ok;i++) {
crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data, i); crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data, i);
} }

@ -190,7 +190,7 @@ int main(int argc, char **argv) {
bzero(&uci_data_tx, sizeof(srslte_uci_data_t)); bzero(&uci_data_tx, sizeof(srslte_uci_data_t));
uci_data_tx.uci_cqi_len = 4; uci_data_tx.uci_cqi_len = 4;
uci_data_tx.uci_ri_len = 0; uci_data_tx.uci_ri_len = 0;
uci_data_tx.uci_ack_len = 2; uci_data_tx.uci_ack_len = 1;
memcpy(&uci_data_rx, &uci_data_tx, sizeof(srslte_uci_data_t)); memcpy(&uci_data_rx, &uci_data_tx, sizeof(srslte_uci_data_t));
for (uint32_t i=0;i<20;i++) { for (uint32_t i=0;i<20;i++) {

Loading…
Cancel
Save