Turbo decoder working OK @ 100 Mbps

master
ismagom 9 years ago
parent bb56d4895d
commit ad06998d91

@ -84,7 +84,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
ENDIF(CMAKE_COMPILER_IS_GNUCXX) ENDIF(CMAKE_COMPILER_IS_GNUCXX)
IF(CMAKE_COMPILER_IS_GNUCC) IF(CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g -mfpmath=sse -mavx -O3")
# IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug") # IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror -Wno-error=implicit-function-declaration -Wno-error=unused-but-set-variable") # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror -Wno-error=implicit-function-declaration -Wno-error=unused-but-set-variable")
# ENDIF(${CMAKE_BUILD_TYPE} STREQUAL "Debug") # ENDIF(${CMAKE_BUILD_TYPE} STREQUAL "Debug")

@ -40,8 +40,8 @@
#include <stdint.h> #include <stdint.h>
typedef struct SRSLTE_API { typedef struct SRSLTE_API {
uint32_t *forward; uint16_t *forward;
uint32_t *reverse; uint16_t *reverse;
uint32_t max_long_cb; uint32_t max_long_cb;
} srslte_tc_interl_t; } srslte_tc_interl_t;

@ -50,11 +50,12 @@
#define SRSLTE_TCOD_MAX_LEN_CB 6144 #define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL) #define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef float srslte_llr_t; typedef short llr_t;
typedef struct SRSLTE_API { typedef struct SRSLTE_API {
int max_long_cb; int max_long_cb;
srslte_llr_t *beta; llr_t *alpha;
llr_t *branch;
} srslte_map_gen_t; } srslte_map_gen_t;
typedef struct SRSLTE_API { typedef struct SRSLTE_API {
@ -62,14 +63,17 @@ typedef struct SRSLTE_API {
srslte_map_gen_t dec; srslte_map_gen_t dec;
srslte_llr_t *llr1; llr_t *app1;
srslte_llr_t *llr2; llr_t *app2;
srslte_llr_t *w; llr_t *ext1;
srslte_llr_t *syst; llr_t *ext2;
srslte_llr_t *parity; llr_t *syst;
llr_t *parity0;
llr_t *parity1;
int current_cbidx; int current_cbidx;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
int n_iter;
} srslte_tdec_t; } srslte_tdec_t;
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h, SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
@ -80,7 +84,7 @@ SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb); SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
srslte_llr_t * input, float * input,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
@ -92,7 +96,7 @@ SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h, SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
srslte_llr_t * input, float * input,
uint8_t *output, uint8_t *output,
uint32_t nof_iterations, uint32_t nof_iterations,
uint32_t long_cb); uint32_t long_cb);

@ -0,0 +1,100 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder_vl.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_VL_
#define TURBODECODER_VL_
#include "srslte/config.h"
#include "srslte/fec/tc_interl.h"
#include "srslte/fec/cbsegm.h"
#define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef float srslte_llr_t;
/* Working state of the MAP_GEN (MAX-LOG-MAP) constituent decoder, "vl" variant. */
typedef struct SRSLTE_API {
/* NOTE(review): presumably the largest code block length the buffers were sized for -- confirm against the implementation */
int max_long_cb;
/* Buffer of srslte_llr_t (float) values.
 * NOTE(review): the name suggests backward-recursion (beta) state metrics -- confirm against the implementation */
srslte_llr_t *beta;
} srslte_map_gen_vl_t;
/* Turbo decoder handle ("vl" variant): embeds one constituent-decoder state,
 * a set of srslte_llr_t (float) work buffers and one interleaver table per
 * supported code block size. */
typedef struct SRSLTE_API {
/* NOTE(review): presumably the largest code block length this handle was initialised for -- confirm */
int max_long_cb;
/* Embedded MAP_GEN constituent decoder state */
srslte_map_gen_vl_t dec;
/* NOTE(review): llr1/llr2 look like the outputs of the two constituent decoding passes -- confirm */
srslte_llr_t *llr1;
srslte_llr_t *llr2;
/* NOTE(review): presumably the a-priori/extrinsic information carried between iterations -- confirm */
srslte_llr_t *w;
/* NOTE(review): presumably the systematic and parity inputs handed to the constituent decoder -- confirm */
srslte_llr_t *syst;
srslte_llr_t *parity;
/* NOTE(review): presumably the index into interleaver[] for the current code block size -- confirm */
int current_cbidx;
/* One interleaver entry for each of the SRSLTE_NOF_TC_CB_SIZES code block sizes */
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
} srslte_tdec_vl_t;
/* Public API of the "vl" turbo decoder. Only the declarations are visible
 * here; the notes below are derived from the signatures and must be
 * confirmed against the implementation. */

/* Initialises the handle h.
 * NOTE(review): max_long_cb is presumably the largest code block length to
 * support; the meaning of the int return value is not visible here -- confirm. */
SRSLTE_API int srslte_tdec_vl_init(srslte_tdec_vl_t * h,
uint32_t max_long_cb);
/* NOTE(review): presumably releases whatever srslte_tdec_vl_init() acquired -- confirm */
SRSLTE_API void srslte_tdec_vl_free(srslte_tdec_vl_t * h);
/* NOTE(review): presumably prepares h for decoding a code block of length
 * long_cb; return convention not visible here -- confirm */
SRSLTE_API int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb);
/* Takes the srslte_llr_t (float) input buffer and the code block length.
 * NOTE(review): presumably runs a single decoding iteration -- confirm */
SRSLTE_API void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h,
srslte_llr_t * input,
uint32_t long_cb);
/* Writes the decoder decision into output.
 * NOTE(review): presumably one hard bit per output element -- confirm */
SRSLTE_API void srslte_tdec_vl_decision(srslte_tdec_vl_t * h,
uint8_t *output,
uint32_t long_cb);
/* Writes the decoder decision into output.
 * NOTE(review): presumably bits packed eight per output byte -- confirm */
SRSLTE_API void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h,
uint8_t *output,
uint32_t long_cb);
/* Takes the input LLRs, an output buffer, an iteration count and the code
 * block length.
 * NOTE(review): presumably reset + nof_iterations iterations + decision;
 * return convention not visible here -- confirm */
SRSLTE_API int srslte_tdec_vl_run_all(srslte_tdec_vl_t * h,
srslte_llr_t * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);
#endif

@ -69,6 +69,7 @@ SRSLTE_API void srslte_vec_fprint_f(FILE *stream, float *x, uint32_t len);
SRSLTE_API void srslte_vec_fprint_b(FILE *stream, uint8_t *x, uint32_t len); SRSLTE_API void srslte_vec_fprint_b(FILE *stream, uint8_t *x, uint32_t len);
SRSLTE_API void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, uint32_t len); SRSLTE_API void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, uint32_t len);
SRSLTE_API void srslte_vec_fprint_i(FILE *stream, int *x, uint32_t len); SRSLTE_API void srslte_vec_fprint_i(FILE *stream, int *x, uint32_t len);
SRSLTE_API void srslte_vec_fprint_s(FILE *stream, short *x, uint32_t len);
SRSLTE_API void srslte_vec_fprint_hex(FILE *stream, uint8_t *x, uint32_t len); SRSLTE_API void srslte_vec_fprint_hex(FILE *stream, uint8_t *x, uint32_t len);
/* Saves/loads a vector to a file */ /* Saves/loads a vector to a file */
@ -79,6 +80,8 @@ SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, uint32_t len)
SRSLTE_API void srslte_vec_sum_ch(uint8_t *x, uint8_t *y, char *z, uint32_t len); SRSLTE_API void srslte_vec_sum_ch(uint8_t *x, uint8_t *y, char *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len);
/* subtract two vectors z=x-y */ /* subtract two vectors z=x-y */
SRSLTE_API void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len);
@ -99,12 +102,18 @@ SRSLTE_API void srslte_vec_sc_add_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss(short *x, int pow2_div, short *z, uint32_t len);
/* Normalization */ /* Normalization */
SRSLTE_API void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len); SRSLTE_API void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len); SRSLTE_API void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len); SRSLTE_API void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len);
SRSLTE_API void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len); SRSLTE_API void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len);

@ -0,0 +1,50 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#ifndef VECTORSIMD_
#define VECTORSIMD_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdint.h>
#include "srslte/config.h"
/* SIMD kernels operating on arrays of short with len elements. Only the
 * declarations are visible here; confirm the notes below against the
 * implementation, including any alignment or length-multiple requirements. */

/* NOTE(review): presumably element-wise z = x + y -- confirm */
SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len);
/* NOTE(review): presumably element-wise z = x - y -- confirm */
SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len);
/* NOTE(review): presumably z = x >> n_rightshift per element, i.e. division
 * by a power of two -- confirm */
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
#ifdef __cplusplus
}
#endif
#endif

@ -89,7 +89,7 @@ int srslte_tc_interl_UMTS_gen(srslte_tc_interl_t *h, uint32_t long_cb) {
uint32_t i, j; uint32_t i, j;
uint32_t res, prim, aux; uint32_t res, prim, aux;
uint32_t kp, k; uint32_t kp, k;
uint32_t *per, *desper; uint16_t *per, *desper;
uint8_t v; uint8_t v;
uint16_t p; uint16_t p;
uint16_t s[MAX_COLS], q[MAX_ROWS], r[MAX_ROWS], T[MAX_ROWS]; uint16_t s[MAX_COLS], q[MAX_ROWS], r[MAX_ROWS], T[MAX_ROWS];

@ -35,139 +35,272 @@
#include "srslte/fec/turbodecoder.h" #include "srslte/fec/turbodecoder.h"
#include "srslte/utils/vector.h" #include "srslte/utils/vector.h"
#include <inttypes.h>
#include <emmintrin.h>
#include <immintrin.h>
#define NUMSTATES 8 #define NUMSTATES 8
#define NINPUTS 2 #define NINPUTS 2
#define TAIL 3 #define TAIL 3
#define TOTALTAIL 12 #define TOTALTAIL 12
#define INF 9e4 #define INF 10000
#define ZERO 9e-4 #define ZERO 0
#define SCALE 100
/************************************************ static void print128_num(__m128i var)
*
* MAP_GEN is the MAX-LOG-MAP generic implementation of the
* Decoder
*
************************************************/
void srslte_map_gen_beta(srslte_map_gen_t * s, srslte_llr_t * input, srslte_llr_t * parity,
uint32_t long_cb)
{ {
srslte_llr_t m_b[8], new[8], old[8]; int16_t *val = (int16_t*) &var;//can also use uint32_t instead of 16_t
srslte_llr_t x, y, xy; printf("[%d %d %d %d %d %d %d %d]\n",
int k; val[0], val[1], val[2], val[3], val[4], val[5],
uint32_t end = long_cb + SRSLTE_TCOD_RATE; val[6], val[7]);
srslte_llr_t *beta = s->beta; }
uint32_t i;
for (i = 0; i < 8; i++) {
old[i] = beta[8 * (end) + i];
}
for (k = end - 1; k >= 0; k--) { void print128f_num(__m128 var)
x = input[k]; {
y = parity[k]; float *val = (float*) &var;
printf("[%f %f %f %f]\n",
val[0], val[1], val[2], val[3]);
}
xy = x + y;
m_b[0] = old[4] + xy; /************************************************
m_b[1] = old[4]; *
m_b[2] = old[5] + y; * MAP_GEN is the MAX-LOG-MAP generic implementation
m_b[3] = old[5] + x; *
m_b[4] = old[6] + x; ************************************************/
m_b[5] = old[6] + y;
m_b[6] = old[7];
m_b[7] = old[7] + xy;
new[0] = old[0]; static inline int16_t hMax(__m128i buffer)
new[1] = old[0] + xy; {
new[2] = old[1] + x; __m128i tmp1 = _mm_sub_epi8(_mm_set1_epi16(0x7FFF), buffer);
new[3] = old[1] + y; __m128i tmp3 = _mm_minpos_epu16(tmp1);
new[4] = old[2] + y; return (int16_t)(_mm_cvtsi128_si32(tmp3));
new[5] = old[2] + x; }
new[6] = old[3] + xy;
new[7] = old[3];
for (i = 0; i < 8; i++) { void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
if (m_b[i] > new[i]) {
new[i] = m_b[i]; int k;
beta[8 * k + i] = new[i]; uint32_t end = long_cb + 3;
old[i] = new[i]; const __m128i *alphaPtr = (const __m128i*) s->alpha;
}
__m128i beta_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
__m128i g, bp, bn, alpha_k;
__m128i shuf_bp = _mm_set_epi8(
15, 14, // 7
7, 6, // 3
5, 4, // 2
13, 12, // 6
11, 10, // 5
3, 2, // 1
1, 0, // 0
9, 8 // 4
);
__m128i shuf_bn = _mm_set_epi8(
7, 6, // 3
15, 14, // 7
13, 12, // 6
5, 4, // 2
3, 2, // 1
11, 10, // 5
9, 8, // 4
1, 0 // 0
);
alphaPtr += long_cb-1;
__m128i shuf_g[4];
shuf_g[3] = _mm_set_epi8(3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2);
shuf_g[2] = _mm_set_epi8(7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6);
shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
__m128i gv;
llr_t *b = &s->branch[2*long_cb-8];
__m128i *gPtr = (__m128i*) b;
__m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
#define BETA_STEP(g) bp = _mm_add_epi16(beta_k, g);\
bn = _mm_sub_epi16(beta_k, g);\
bp = _mm_shuffle_epi8(bp, shuf_bp);\
bn = _mm_shuffle_epi8(bn, shuf_bn);\
beta_k = _mm_max_epi16(bp, bn);
#define BETA_STEP_CNT(c,d) g = _mm_shuffle_epi8(gv, shuf_g[c]);\
BETA_STEP(g)\
alpha_k = _mm_load_si128(alphaPtr);\
alphaPtr--;\
bp = _mm_add_epi16(bp, alpha_k);\
bn = _mm_add_epi16(bn, alpha_k); output[k-d] = hMax(bn) - hMax(bp);
for (k=end-1; k>=long_cb; k--) {
llr_t g0 = s->branch[2*k];
llr_t g1 = s->branch[2*k+1];
g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1);
BETA_STEP(g);
}
for (; k >= 0; k-=8) {
gv = _mm_load_si128(gPtr);
gPtr--;
BETA_STEP_CNT(0,0);
BETA_STEP_CNT(1,1);
BETA_STEP_CNT(2,2);
BETA_STEP_CNT(3,3);
gv = _mm_load_si128(gPtr);
gPtr--;
BETA_STEP_CNT(0,4);
BETA_STEP_CNT(1,5);
BETA_STEP_CNT(2,6);
BETA_STEP_CNT(3,7);
__m128i norm = _mm_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm_sub_epi16(beta_k, norm);
} }
} }
void srslte_map_gen_alpha(srslte_map_gen_t * s, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output, void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
uint32_t long_cb)
{ {
srslte_llr_t m_b[8], new[8], old[8], max1[8], max0[8];
srslte_llr_t m1, m0;
srslte_llr_t x, y, xy;
srslte_llr_t out;
uint32_t k; uint32_t k;
uint32_t end = long_cb; llr_t *alpha = s->alpha;
srslte_llr_t *beta = s->beta;
uint32_t i; uint32_t i;
old[0] = 0; alpha[0] = 0;
for (i = 1; i < 8; i++) { for (i = 1; i < 8; i++) {
old[i] = -INF; alpha[i] = -INF;
}
__m128i shuf_ap = _mm_set_epi8(
15, 14, // 7
9, 8, // 4
7, 6, // 3
1, 0, // 0
13, 12, // 6
11, 10, // 5
5, 4, // 2
3, 2 // 1
);
__m128i shuf_an = _mm_set_epi8(
13, 12, // 6
11, 10, // 5
5, 4, // 2
3, 2, // 1
15, 14, // 7
9, 8, // 4
7, 6, // 3
1, 0 // 0
);
__m128i shuf_g[4];
shuf_g[0] = _mm_set_epi8(3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2);
shuf_g[1] = _mm_set_epi8(7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6);
shuf_g[2] = _mm_set_epi8(11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10);
shuf_g[3] = _mm_set_epi8(15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14);
__m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
__m128i* alphaPtr = (__m128i*) alpha;
alphaPtr++;
__m128i gv;
__m128i *gPtr = (__m128i*) s->branch;
__m128i g, ap, an;
__m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
#define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \
ap = _mm_add_epi16(alpha_k, g);\
an = _mm_sub_epi16(alpha_k, g);\
ap = _mm_shuffle_epi8(ap, shuf_ap);\
an = _mm_shuffle_epi8(an, shuf_an);\
alpha_k = _mm_max_epi16(ap, an);\
_mm_store_si128(alphaPtr, alpha_k);\
alphaPtr++; \
for (k = 0; k < long_cb/8; k++) {
gv = _mm_load_si128(gPtr);
gPtr++;
ALPHA_STEP(0);
ALPHA_STEP(1);
ALPHA_STEP(2);
ALPHA_STEP(3);
gv = _mm_load_si128(gPtr);
gPtr++;
ALPHA_STEP(0);
ALPHA_STEP(1);
ALPHA_STEP(2);
ALPHA_STEP(3);
__m128i norm = _mm_shuffle_epi8(alpha_k, shuf_norm);
alpha_k = _mm_sub_epi16(alpha_k, norm);
} }
}
for (k = 1; k < end + 1; k++) { void srslte_map_gen_gamma(srslte_map_gen_t * h, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb)
x = input[k - 1]; {
y = parity[k - 1]; __m128i res10, res20, res11, res21, res1, res2;
__m128i in, ap, pa, g1, g0;
xy = x + y; __m128i *inPtr = (__m128i*) input;
__m128i *appPtr = (__m128i*) app;
__m128i *paPtr = (__m128i*) parity;
__m128i *resPtr = (__m128i*) h->branch;
m_b[0] = old[0]; __m128i res10_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
m_b[1] = old[3] + y; __m128i res20_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
m_b[2] = old[4] + y; __m128i res11_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
m_b[3] = old[7]; __m128i res21_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
m_b[4] = old[1];
m_b[5] = old[2] + y;
m_b[6] = old[5] + y;
m_b[7] = old[6];
new[0] = old[1] + xy; for (int i=0;i<long_cb/8;i++) {
new[1] = old[2] + x; in = _mm_load_si128(inPtr);
new[2] = old[5] + x; inPtr++;
new[3] = old[6] + xy; pa = _mm_load_si128(paPtr);
new[4] = old[0] + xy; paPtr++;
new[5] = old[3] + x;
new[6] = old[4] + x;
new[7] = old[7] + xy;
for (i = 0; i < 8; i++) { if (appPtr) {
max0[i] = m_b[i] + beta[8 * k + i]; ap = _mm_load_si128(appPtr);
max1[i] = new[i] + beta[8 * k + i]; appPtr++;
in = _mm_add_epi16(ap, in);
} }
m1 = max1[0]; g1 = _mm_add_epi16(in, pa);
m0 = max0[0]; g0 = _mm_sub_epi16(in, pa);
for (i = 1; i < 8; i++) { g1 = _mm_srai_epi16(g1, 1);
if (max1[i] > m1) g0 = _mm_srai_epi16(g0, 1);
m1 = max1[i];
if (max0[i] > m0) res10 = _mm_shuffle_epi8(g0, res10_mask);
m0 = max0[i]; res20 = _mm_shuffle_epi8(g0, res20_mask);
} res11 = _mm_shuffle_epi8(g1, res11_mask);
res21 = _mm_shuffle_epi8(g1, res21_mask);
for (i = 0; i < 8; i++) { res1 = _mm_or_si128(res10, res11);
if (m_b[i] > new[i]) res2 = _mm_or_si128(res20, res21);
new[i] = m_b[i];
old[i] = new[i]; _mm_store_si128(resPtr, res1);
resPtr++;
_mm_store_si128(resPtr, res2);
resPtr++;
} }
out = m1 - m0; for (int i=long_cb;i<long_cb+3;i++) {
output[k - 1] = out; h->branch[2*i] = (input[i] - parity[i])/2;
h->branch[2*i+1] = (input[i] + parity[i])/2;
} }
} }
int srslte_map_gen_init(srslte_map_gen_t * h, int max_long_cb) int srslte_map_gen_init(srslte_map_gen_t * h, int max_long_cb)
{ {
bzero(h, sizeof(srslte_map_gen_t)); bzero(h, sizeof(srslte_map_gen_t));
h->beta = srslte_vec_malloc(sizeof(srslte_llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES); h->alpha = srslte_vec_malloc(sizeof(llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->beta) { if (!h->alpha) {
perror("srslte_vec_malloc");
return -1;
}
h->branch = srslte_vec_malloc(sizeof(llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->branch) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
return -1; return -1;
} }
@ -177,23 +310,28 @@ int srslte_map_gen_init(srslte_map_gen_t * h, int max_long_cb)
void srslte_map_gen_free(srslte_map_gen_t * h) void srslte_map_gen_free(srslte_map_gen_t * h)
{ {
if (h->beta) { if (h->alpha) {
free(h->beta); free(h->alpha);
}
if (h->branch) {
free(h->branch);
} }
bzero(h, sizeof(srslte_map_gen_t)); bzero(h, sizeof(srslte_map_gen_t));
} }
void srslte_map_gen_dec(srslte_map_gen_t * h, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output, void srslte_map_gen_dec(srslte_map_gen_t * h, llr_t * input, llr_t *app, llr_t * parity, llr_t * output,
uint32_t long_cb) uint32_t long_cb)
{ {
uint32_t k;
h->beta[(long_cb + TAIL) * NUMSTATES] = 0; // Compute branch metrics
for (k = 1; k < NUMSTATES; k++) srslte_map_gen_gamma(h, input, app, parity, long_cb);
h->beta[(long_cb + TAIL) * NUMSTATES + k] = -INF;
// Forward recursion
srslte_map_gen_alpha(h, long_cb);
// Backwards recursion + LLR computation
srslte_map_gen_beta(h, output, long_cb);
srslte_map_gen_beta(h, input, parity, long_cb);
srslte_map_gen_alpha(h, input, parity, output, long_cb);
} }
/************************************************ /************************************************
@ -209,28 +347,38 @@ int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb)
h->max_long_cb = max_long_cb; h->max_long_cb = max_long_cb;
h->llr1 = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->app1 = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->llr1) { if (!h->app1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->app2 = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->app2) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->llr2 = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->ext1 = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->llr2) { if (!h->ext1) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->w = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->ext2 = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->w) { if (!h->ext2) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->syst = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->syst = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->syst) { if (!h->syst) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->parity = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->parity0 = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->parity) { if (!h->parity0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity1 = srslte_vec_malloc(sizeof(llr_t) * len);
if (!h->parity1) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
@ -255,20 +403,26 @@ clean_and_exit:if (ret == -1) {
void srslte_tdec_free(srslte_tdec_t * h) void srslte_tdec_free(srslte_tdec_t * h)
{ {
if (h->llr1) { if (h->app1) {
free(h->llr1); free(h->app1);
} }
if (h->llr2) { if (h->app2) {
free(h->llr2); free(h->app2);
} }
if (h->w) { if (h->ext1) {
free(h->w); free(h->ext1);
}
if (h->ext2) {
free(h->ext2);
} }
if (h->syst) { if (h->syst) {
free(h->syst); free(h->syst);
} }
if (h->parity) { if (h->parity0) {
free(h->parity); free(h->parity0);
}
if (h->parity1) {
free(h->parity1);
} }
srslte_map_gen_free(&h->dec); srslte_map_gen_free(&h->dec);
@ -280,48 +434,145 @@ void srslte_tdec_free(srslte_tdec_t * h)
bzero(h, sizeof(srslte_tdec_t)); bzero(h, sizeof(srslte_tdec_t));
} }
void srslte_tdec_iteration(srslte_tdec_t * h, srslte_llr_t * input, uint32_t long_cb) void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) {
{
uint32_t i; uint32_t i;
if (h->current_cbidx >= 0) { float *inputPtr = input;
__m128 inf0, inf1, inf2, inf3, inf4, inf5;
__m128i in0, in1, in2;
__m128i s0, s1, s2, s;
__m128i p00, p01, p02, p0;
__m128i p10, p11, p12, p1;
__m128i *sysPtr = (__m128i*) h->syst;
__m128i *pa0Ptr = (__m128i*) h->parity0;
__m128i *pa1Ptr = (__m128i*) h->parity1;
// pick bits 0, 3, 6 from 1st word
__m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0);
// pick bits 1, 4, 7 from 2st word
__m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 2, 5 from 3rd word
__m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 1, 4, 7 from 1st word
__m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2);
// pick bits 2, 5, from 2st word
__m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 0, 3, 6 from 3rd word
__m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 2, 5 from 1st word
__m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4);
// pick bits 0, 3, 6, from 2st word
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
// pick bits 1, 4, 7 from 3rd word
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
__m128 vScalar = _mm_set1_ps(SCALE);
// Split systematic and parity bits
for (i = 0; i < long_cb/8; i++) {
inf0 = _mm_load_ps(inputPtr); inputPtr+=4;
inf1 = _mm_load_ps(inputPtr); inputPtr+=4;
inf2 = _mm_load_ps(inputPtr); inputPtr+=4;
inf3 = _mm_load_ps(inputPtr); inputPtr+=4;
inf4 = _mm_load_ps(inputPtr); inputPtr+=4;
inf5 = _mm_load_ps(inputPtr); inputPtr+=4;
inf0 = _mm_mul_ps(inf0, vScalar);
inf1 = _mm_mul_ps(inf1, vScalar);
inf2 = _mm_mul_ps(inf2, vScalar);
inf3 = _mm_mul_ps(inf3, vScalar);
inf4 = _mm_mul_ps(inf4, vScalar);
inf5 = _mm_mul_ps(inf5, vScalar);
uint32_t *inter = h->interleaver[h->current_cbidx].forward; in0 = _mm_packs_epi32(_mm_cvtps_epi32(inf0), _mm_cvtps_epi32(inf1));
uint32_t *deinter = h->interleaver[h->current_cbidx].reverse; in1 = _mm_packs_epi32(_mm_cvtps_epi32(inf2), _mm_cvtps_epi32(inf3));
in2 = _mm_packs_epi32(_mm_cvtps_epi32(inf4), _mm_cvtps_epi32(inf5));
// Prepare systematic and parity bits for MAP DEC #1 /* Deinterleave Systematic bits */
for (i = 0; i < long_cb; i++) { s0 = _mm_shuffle_epi8(in0, s0_mask);
h->syst[i] = input[SRSLTE_TCOD_RATE * i] + h->w[i]; s1 = _mm_shuffle_epi8(in1, s1_mask);
h->parity[i] = input[SRSLTE_TCOD_RATE * i + 1]; s2 = _mm_shuffle_epi8(in2, s2_mask);
s = _mm_or_si128(s0, s1);
s = _mm_or_si128(s, s2);
_mm_store_si128(sysPtr, s);
sysPtr++;
/* Deinterleave parity 0 bits */
p00 = _mm_shuffle_epi8(in0, p00_mask);
p01 = _mm_shuffle_epi8(in1, p01_mask);
p02 = _mm_shuffle_epi8(in2, p02_mask);
p0 = _mm_or_si128(p00, p01);
p0 = _mm_or_si128(p0, p02);
_mm_store_si128(pa0Ptr, p0);
pa0Ptr++;
/* Deinterleave parity 1 bits */
p10 = _mm_shuffle_epi8(in0, p10_mask);
p11 = _mm_shuffle_epi8(in1, p11_mask);
p12 = _mm_shuffle_epi8(in2, p12_mask);
p1 = _mm_or_si128(p10, p11);
p1 = _mm_or_si128(p1, p12);
_mm_store_si128(pa1Ptr, p1);
pa1Ptr++;
}
for (i = 0; i < 3; i++) {
h->syst[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 2*i];
h->parity0[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 2*i + 1];
} }
for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) { for (i = 0; i < 3; i++) {
h->syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)]; h->app2[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 6 + 2*i];
h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1]; h->parity1[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 6 + 2*i + 1];
} }
// Run MAP DEC #1 }
srslte_map_gen_dec(&h->dec, h->syst, h->parity, h->llr1, long_cb);
// Prepare systematic and parity bits for MAP DEC #1 void srslte_tdec_iteration(srslte_tdec_t * h, float * input, uint32_t long_cb)
for (i = 0; i < long_cb; i++) { {
h->syst[i] = h->llr1[inter[i]]
- h->w[inter[i]]; if (h->current_cbidx >= 0) {
h->parity[i] = input[SRSLTE_TCOD_RATE * i + 2]; uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
if (h->n_iter == 0) {
deinterleave_input(h, input, long_cb);
} }
for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
h->syst[i] = // Add apriori information to decoder 1
input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)]; if (h->n_iter > 0) {
h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE srslte_vec_sub_sss(h->app1, h->ext1, h->app1, long_cb);
+ NINPUTS * (i - long_cb) + 1];
} }
// Run MAP DEC #1 // Run MAP DEC #1
srslte_map_gen_dec(&h->dec, h->syst, h->parity, h->llr2, long_cb); if (h->n_iter == 0) {
srslte_map_gen_dec(&h->dec, h->syst, NULL, h->parity0, h->ext1, long_cb);
} else {
srslte_map_gen_dec(&h->dec, h->syst, h->app1, h->parity0, h->ext1, long_cb);
}
// Update a-priori LLR from the last iteration // Convert aposteriori information into extrinsic information
for (i = 0; i < long_cb; i++) { if (h->n_iter > 0) {
h->w[i] += h->llr2[deinter[i]] - h->llr1[i]; srslte_vec_sub_sss(h->ext1, h->app1, h->ext1, long_cb);
} }
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
srslte_vec_lut_sss(h->ext1, inter, h->app2, long_cb);
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
srslte_map_gen_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, long_cb);
// Deinterleaved extrinsic bits become apriori info for decoder 1
srslte_vec_lut_sss(h->ext2, deinter, h->app1, long_cb);
h->n_iter++;
} else { } else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_reset() first\n"); fprintf(stderr, "Error CB index not set (call srslte_tdec_reset() first\n");
} }
@ -334,7 +585,7 @@ int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb)
h->max_long_cb); h->max_long_cb);
return -1; return -1;
} }
memset(h->w, 0, sizeof(srslte_llr_t) * long_cb); h->n_iter = 0;
h->current_cbidx = srslte_cbsegm_cbindex(long_cb); h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
if (h->current_cbidx < 0) { if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb); fprintf(stderr, "Invalid CB length %d\n", long_cb);
@ -345,47 +596,59 @@ int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb)
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb)
{ {
uint32_t *deinter = h->interleaver[h->current_cbidx].reverse; __m128i zero = _mm_set1_epi16(0);
uint32_t i; __m128i lsb_mask = _mm_set1_epi16(1);
for (i = 0; i < long_cb; i++) {
output[i] = (h->llr2[deinter[i]] > 0) ? 1 : 0; __m128i *appPtr = (__m128i*) h->app1;
__m128i *outPtr = (__m128i*) output;
__m128i ap, out, out0, out1;
for (uint32_t i = 0; i < long_cb/16; i++) {
ap = _mm_load_si128(appPtr); appPtr++;
out0 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
ap = _mm_load_si128(appPtr); appPtr++;
out1 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
out = _mm_packs_epi16(out0, out1);
_mm_store_si128(outPtr, out);
outPtr++;
}
if (long_cb%16) {
for (int i=0;i<8;i++) {
output[long_cb-8+i] = h->app1[long_cb-8+i]>0?1:0;
}
} }
} }
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb)
{ {
uint32_t i;
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
uint32_t *deinter = h->interleaver[h->current_cbidx].reverse;
// long_cb is always byte aligned // long_cb is always byte aligned
for (i = 0; i < long_cb/8; i++) { for (uint32_t i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->llr2[deinter[8*i+0]]>0?mask[0]:0; uint8_t out0 = h->app1[i+0]>0?mask[0]:0;
uint8_t out1 = h->llr2[deinter[8*i+1]]>0?mask[1]:0; uint8_t out1 = h->app1[i+1]>0?mask[1]:0;
uint8_t out2 = h->llr2[deinter[8*i+2]]>0?mask[2]:0; uint8_t out2 = h->app1[i+2]>0?mask[2]:0;
uint8_t out3 = h->llr2[deinter[8*i+3]]>0?mask[3]:0; uint8_t out3 = h->app1[i+3]>0?mask[3]:0;
uint8_t out4 = h->llr2[deinter[8*i+4]]>0?mask[4]:0; uint8_t out4 = h->app1[i+4]>0?mask[4]:0;
uint8_t out5 = h->llr2[deinter[8*i+5]]>0?mask[5]:0; uint8_t out5 = h->app1[i+5]>0?mask[5]:0;
uint8_t out6 = h->llr2[deinter[8*i+6]]>0?mask[6]:0; uint8_t out6 = h->app1[i+6]>0?mask[6]:0;
uint8_t out7 = h->llr2[deinter[8*i+7]]>0?mask[7]:0; uint8_t out7 = h->app1[i+7]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
} }
} }
int srslte_tdec_run_all(srslte_tdec_t * h, srslte_llr_t * input, uint8_t *output, int srslte_tdec_run_all(srslte_tdec_t * h, float * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb) uint32_t nof_iterations, uint32_t long_cb)
{ {
uint32_t iter = 0;
if (srslte_tdec_reset(h, long_cb)) { if (srslte_tdec_reset(h, long_cb)) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
do { do {
srslte_tdec_iteration(h, input, long_cb); srslte_tdec_iteration(h, input, long_cb);
iter++; } while (h->n_iter < nof_iterations);
} while (iter < nof_iterations);
srslte_tdec_decision(h, output, long_cb); srslte_tdec_decision(h, output, long_cb);

@ -0,0 +1,393 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/fec/turbodecoder_vl.h"
#include "srslte/utils/vector.h"
#define NUMSTATES 8
#define NINPUTS 2
#define TAIL 3
#define TOTALTAIL 12
#define INF 9e4
#define ZERO 9e-4
/************************************************
*
* MAP_GEN is the MAX-LOG-MAP generic implementation of the
* Decoder
*
************************************************/
/**
 * Backward recursion of the decoder.
 *
 * Starts from the 8 state metrics already stored in s->beta at trellis step
 * long_cb + SRSLTE_TCOD_RATE, walks the trellis backwards down to step 0 and
 * writes the 8 metrics of every visited step into s->beta (8 entries per step).
 *
 * \param s        decoder state; s->beta must already hold the seed metrics
 * \param input    systematic values, indexed by trellis step
 * \param parity   parity values, indexed by trellis step
 * \param long_cb  code block length (tail steps excluded)
 */
static void map_gen_beta(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_llr_t * parity,
                         uint32_t long_cb)
{
  const uint32_t last_step = long_cb + SRSLTE_TCOD_RATE;
  srslte_llr_t *beta = s->beta;
  srslte_llr_t cur[8], path_a[8], path_b[8];

  /* Load the seed metrics of the final step. */
  for (uint32_t st = 0; st < 8; st++) {
    cur[st] = beta[8 * last_step + st];
  }

  for (int step = last_step - 1; step >= 0; step--) {
    const srslte_llr_t sys  = input[step];
    const srslte_llr_t par  = parity[step];
    const srslte_llr_t both = sys + par;

    /* First candidate metric of every state. */
    path_a[0] = cur[4] + both;
    path_a[1] = cur[4];
    path_a[2] = cur[5] + par;
    path_a[3] = cur[5] + sys;
    path_a[4] = cur[6] + sys;
    path_a[5] = cur[6] + par;
    path_a[6] = cur[7];
    path_a[7] = cur[7] + both;

    /* Second candidate metric of every state. */
    path_b[0] = cur[0];
    path_b[1] = cur[0] + both;
    path_b[2] = cur[1] + sys;
    path_b[3] = cur[1] + par;
    path_b[4] = cur[2] + par;
    path_b[5] = cur[2] + sys;
    path_b[6] = cur[3] + both;
    path_b[7] = cur[3];

    /* Keep the larger candidate per state and store it for this step. */
    srslte_llr_t *row = &beta[8 * step];
    for (uint32_t st = 0; st < 8; st++) {
      const srslte_llr_t best = (path_a[st] > path_b[st]) ? path_a[st] : path_b[st];
      cur[st] = best;
      row[st] = best;
    }
  }
}
/**
 * Forward recursion of the decoder; produces one output value per data bit.
 *
 * For every trellis step the candidate metrics are split into the ones that
 * include the systematic term and the ones that do not. Each group is
 * combined with the stored beta metrics of the following step, and the
 * output is the difference between the two group maxima.
 *
 * \param s        decoder state; s->beta must already be filled by map_gen_beta()
 * \param input    systematic values, indexed by trellis step
 * \param parity   parity values, indexed by trellis step
 * \param output   receives long_cb values
 * \param long_cb  code block length
 */
static void map_gen_alpha(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output,
                          uint32_t long_cb)
{
  srslte_llr_t *beta = s->beta;
  srslte_llr_t alpha[8], no_sys[8], with_sys[8];

  /* Only state 0 is allowed at the start of the trellis. */
  alpha[0] = 0;
  for (uint32_t st = 1; st < 8; st++) {
    alpha[st] = -INF;
  }

  for (uint32_t n = 0; n < long_cb; n++) {
    const srslte_llr_t sys  = input[n];
    const srslte_llr_t par  = parity[n];
    const srslte_llr_t both = sys + par;
    /* Beta metrics of the step that follows the current one. */
    const srslte_llr_t *beta_next = &beta[8 * (n + 1)];

    /* Candidates that do not include the systematic term. */
    no_sys[0] = alpha[0];
    no_sys[1] = alpha[3] + par;
    no_sys[2] = alpha[4] + par;
    no_sys[3] = alpha[7];
    no_sys[4] = alpha[1];
    no_sys[5] = alpha[2] + par;
    no_sys[6] = alpha[5] + par;
    no_sys[7] = alpha[6];

    /* Candidates that include the systematic term. */
    with_sys[0] = alpha[1] + both;
    with_sys[1] = alpha[2] + sys;
    with_sys[2] = alpha[5] + sys;
    with_sys[3] = alpha[6] + both;
    with_sys[4] = alpha[0] + both;
    with_sys[5] = alpha[3] + sys;
    with_sys[6] = alpha[4] + sys;
    with_sys[7] = alpha[7] + both;

    /* Maximum of (candidate + beta) within each group. */
    srslte_llr_t best_with    = with_sys[0] + beta_next[0];
    srslte_llr_t best_without = no_sys[0] + beta_next[0];
    for (uint32_t st = 1; st < 8; st++) {
      const srslte_llr_t cand_with    = with_sys[st] + beta_next[st];
      const srslte_llr_t cand_without = no_sys[st] + beta_next[st];
      if (cand_with > best_with) {
        best_with = cand_with;
      }
      if (cand_without > best_without) {
        best_without = cand_without;
      }
    }

    /* New state metrics: the larger of the two candidates per state. */
    for (uint32_t st = 0; st < 8; st++) {
      alpha[st] = (no_sys[st] > with_sys[st]) ? no_sys[st] : with_sys[st];
    }

    output[n] = best_with - best_without;
  }
}
/**
 * Clears the MAP decoder state and allocates its beta buffer, sized for
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) trellis steps of NUMSTATES
 * metrics each.
 *
 * \return 0 on success, -1 if the allocation fails
 */
static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb)
{
  memset(h, 0, sizeof(*h));

  h->beta = srslte_vec_malloc(sizeof(srslte_llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
  if (h->beta == NULL) {
    perror("srslte_vec_malloc");
    return -1;
  }

  h->max_long_cb = max_long_cb;
  return 0;
}
/**
 * Releases the beta buffer (if any) and zeroes the MAP decoder state.
 */
static void map_gen_free(srslte_map_gen_vl_t * h)
{
  /* free(NULL) is a no-op, so no guard is needed. */
  free(h->beta);
  memset(h, 0, sizeof(*h));
}
/**
 * Runs one MAP decoding pass: seeds the beta metrics at trellis step
 * long_cb + TAIL (state 0 gets metric 0, every other state -INF), then runs
 * the backward recursion followed by the forward recursion.
 *
 * \param h        decoder state
 * \param input    systematic values
 * \param parity   parity values
 * \param output   receives long_cb output values
 * \param long_cb  code block length
 */
static void map_gen_dec(srslte_map_gen_vl_t * h, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output,
                        uint32_t long_cb)
{
  srslte_llr_t *seed = &h->beta[(long_cb + TAIL) * NUMSTATES];

  seed[0] = 0;
  for (uint32_t st = 1; st < NUMSTATES; st++) {
    seed[st] = -INF;
  }

  map_gen_beta(h, input, parity, long_cb);
  map_gen_alpha(h, input, parity, output, long_cb);
}
/************************************************
*
* TURBO DECODER INTERFACE
*
************************************************/
/**
 * Initializes a turbo decoder for code blocks of up to max_long_cb bits.
 *
 * Allocates the working buffers (llr1, llr2, w, syst, parity), the MAP
 * decoder state, and one interleaver per supported code block size. On any
 * failure everything allocated so far is released through
 * srslte_tdec_vl_free().
 *
 * \return 0 on success, -1 on failure
 */
int srslte_tdec_vl_init(srslte_tdec_vl_t * h, uint32_t max_long_cb)
{
  memset(h, 0, sizeof(*h));

  const uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;

  h->max_long_cb = max_long_cb;

  h->llr1 = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
  if (h->llr1 == NULL) {
    perror("srslte_vec_malloc");
    goto fail;
  }

  h->llr2 = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
  if (h->llr2 == NULL) {
    perror("srslte_vec_malloc");
    goto fail;
  }

  h->w = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
  if (h->w == NULL) {
    perror("srslte_vec_malloc");
    goto fail;
  }

  h->syst = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
  if (h->syst == NULL) {
    perror("srslte_vec_malloc");
    goto fail;
  }

  h->parity = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
  if (h->parity == NULL) {
    perror("srslte_vec_malloc");
    goto fail;
  }

  if (map_gen_init(&h->dec, h->max_long_cb) != 0) {
    goto fail;
  }

  /* One interleaver table per supported code block size. */
  for (int cb = 0; cb < SRSLTE_NOF_TC_CB_SIZES; cb++) {
    if (srslte_tc_interl_init(&h->interleaver[cb], srslte_cbsegm_cbsize(cb)) < 0) {
      goto fail;
    }
    srslte_tc_interl_LTE_gen(&h->interleaver[cb], srslte_cbsegm_cbsize(cb));
  }

  /* No code block size selected until srslte_tdec_vl_reset() is called. */
  h->current_cbidx = -1;
  return 0;

fail:
  srslte_tdec_vl_free(h);
  return -1;
}
/**
 * Releases every buffer owned by the decoder, its MAP decoder state and all
 * interleavers, then zeroes the structure.
 */
void srslte_tdec_vl_free(srslte_tdec_vl_t * h)
{
  /* free(NULL) is a no-op, so buffers that were never allocated are fine. */
  free(h->llr1);
  free(h->llr2);
  free(h->w);
  free(h->syst);
  free(h->parity);

  map_gen_free(&h->dec);

  for (int cb = 0; cb < SRSLTE_NOF_TC_CB_SIZES; cb++) {
    srslte_tc_interl_free(&h->interleaver[cb]);
  }

  memset(h, 0, sizeof(*h));
}
/**
 * Runs one full turbo iteration (MAP decoder #1 followed by MAP decoder #2)
 * and updates the a-priori values h->w.
 *
 * Requires a prior successful srslte_tdec_vl_reset(); otherwise an error is
 * printed to stderr and nothing is decoded.
 *
 * \param h        decoder object
 * \param input    received values: SRSLTE_TCOD_RATE entries per data bit,
 *                 followed by the tail entries of both constituent codes
 * \param long_cb  code block length
 */
void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h, srslte_llr_t * input, uint32_t long_cb)
{
  if (h->current_cbidx < 0) {
    fprintf(stderr, "Error CB index not set (call srslte_tdec_vl_reset() first\n");
    return;
  }

  uint16_t *inter   = h->interleaver[h->current_cbidx].forward;
  uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;

  const uint32_t nof_tail = SRSLTE_TCOD_RATE;
  /* Tail entries of decoder #1 start right after the data section; the ones
   * of decoder #2 follow NINPUTS * SRSLTE_TCOD_RATE entries later. */
  const srslte_llr_t *tail1 = &input[SRSLTE_TCOD_RATE * long_cb];
  const srslte_llr_t *tail2 = tail1 + NINPUTS * SRSLTE_TCOD_RATE;

  // Prepare systematic and parity bits for MAP DEC #1
  for (uint32_t n = 0; n < long_cb; n++) {
    h->syst[n]   = input[SRSLTE_TCOD_RATE * n] + h->w[n];
    h->parity[n] = input[SRSLTE_TCOD_RATE * n + 1];
  }
  for (uint32_t t = 0; t < nof_tail; t++) {
    h->syst[long_cb + t]   = tail1[NINPUTS * t];
    h->parity[long_cb + t] = tail1[NINPUTS * t + 1];
  }

  // Run MAP DEC #1
  map_gen_dec(&h->dec, h->syst, h->parity, h->llr1, long_cb);

  // Prepare systematic and parity bits for MAP DEC #2
  for (uint32_t n = 0; n < long_cb; n++) {
    const uint16_t src = inter[n];
    h->syst[n]   = h->llr1[src] - h->w[src];
    h->parity[n] = input[SRSLTE_TCOD_RATE * n + 2];
  }
  for (uint32_t t = 0; t < nof_tail; t++) {
    h->syst[long_cb + t]   = tail2[NINPUTS * t];
    h->parity[long_cb + t] = tail2[NINPUTS * t + 1];
  }

  // Run MAP DEC #2
  map_gen_dec(&h->dec, h->syst, h->parity, h->llr2, long_cb);

  // Update a-priori LLR from the last iteration
  for (uint32_t n = 0; n < long_cb; n++) {
    h->w[n] += h->llr2[deinter[n]] - h->llr1[n];
  }
}
/**
 * Prepares the decoder for a new code block of long_cb bits: clears the
 * first long_cb a-priori values and selects the interleaver for that size.
 *
 * \return 0 on success; -1 if long_cb exceeds the size given at init time or
 *         is not a valid code block length (an error is printed to stderr)
 */
int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb)
{
  int status = -1;

  if (long_cb <= h->max_long_cb) {
    memset(h->w, 0, sizeof(srslte_llr_t) * long_cb);
    h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
    if (h->current_cbidx >= 0) {
      status = 0;
    } else {
      fprintf(stderr, "Invalid CB length %d\n", long_cb);
    }
  } else {
    fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
            h->max_long_cb);
  }

  return status;
}
/**
 * Writes one hard decision per byte of output: output[n] is 1 when the
 * de-interleaved value h->llr2[reverse[n]] is strictly positive, else 0.
 *
 * \param h        decoder object (a code block size must be selected)
 * \param output   receives long_cb bytes, each 0 or 1
 * \param long_cb  code block length
 */
void srslte_tdec_vl_decision(srslte_tdec_vl_t * h, uint8_t *output, uint32_t long_cb)
{
  const uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;

  for (uint32_t n = 0; n < long_cb; n++) {
    const srslte_llr_t value = h->llr2[deinter[n]];
    output[n] = (uint8_t) (value > 0);
  }
}
/**
 * Writes packed hard decisions, 8 bits per output byte, most significant bit
 * first. A bit is set when the de-interleaved value h->llr2[reverse[n]] is
 * strictly positive.
 *
 * Only long_cb/8 whole bytes are produced; the caller is expected to pass a
 * long_cb that is a multiple of 8.
 */
void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h, uint8_t *output, uint32_t long_cb)
{
  const uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
  const uint32_t nof_bytes = long_cb / 8;

  // long_cb is always byte aligned
  for (uint32_t byte_idx = 0; byte_idx < nof_bytes; byte_idx++) {
    uint8_t packed = 0;
    for (uint32_t bit = 0; bit < 8; bit++) {
      if (h->llr2[deinter[8 * byte_idx + bit]] > 0) {
        /* bit 0 of the group lands in the MSB (0x80), bit 7 in the LSB */
        packed |= (uint8_t) (0x80 >> bit);
      }
    }
    output[byte_idx] = packed;
  }
}
/**
 * Convenience wrapper: resets the decoder for long_cb, runs the requested
 * number of iterations and writes the hard decisions to output.
 *
 * At least one iteration is always executed, even when nof_iterations is 0.
 *
 * \return SRSLTE_SUCCESS, or SRSLTE_ERROR if the reset fails
 */
int srslte_tdec_vl_run_all(srslte_tdec_vl_t * h, srslte_llr_t * input, uint8_t *output,
                           uint32_t nof_iterations, uint32_t long_cb)
{
  if (srslte_tdec_vl_reset(h, long_cb) != 0) {
    return SRSLTE_ERROR;
  }

  for (uint32_t done = 1; ; done++) {
    srslte_tdec_vl_iteration(h, input, long_cb);
    if (done >= nof_iterations) {
      break;
    }
  }

  srslte_tdec_vl_decision(h, output, long_cb);
  return SRSLTE_SUCCESS;
}

@ -36,6 +36,7 @@
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
#include "srslte/srslte.h" #include "srslte/srslte.h"
#include "srslte/fec/turbodecoder_vl.h"
#include "turbodecoder_test.h" #include "turbodecoder_test.h"
@ -46,14 +47,15 @@ float ebno_db = 100.0;
uint32_t seed = 0; uint32_t seed = 0;
int K = -1; int K = -1;
#define MAX_ITERATIONS 4 #define MAX_ITERATIONS 10
int nof_iterations = MAX_ITERATIONS; int nof_iterations = MAX_ITERATIONS;
int test_known_data = 0; int test_known_data = 0;
int test_errors = 0; int test_errors = 0;
int nof_repetitions = 1;
#define SNR_POINTS 8 #define SNR_POINTS 4
#define SNR_MIN 0.0 #define SNR_MIN 1.0
#define SNR_MAX 4.0 #define SNR_MAX 8.0
void usage(char *prog) { void usage(char *prog) {
printf("Usage: %s [nlesv]\n", prog); printf("Usage: %s [nlesv]\n", prog);
@ -61,6 +63,7 @@ void usage(char *prog) {
"\t-k Test with known data (ignores frame_length) [Default disabled]\n"); "\t-k Test with known data (ignores frame_length) [Default disabled]\n");
printf("\t-i nof_iterations [Default %d]\n", nof_iterations); printf("\t-i nof_iterations [Default %d]\n", nof_iterations);
printf("\t-n nof_frames [Default %d]\n", nof_frames); printf("\t-n nof_frames [Default %d]\n", nof_frames);
printf("\t-N nof_repetitions [Default %d]\n", nof_repetitions);
printf("\t-l frame_length [Default %d]\n", frame_length); printf("\t-l frame_length [Default %d]\n", frame_length);
printf("\t-e ebno in dB [Default scan]\n"); printf("\t-e ebno in dB [Default scan]\n");
printf("\t-t test: check errors on exit [Default disabled]\n"); printf("\t-t test: check errors on exit [Default disabled]\n");
@ -69,11 +72,14 @@ void usage(char *prog) {
void parse_args(int argc, char **argv) { void parse_args(int argc, char **argv) {
int opt; int opt;
while ((opt = getopt(argc, argv, "inlstvekt")) != -1) { while ((opt = getopt(argc, argv, "inNlstvekt")) != -1) {
switch (opt) { switch (opt) {
case 'n': case 'n':
nof_frames = atoi(argv[optind]); nof_frames = atoi(argv[optind]);
break; break;
case 'N':
nof_repetitions = atoi(argv[optind]);
break;
case 'k': case 'k':
test_known_data = 1; test_known_data = 1;
break; break;
@ -102,29 +108,6 @@ void parse_args(int argc, char **argv) {
} }
} }
void output_matlab(float ber[MAX_ITERATIONS][SNR_POINTS], int snr_points) {
int i, j;
FILE *f = fopen("turbocoder_snr.m", "w");
if (!f) {
perror("fopen");
exit(-1);
}
fprintf(f, "ber=[");
for (j = 0; j < MAX_ITERATIONS; j++) {
for (i = 0; i < snr_points; i++) {
fprintf(f, "%g ", ber[j][i]);
}
fprintf(f, ";\n");
}
fprintf(f, "];\n");
fprintf(f, "snr=linspace(%g,%g-%g/%d,%d);\n", SNR_MIN, SNR_MAX, SNR_MAX,
snr_points, snr_points);
fprintf(f, "semilogy(snr,ber,snr,0.5*erfc(sqrt(10.^(snr/10))));\n");
fprintf(f,
"legend('1 iter','2 iter', '3 iter', '4 iter', 'theory-uncoded');");
fprintf(f, "grid on;\n");
fclose(f);
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
uint32_t frame_cnt; uint32_t frame_cnt;
@ -134,12 +117,13 @@ int main(int argc, char **argv) {
uint32_t i, j; uint32_t i, j;
float var[SNR_POINTS]; float var[SNR_POINTS];
uint32_t snr_points; uint32_t snr_points;
float ber[MAX_ITERATIONS][SNR_POINTS]; uint32_t errors;
uint32_t errors[100]; uint32_t errors_vl;
uint32_t coded_length; uint32_t coded_length;
struct timeval tdata[3]; struct timeval tdata[3];
float mean_usec; float mean_usec, mean_usec_vl;
srslte_tdec_t tdec; srslte_tdec_t tdec;
srslte_tdec_vl_t tdec_vl;
srslte_tcod_t tcod; srslte_tcod_t tcod;
parse_args(argc, argv); parse_args(argc, argv);
@ -200,6 +184,11 @@ int main(int argc, char **argv) {
exit(-1); exit(-1);
} }
if (srslte_tdec_vl_init(&tdec_vl, frame_length)) {
fprintf(stderr, "Error initiating Turbo decoder\n");
exit(-1);
}
float ebno_inc, esno_db; float ebno_inc, esno_db;
ebno_inc = (SNR_MAX - SNR_MIN) / SNR_POINTS; ebno_inc = (SNR_MAX - SNR_MIN) / SNR_POINTS;
if (ebno_db == 100.0) { if (ebno_db == 100.0) {
@ -215,11 +204,13 @@ int main(int argc, char **argv) {
snr_points = 1; snr_points = 1;
} }
for (i = 0; i < snr_points; i++) { for (i = 0; i < snr_points; i++) {
mean_usec = 0; mean_usec = 0;
mean_usec_vl = 0;
errors = 0;
errors_vl = 0;
frame_cnt = 0; frame_cnt = 0;
bzero(errors, sizeof(int) * MAX_ITERATIONS);
while (frame_cnt < nof_frames) { while (frame_cnt < nof_frames) {
/* generate data_tx */ /* generate data_tx */
for (j = 0; j < frame_length; j++) { for (j = 0; j < frame_length; j++) {
if (test_known_data) { if (test_known_data) {
@ -239,13 +230,14 @@ int main(int argc, char **argv) {
} }
for (j = 0; j < coded_length; j++) { for (j = 0; j < coded_length; j++) {
llr[j] = symbols[j] ? sqrt(2) : -sqrt(2); llr[j] = symbols[j] ? 1 : -1;
} }
srslte_ch_awgn_f(llr, llr, var[i], coded_length); srslte_ch_awgn_f(llr, llr, var[i], coded_length);
/* decoder */ /* decoder */
srslte_tdec_reset(&tdec, frame_length); srslte_tdec_reset(&tdec, frame_length);
srslte_tdec_vl_reset(&tdec_vl, frame_length);
uint32_t t; uint32_t t;
if (nof_iterations == -1) { if (nof_iterations == -1) {
@ -253,69 +245,51 @@ int main(int argc, char **argv) {
} else { } else {
t = nof_iterations; t = nof_iterations;
} }
for (j = 0; j < t; j++) {
if (!j) gettimeofday(&tdata[1], NULL);
gettimeofday(&tdata[1], NULL); // Only measure 1 iteration for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_iteration(&tdec, llr, frame_length); srslte_tdec_run_all(&tdec, llr, data_rx, t, frame_length);
srslte_tdec_decision(&tdec, data_rx, frame_length); }
if (!j)
gettimeofday(&tdata[2], NULL); gettimeofday(&tdata[2], NULL);
if (!j)
get_time_interval(tdata); get_time_interval(tdata);
if (!j) mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
mean_usec = (float) mean_usec * 0.9 + (float) tdata[0].tv_usec * 0.1;
/* check errors */ errors += srslte_bit_diff(data_tx, data_rx, frame_length);
errors[j] += srslte_bit_diff(data_tx, data_rx, frame_length);
if (j < MAX_ITERATIONS) { gettimeofday(&tdata[1], NULL);
ber[j][i] = (float) errors[j] / (frame_cnt * frame_length); for (int k=0;k<nof_repetitions;k++) {
} srslte_tdec_vl_run_all(&tdec_vl, llr, data_rx, t, frame_length);
} }
gettimeofday(&tdata[2], NULL);
get_time_interval(tdata);
mean_usec_vl = (float) mean_usec_vl * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
/* check errors */
errors_vl += srslte_bit_diff(data_tx, data_rx, frame_length);
frame_cnt++; frame_cnt++;
printf("Eb/No: %3.2f %10d/%d ", printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
SNR_MIN + i * ebno_inc, frame_cnt, nof_frames); printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length));
printf("BER: %.2e ", (float) errors[j - 1] / (frame_cnt * frame_length)); printf("BER_vl: %.2e ", (float) errors_vl / (frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec, printf("%3.1f Mbps (%6.2f usec) -- vl: ", (float) frame_length / mean_usec, mean_usec);
mean_usec); printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec_vl, mean_usec_vl);
printf("\r"); printf("\r");
} }
printf("\n"); printf("\n");
}
printf("\n");
if (snr_points == 1) { if (snr_points == 1) {
if (test_known_data && seed == KNOWN_DATA_SEED if (errors) {
&& ebno_db == KNOWN_DATA_EBNO && frame_cnt == KNOWN_DATA_NFRAMES) { printf("%d Errors\n", errors);
for (j = 0; j < MAX_ITERATIONS; j++) {
if (errors[j] > known_data_errors[j]) {
fprintf(stderr, "Expected %d errors but got %d\n",
known_data_errors[j], errors[j]);
exit(-1);
} else {
printf("Iter %d ok\n", j + 1);
}
}
} else {
for (j = 0; j < MAX_ITERATIONS; j++) {
printf("BER: %g\t%u errors\n",
(float) errors[j] / (frame_cnt * frame_length), errors[j]);
if (test_errors) {
if (errors[j]
> get_expected_errors(frame_cnt, seed, j + 1, frame_length,
ebno_db)) {
fprintf(stderr, "Expected %d errors but got %d\n",
get_expected_errors(frame_cnt, seed, j + 1, frame_length,
ebno_db), errors[j]);
exit(-1);
} else {
printf("Iter %d ok\n", j + 1);
}
}
}
} }
if (errors_vl) {
printf("%d Errors in VL\n", errors_vl);
} }
} }
free(data_tx); free(data_tx);
free(symbols); free(symbols);
free(llr); free(llr);
@ -326,7 +300,6 @@ int main(int argc, char **argv) {
srslte_tcod_free(&tcod); srslte_tcod_free(&tcod);
printf("\n"); printf("\n");
output_matlab(ber, snr_points);
printf("Done\n"); printf("Done\n");
exit(0); exit(0);
} }

@ -441,6 +441,7 @@ static int decode_tb(srslte_sch_t *q,
early_stop = true; early_stop = true;
} }
} while (q->nof_iterations < SRSLTE_PDSCH_MAX_TDEC_ITERS && !early_stop); } while (q->nof_iterations < SRSLTE_PDSCH_MAX_TDEC_ITERS && !early_stop);
q->average_nof_iterations = SRSLTE_VEC_EMA((float) q->nof_iterations, q->average_nof_iterations, 0.2); q->average_nof_iterations = SRSLTE_VEC_EMA((float) q->nof_iterations, q->average_nof_iterations, 0.2);

@ -228,13 +228,11 @@ int main(int argc, char **argv) {
int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data); int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data);
gettimeofday(&t[2], NULL); gettimeofday(&t[2], NULL);
get_time_interval(t); get_time_interval(t);
printf("DECODED %d in %d:%d (%.2f Mbps)\n", r?"Error":"OK",
(int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec);
if (r) { if (r) {
printf("Error decoding TBS: %d\n", grant.mcs.tbs);
ret = -1; ret = -1;
goto quit; goto quit;
} else {
printf("DECODED OK in %d:%d (%.2f Mbps)\n",
(int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec);
} }
} }
ret = 0; ret = 0;

@ -33,8 +33,11 @@
#include <string.h> #include <string.h>
#include "srslte/utils/vector.h" #include "srslte/utils/vector.h"
#include "srslte/utils/vector_simd.h"
#include "srslte/utils/bit.h" #include "srslte/utils/bit.h"
#define HAVE_VECTOR_SIMD
#ifdef HAVE_VOLK #ifdef HAVE_VOLK
#include "volk/volk.h" #include "volk/volk.h"
#endif #endif
@ -102,6 +105,17 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
#endif #endif
} }
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
int i;
for (i=0;i<len;i++) {
z[i] = x[i]-y[i];
}
#else
srslte_vec_sub_sss_simd(x, y, z, len);
#endif
}
void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) { void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
return srslte_vec_sub_fff((float*) x,(float*) y,(float*) z, 2*len); return srslte_vec_sub_fff((float*) x,(float*) y,(float*) z, 2*len);
} }
@ -117,6 +131,17 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
#endif #endif
} }
void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
int i;
for (i=0;i<len;i++) {
z[i] = x[i]+y[i];
}
#else
srslte_vec_sum_sss_simd(x, y, z, len);
#endif
}
void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) { void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
srslte_vec_sum_fff((float*) x,(float*) y,(float*) z,2*len); srslte_vec_sum_fff((float*) x,(float*) y,(float*) z,2*len);
} }
@ -160,6 +185,25 @@ void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
#endif #endif
} }
void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
}
void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
int i;
int pow2_div = 1<<n_rightshift;
for (i=0;i<len;i++) {
z[i] = x[i]/pow2_div;
}
#else
srslte_vec_sc_div2_sss_simd(x, n_rightshift, z, len);
#endif
}
// TODO: Improve this implementation // TODO: Improve this implementation
void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) { void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
// We should use fabs() here but is statistically should be similar // We should use fabs() here but is statistically should be similar
@ -207,6 +251,18 @@ void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
#endif #endif
} }
void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
for (int i=0;i<len;i++) {
y[i] = x[lut[i]];
}
}
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
for (int i=0;i<len;i++) {
y[i] = x[lut[i]];
}
}
void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) { void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {
#ifdef HAVE_VOLK_INTERLEAVE_FUNCTION #ifdef HAVE_VOLK_INTERLEAVE_FUNCTION
volk_32f_x2_interleave_32fc(x, real, imag, len); volk_32f_x2_interleave_32fc(x, real, imag, len);
@ -316,6 +372,15 @@ void srslte_vec_fprint_i(FILE *stream, int *x, uint32_t len) {
fprintf(stream, "];\n"); fprintf(stream, "];\n");
} }
void srslte_vec_fprint_s(FILE *stream, short *x, uint32_t len) {
int i;
fprintf(stream, "[");
for (i=0;i<len;i++) {
fprintf(stream, "%d, ", x[i]);
}
fprintf(stream, "];\n");
}
void srslte_vec_fprint_hex(FILE *stream, uint8_t *x, uint32_t len) { void srslte_vec_fprint_hex(FILE *stream, uint8_t *x, uint32_t len) {
uint32_t i, nbytes; uint32_t i, nbytes;
uint8_t byte; uint8_t byte;

@ -0,0 +1,137 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <float.h>
#include <complex.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "srslte/utils/vector_simd.h"
#include <inttypes.h>
#include <stdio.h>
#include <xmmintrin.h>
/**
 * Debug helper: prints the eight signed 16-bit lanes of a 128-bit vector to
 * stdout, lowest lane first.
 */
void print128_num(__m128i var)
{
  int16_t val[8];

  /* Copy the lanes out rather than reading the vector through an int16_t
   * pointer, which would violate the strict-aliasing rules. */
  memcpy(val, &var, sizeof(val));

  printf("Numerical: %d %d %d %d %d %d %d %d \n",
         val[0], val[1], val[2], val[3], val[4], val[5],
         val[6], val[7]);
}
/**
 * Element-wise 16-bit addition: z[i] = x[i] + y[i] for i in [0, len).
 *
 * Full groups of 8 elements are processed with SSE2; the remaining
 * len % 8 elements are handled by a scalar loop. The vector add wraps on
 * overflow.
 *
 * Unaligned loads/stores are used so that the buffers do not have to be
 * 16-byte aligned (the aligned forms fault on unaligned addresses).
 *
 * \param x    first operand, len elements
 * \param y    second operand, len elements
 * \param z    result, len elements (may be the same buffer as x or y)
 * \param len  number of elements
 */
void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
{
  const uint32_t nof_vectors = len / 8;
  const __m128i *xPtr = (const __m128i*) x;
  const __m128i *yPtr = (const __m128i*) y;
  __m128i *zPtr = (__m128i*) z;

  for (uint32_t v = 0; v < nof_vectors; v++) {
    const __m128i xVal = _mm_loadu_si128(xPtr + v);
    const __m128i yVal = _mm_loadu_si128(yPtr + v);
    _mm_storeu_si128(zPtr + v, _mm_add_epi16(xVal, yVal));
  }

  /* Scalar tail for the elements that do not fill a whole vector. */
  for (uint32_t n = nof_vectors * 8; n < len; n++) {
    z[n] = (short) (x[n] + y[n]);
  }
}
/**
 * Element-wise 16-bit subtraction: z[i] = x[i] - y[i] for i in [0, len).
 *
 * Full groups of 8 elements are processed with SSE2; the remaining
 * len % 8 elements are handled by a scalar loop. The vector subtract wraps
 * on overflow.
 *
 * Unaligned loads/stores are used so that the buffers do not have to be
 * 16-byte aligned (the aligned forms fault on unaligned addresses).
 *
 * \param x    minuend, len elements
 * \param y    subtrahend, len elements
 * \param z    result, len elements (may be the same buffer as x or y)
 * \param len  number of elements
 */
void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
{
  const uint32_t nof_vectors = len / 8;
  const __m128i *xPtr = (const __m128i*) x;
  const __m128i *yPtr = (const __m128i*) y;
  __m128i *zPtr = (__m128i*) z;

  for (uint32_t v = 0; v < nof_vectors; v++) {
    const __m128i xVal = _mm_loadu_si128(xPtr + v);
    const __m128i yVal = _mm_loadu_si128(yPtr + v);
    _mm_storeu_si128(zPtr + v, _mm_sub_epi16(xVal, yVal));
  }

  /* Scalar tail for the elements that do not fill a whole vector. */
  for (uint32_t n = nof_vectors * 8; n < len; n++) {
    z[n] = (short) (x[n] - y[n]);
  }
}
/**
 * Divides every element by 2^k using an arithmetic right shift:
 * z[i] = x[i] >> k for i in [0, len). Negative values therefore round
 * towards minus infinity (e.g. -3 >> 1 == -2).
 *
 * Full groups of 8 elements are processed with SSE2; the remaining
 * len % 8 elements are handled by a scalar loop that uses the same shift,
 * so the result of an element does not depend on its position in the
 * buffer. (A scalar division would round towards zero, and 1 << 15 does
 * not fit in a short.)
 *
 * Unaligned loads/stores are used so that the buffers do not have to be
 * 16-byte aligned.
 *
 * \param x    input, len elements
 * \param k    shift amount, expected in 0..15
 * \param z    result, len elements (may be the same buffer as x)
 * \param len  number of elements
 */
void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
{
  const uint32_t nof_vectors = len / 8;
  const __m128i *xPtr = (const __m128i*) x;
  __m128i *zPtr = (__m128i*) z;

  for (uint32_t v = 0; v < nof_vectors; v++) {
    const __m128i xVal = _mm_loadu_si128(xPtr + v);
    _mm_storeu_si128(zPtr + v, _mm_srai_epi16(xVal, k));
  }

  /* The vector shift fills the lane with the sign bit for counts above 15;
   * clamping to 15 gives the same result in the scalar tail and keeps the
   * C shift well defined. Right-shifting a negative int is
   * implementation-defined in ISO C; GCC and Clang shift arithmetically. */
  const int tail_shift = ((unsigned int) k > 15u) ? 15 : k;
  for (uint32_t n = nof_vectors * 8; n < len; n++) {
    z[n] = (short) (x[n] >> tail_shift);
  }
}
Loading…
Cancel
Save