Integrated SSE decoder into pdsch receiver

master
ismagom 9 years ago
parent f2b40c57ae
commit f735268202

@ -412,7 +412,7 @@ int update_control() {
} }
} }
#define DATA_BUFF_SZ 1000 #define DATA_BUFF_SZ 1024*128
uint8_t data[8*DATA_BUFF_SZ], data_unpacked[DATA_BUFF_SZ]; uint8_t data[8*DATA_BUFF_SZ], data_unpacked[DATA_BUFF_SZ];
uint8_t data_tmp[DATA_BUFF_SZ]; uint8_t data_tmp[DATA_BUFF_SZ];
@ -511,9 +511,9 @@ int main(int argc, char **argv) {
sigaddset(&sigset, SIGINT); sigaddset(&sigset, SIGINT);
sigprocmask(SIG_UNBLOCK, &sigset, NULL); sigprocmask(SIG_UNBLOCK, &sigset, NULL);
signal(SIGINT, sig_int_handler); signal(SIGINT, sig_int_handler);
cuhd_set_master_clock_rate(uhd, 30.72e6);
if (!output_file_name) { if (!output_file_name) {
cuhd_set_master_clock_rate(uhd, 30.72e6);
printf("Set TX rate: %.2f MHz\n", printf("Set TX rate: %.2f MHz\n",
cuhd_set_tx_srate(uhd, srslte_sampling_freq_hz(cell.nof_prb)) / 1000000); cuhd_set_tx_srate(uhd, srslte_sampling_freq_hz(cell.nof_prb)) / 1000000);
printf("Set TX gain: %.1f dB\n", cuhd_set_tx_gain(uhd, uhd_gain)); printf("Set TX gain: %.1f dB\n", cuhd_set_tx_gain(uhd, uhd_gain));

@ -44,6 +44,7 @@ typedef struct SRSLTE_API {
uint8_t *c; uint8_t *c;
uint8_t *c_bytes; uint8_t *c_bytes;
float *c_float; float *c_float;
short *c_short;
uint32_t len; uint32_t len;
} srslte_sequence_t; } srslte_sequence_t;

@ -77,8 +77,8 @@ SRSLTE_API int srslte_rm_turbo_rx(float *w_buff,
uint32_t rv_idx, uint32_t rv_idx,
uint32_t nof_filler_bits); uint32_t nof_filler_bits);
SRSLTE_API int srslte_rm_turbo_rx_lut(float *input, SRSLTE_API int srslte_rm_turbo_rx_lut(int16_t *input,
float *output, int16_t *output,
uint32_t in_len, uint32_t in_len,
uint32_t cb_idx, uint32_t cb_idx,
uint32_t rv_idx); uint32_t rv_idx);

@ -42,7 +42,7 @@
typedef struct SRSLTE_API { typedef struct SRSLTE_API {
uint32_t max_cb; uint32_t max_cb;
float **buffer_f; int16_t **buffer_f;
} srslte_softbuffer_rx_t; } srslte_softbuffer_rx_t;
typedef struct SRSLTE_API { typedef struct SRSLTE_API {

@ -1,104 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_
#define TURBODECODER_
#include "srslte/config.h"
#include "srslte/fec/tc_interl.h"
#include "srslte/fec/cbsegm.h"
/* Turbo code dimensioning constants. */
/* Coded values per information bit; the file header describes a rate-1/3 code. */
#define SRSLTE_TCOD_RATE 3
/* Extra tail entries added on top of RATE * block length (see SRSLTE_TCOD_MAX_LEN_CODED). */
#define SRSLTE_TCOD_TOTALTAIL 12
/* Largest supported code block length.
 * NOTE(review): presumably the maximum turbo code block size of TS 36.212 -- confirm. */
#define SRSLTE_TCOD_MAX_LEN_CB 6144
/* Length of one fully coded block of maximum size: RATE * MAX_LEN_CB + TOTALTAIL. */
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
/* Sample type of every soft-value buffer in this decoder (an LLR, judging by the name).
 * NOTE(review): the name is short and unprefixed in a public header -- watch for clashes. */
typedef short llr_t;
/* Working state of the constituent MAP decoder (MAX-LOG-MAP per the file header). */
typedef struct SRSLTE_API {
/* NOTE(review): presumably the largest code block length this instance was sized for -- confirm in srslte_map_gen_init(). */
int max_long_cb;
/* Alpha buffer; srslte_map_gen_init() allocates
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES entries and
 * srslte_map_gen_alpha() starts by writing alpha[0]. */
llr_t *alpha;
/* Branch buffer, allocated with the same size as alpha; the beta pass reads it
 * as pairs branch[2*k], branch[2*k+1]. */
llr_t *branch;
} srslte_map_gen_t;
/* Turbo decoder object. Set up with srslte_tdec_init(), released with srslte_tdec_free(). */
typedef struct SRSLTE_API {
/* Maximum code block length passed to srslte_tdec_init(); srslte_tdec_reset()
 * rejects any long_cb larger than this. */
int max_long_cb;
/* Constituent MAP decoder state. */
srslte_map_gen_t dec;
/* Work buffers. srslte_tdec_init() allocates each one with
 * max_long_cb + SRSLTE_TCOD_TOTALTAIL entries.
 * NOTE(review): names suggest a-priori (app*), extrinsic (ext*), systematic and
 * parity streams -- confirm against srslte_tdec_iteration(). */
llr_t *app1;
llr_t *app2;
llr_t *ext1;
llr_t *ext2;
llr_t *syst;
llr_t *parity0;
llr_t *parity1;
/* Index of the code block size currently configured. srslte_tdec_init() leaves it
 * at -1 and srslte_tdec_iteration() refuses to run while it is negative. */
int current_cbidx;
/* One interleaver per supported code block size; each is released in srslte_tdec_free(). */
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
/* Iteration counter: incremented by every srslte_tdec_iteration() call and compared
 * against nof_iterations in srslte_tdec_run_all(). */
int n_iter;
} srslte_tdec_t;
/* Initialises a turbo decoder object.
 *
 * Zeroes *h, records max_long_cb and allocates the work buffers
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL entries each). current_cbidx is left at -1,
 * so srslte_tdec_reset() must be called before iterating.
 *
 * Returns 0 on success. If an allocation fails the partially built object is
 * released with srslte_tdec_free() and -1 is returned.
 */
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
uint32_t max_long_cb);
/* Releases the buffers and interleavers owned by h, then zeroes the whole structure. */
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
/* Prepares the decoder for a new code block of length long_cb.
 *
 * Returns -1 (after printing to stderr) when long_cb exceeds the max_long_cb given
 * to srslte_tdec_init(); returns 0 on success.
 * NOTE(review): presumably also selects current_cbidx and clears n_iter -- confirm
 * in the implementation.
 */
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
/* Runs one decoding iteration over the soft values in input and increments h->n_iter.
 *
 * Does no decoding and prints an error to stderr if no code block size has been
 * configured (current_cbidx < 0), i.e. srslte_tdec_reset() was not called first.
 * NOTE(review): input presumably holds SRSLTE_TCOD_RATE * long_cb + SRSLTE_TCOD_TOTALTAIL
 * values -- confirm.
 */
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
llr_t * input,
uint32_t long_cb);
/* Writes the hard decisions for the current code block to output.
 * NOTE(review): output layout is not visible here; presumably one decided bit per
 * output byte (unpacked), long_cb entries -- confirm against the implementation.
 */
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
/* Writes the hard decisions for the current code block to output.
 * NOTE(review): the implementation uses a bit-mask table running from 0x80 down to
 * 0x01, which suggests bits are packed eight per byte, most significant bit first --
 * confirm. This is the variant srslte_tdec_run_all() calls.
 */
SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
/* Convenience wrapper that decodes one code block end to end.
 *
 * Calls srslte_tdec_reset(h, long_cb), then srslte_tdec_iteration() repeatedly until
 * h->n_iter reaches nof_iterations, then srslte_tdec_decision_byte() to fill output.
 * The loop is a do/while, so at least one iteration runs even when nof_iterations is 0.
 *
 * Returns SRSLTE_SUCCESS, or SRSLTE_ERROR if the reset step fails.
 */
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
llr_t * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);
#endif

@ -37,8 +37,8 @@
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2 * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/ *********************************************************************************************/
#ifndef TURBODECODER_VL_ #ifndef TURBODECODER_GEN_
#define TURBODECODER_VL_ #define TURBODECODER_GEN_
#include "srslte/config.h" #include "srslte/config.h"
#include "srslte/fec/tc_interl.h" #include "srslte/fec/tc_interl.h"
@ -50,11 +50,9 @@
#define SRSLTE_TCOD_MAX_LEN_CB 6144 #define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL) #define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef float srslte_llr_t;
typedef struct SRSLTE_API { typedef struct SRSLTE_API {
int max_long_cb; int max_long_cb;
srslte_llr_t *beta; float *beta;
} srslte_map_gen_vl_t; } srslte_map_gen_vl_t;
typedef struct SRSLTE_API { typedef struct SRSLTE_API {
@ -62,37 +60,37 @@ typedef struct SRSLTE_API {
srslte_map_gen_vl_t dec; srslte_map_gen_vl_t dec;
srslte_llr_t *llr1; float *llr1;
srslte_llr_t *llr2; float *llr2;
srslte_llr_t *w; float *w;
srslte_llr_t *syst; float *syst;
srslte_llr_t *parity; float *parity;
int current_cbidx; int current_cbidx;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES]; srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
} srslte_tdec_vl_t; } srslte_tdec_gen_t;
SRSLTE_API int srslte_tdec_vl_init(srslte_tdec_vl_t * h, SRSLTE_API int srslte_tdec_gen_init(srslte_tdec_gen_t * h,
uint32_t max_long_cb); uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_vl_free(srslte_tdec_vl_t * h); SRSLTE_API void srslte_tdec_gen_free(srslte_tdec_gen_t * h);
SRSLTE_API int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb); SRSLTE_API int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h, SRSLTE_API void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h,
srslte_llr_t * input, float * input,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_vl_decision(srslte_tdec_vl_t * h, SRSLTE_API void srslte_tdec_gen_decision(srslte_tdec_gen_t * h,
uint8_t *output, uint8_t *output,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h, SRSLTE_API void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h,
uint8_t *output, uint8_t *output,
uint32_t long_cb); uint32_t long_cb);
SRSLTE_API int srslte_tdec_vl_run_all(srslte_tdec_vl_t * h, SRSLTE_API int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h,
srslte_llr_t * input, float * input,
uint8_t *output, uint8_t *output,
uint32_t nof_iterations, uint32_t nof_iterations,
uint32_t long_cb); uint32_t long_cb);

@ -40,7 +40,8 @@
#include "srslte/common/phy_common.h" #include "srslte/common/phy_common.h"
#include "srslte/fec/rm_turbo.h" #include "srslte/fec/rm_turbo.h"
#include "srslte/fec/turbocoder.h" #include "srslte/fec/turbocoder.h"
#include "srslte/fec/turbodecoder.h" #include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/turbodecoder_sse.h"
#include "srslte/fec/crc.h" #include "srslte/fec/crc.h"
#include "srslte/phch/pdsch_cfg.h" #include "srslte/phch/pdsch_cfg.h"
#include "srslte/phch/pusch_cfg.h" #include "srslte/phch/pusch_cfg.h"
@ -73,7 +74,7 @@ typedef struct SRSLTE_API {
uint32_t nof_ri_ack_bits; uint32_t nof_ri_ack_bits;
srslte_tcod_t encoder; srslte_tcod_t encoder;
srslte_tdec_t decoder; srslte_tdec_sse_t decoder;
srslte_crc_t crc_tb; srslte_crc_t crc_tb;
srslte_crc_t crc_cb; srslte_crc_t crc_cb;
@ -99,7 +100,7 @@ SRSLTE_API int srslte_dlsch_encode(srslte_sch_t *q,
SRSLTE_API int srslte_dlsch_decode(srslte_sch_t *q, SRSLTE_API int srslte_dlsch_decode(srslte_sch_t *q,
srslte_pdsch_cfg_t *cfg, srslte_pdsch_cfg_t *cfg,
srslte_softbuffer_rx_t *softbuffer, srslte_softbuffer_rx_t *softbuffer,
float *e_bits, int16_t *e_bits,
uint8_t *data); uint8_t *data);
SRSLTE_API int srslte_ulsch_encode(srslte_sch_t *q, SRSLTE_API int srslte_ulsch_encode(srslte_sch_t *q,
@ -120,7 +121,7 @@ SRSLTE_API int srslte_ulsch_uci_encode(srslte_sch_t *q,
SRSLTE_API int srslte_ulsch_decode(srslte_sch_t *q, SRSLTE_API int srslte_ulsch_decode(srslte_sch_t *q,
srslte_pusch_cfg_t *cfg, srslte_pusch_cfg_t *cfg,
srslte_softbuffer_rx_t *softbuffer, srslte_softbuffer_rx_t *softbuffer,
float *e_bits, int16_t *e_bits,
uint8_t *data); uint8_t *data);
SRSLTE_API float srslte_sch_beta_cqi(uint32_t I_cqi); SRSLTE_API float srslte_sch_beta_cqi(uint32_t I_cqi);

@ -65,6 +65,14 @@ SRSLTE_API void srslte_scrambling_f_offset(srslte_sequence_t *s,
int offset, int offset,
int len); int len);
SRSLTE_API void srslte_scrambling_s(srslte_sequence_t *s,
short *data);
SRSLTE_API void srslte_scrambling_s_offset(srslte_sequence_t *s,
short *data,
int offset,
int len);
SRSLTE_API void srslte_scrambling_c(srslte_sequence_t *s, SRSLTE_API void srslte_scrambling_c(srslte_sequence_t *s,
cf_t *data); cf_t *data);

@ -63,7 +63,8 @@
#include "srslte/fec/crc.h" #include "srslte/fec/crc.h"
#include "srslte/fec/tc_interl.h" #include "srslte/fec/tc_interl.h"
#include "srslte/fec/turbocoder.h" #include "srslte/fec/turbocoder.h"
#include "srslte/fec/turbodecoder.h" #include "srslte/fec/turbodecoder_sse.h"
#include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/cbsegm.h" #include "srslte/fec/cbsegm.h"
#include "srslte/fec/rm_conv.h" #include "srslte/fec/rm_conv.h"
#include "srslte/fec/rm_turbo.h" #include "srslte/fec/rm_turbo.h"

@ -130,6 +130,7 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len
/* real vector product (element-wise) */ /* real vector product (element-wise) */
SRSLTE_API void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len);
/* Dot-product */ /* Dot-product */
SRSLTE_API cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len);

@ -41,6 +41,8 @@ SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t l
SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len); SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len); SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len);

@ -85,6 +85,7 @@ int srslte_sequence_LTE_pr(srslte_sequence_t *q, uint32_t len, uint32_t seed) {
srslte_bit_pack_vector(q->c, q->c_bytes, len); srslte_bit_pack_vector(q->c, q->c_bytes, len);
for (int i=0;i<len;i++) { for (int i=0;i<len;i++) {
q->c_float[i] = (1-2*q->c[i]); q->c_float[i] = (1-2*q->c[i]);
q->c_short[i] = (int16_t) q->c_float[i];
} }
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;
} }
@ -98,13 +99,16 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) {
if (q->c_float) { if (q->c_float) {
free(q->c_float); free(q->c_float);
} }
if (q->c_short) {
free(q->c_short);
}
} }
if (!q->c) { if (!q->c) {
q->c = srslte_vec_malloc(len * sizeof(uint8_t)); q->c = srslte_vec_malloc(len * sizeof(uint8_t));
if (!q->c) { if (!q->c) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
q->c_bytes = srslte_vec_malloc(len * sizeof(uint8_t)/8); q->c_bytes = srslte_vec_malloc(len * sizeof(uint8_t)/8+8);
if (!q->c_bytes) { if (!q->c_bytes) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
@ -112,6 +116,10 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) {
if (!q->c_float) { if (!q->c_float) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
q->c_short = srslte_vec_malloc(len * sizeof(short));
if (!q->c_short) {
return SRSLTE_ERROR;
}
q->len = len; q->len = len;
} }
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;

@ -28,7 +28,7 @@
#include <strings.h> #include <strings.h>
#include <math.h> #include <math.h>
#include "srslte/fec/turbodecoder.h" #include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/cbsegm.h" #include "srslte/fec/cbsegm.h"
#include "srslte/utils/debug.h" #include "srslte/utils/debug.h"

@ -272,7 +272,7 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
} }
} }
int srslte_rm_turbo_rx_lut(float *input, float *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx) int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{ {
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) { if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12; uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;

@ -36,7 +36,7 @@
#include "srslte/common/phy_common.h" #include "srslte/common/phy_common.h"
#include "srslte/phch/ra.h" #include "srslte/phch/ra.h"
#include "srslte/fec/turbodecoder.h" #include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/rm_turbo.h" #include "srslte/fec/rm_turbo.h"
#include "srslte/fec/softbuffer.h" #include "srslte/fec/softbuffer.h"
#include "srslte/utils/vector.h" #include "srslte/utils/vector.h"

@ -32,7 +32,7 @@
#include <strings.h> #include <strings.h>
#include <math.h> #include <math.h>
#include "srslte/fec/turbodecoder_vl.h" #include "srslte/fec/turbodecoder_gen.h"
#include "srslte/utils/vector.h" #include "srslte/utils/vector.h"
#define NUMSTATES 8 #define NUMSTATES 8
@ -49,14 +49,14 @@
* Decoder * Decoder
* *
************************************************/ ************************************************/
static void map_gen_beta(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_llr_t * parity, static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity,
uint32_t long_cb) uint32_t long_cb)
{ {
srslte_llr_t m_b[8], new[8], old[8]; float m_b[8], new[8], old[8];
srslte_llr_t x, y, xy; float x, y, xy;
int k; int k;
uint32_t end = long_cb + SRSLTE_TCOD_RATE; uint32_t end = long_cb + SRSLTE_TCOD_RATE;
srslte_llr_t *beta = s->beta; float *beta = s->beta;
uint32_t i; uint32_t i;
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
@ -96,16 +96,16 @@ static void map_gen_beta(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_l
} }
} }
static void map_gen_alpha(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output, static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity, float * output,
uint32_t long_cb) uint32_t long_cb)
{ {
srslte_llr_t m_b[8], new[8], old[8], max1[8], max0[8]; float m_b[8], new[8], old[8], max1[8], max0[8];
srslte_llr_t m1, m0; float m1, m0;
srslte_llr_t x, y, xy; float x, y, xy;
srslte_llr_t out; float out;
uint32_t k; uint32_t k;
uint32_t end = long_cb; uint32_t end = long_cb;
srslte_llr_t *beta = s->beta; float *beta = s->beta;
uint32_t i; uint32_t i;
old[0] = 0; old[0] = 0;
@ -166,7 +166,7 @@ static void map_gen_alpha(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_
static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb) static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb)
{ {
bzero(h, sizeof(srslte_map_gen_vl_t)); bzero(h, sizeof(srslte_map_gen_vl_t));
h->beta = srslte_vec_malloc(sizeof(srslte_llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES); h->beta = srslte_vec_malloc(sizeof(float) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->beta) { if (!h->beta) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
return -1; return -1;
@ -183,7 +183,7 @@ static void map_gen_free(srslte_map_gen_vl_t * h)
bzero(h, sizeof(srslte_map_gen_vl_t)); bzero(h, sizeof(srslte_map_gen_vl_t));
} }
static void map_gen_dec(srslte_map_gen_vl_t * h, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output, static void map_gen_dec(srslte_map_gen_vl_t * h, float * input, float * parity, float * output,
uint32_t long_cb) uint32_t long_cb)
{ {
uint32_t k; uint32_t k;
@ -201,35 +201,35 @@ static void map_gen_dec(srslte_map_gen_vl_t * h, srslte_llr_t * input, srslte_ll
* TURBO DECODER INTERFACE * TURBO DECODER INTERFACE
* *
************************************************/ ************************************************/
int srslte_tdec_vl_init(srslte_tdec_vl_t * h, uint32_t max_long_cb) int srslte_tdec_gen_init(srslte_tdec_gen_t * h, uint32_t max_long_cb)
{ {
int ret = -1; int ret = -1;
bzero(h, sizeof(srslte_tdec_vl_t)); bzero(h, sizeof(srslte_tdec_gen_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL; uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb; h->max_long_cb = max_long_cb;
h->llr1 = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->llr1 = srslte_vec_malloc(sizeof(float) * len);
if (!h->llr1) { if (!h->llr1) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->llr2 = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->llr2 = srslte_vec_malloc(sizeof(float) * len);
if (!h->llr2) { if (!h->llr2) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->w = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->w = srslte_vec_malloc(sizeof(float) * len);
if (!h->w) { if (!h->w) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->syst = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->syst = srslte_vec_malloc(sizeof(float) * len);
if (!h->syst) { if (!h->syst) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->parity = srslte_vec_malloc(sizeof(srslte_llr_t) * len); h->parity = srslte_vec_malloc(sizeof(float) * len);
if (!h->parity) { if (!h->parity) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
@ -248,12 +248,12 @@ int srslte_tdec_vl_init(srslte_tdec_vl_t * h, uint32_t max_long_cb)
h->current_cbidx = -1; h->current_cbidx = -1;
ret = 0; ret = 0;
clean_and_exit:if (ret == -1) { clean_and_exit:if (ret == -1) {
srslte_tdec_vl_free(h); srslte_tdec_gen_free(h);
} }
return ret; return ret;
} }
void srslte_tdec_vl_free(srslte_tdec_vl_t * h) void srslte_tdec_gen_free(srslte_tdec_gen_t * h)
{ {
if (h->llr1) { if (h->llr1) {
free(h->llr1); free(h->llr1);
@ -277,10 +277,10 @@ void srslte_tdec_vl_free(srslte_tdec_vl_t * h)
srslte_tc_interl_free(&h->interleaver[i]); srslte_tc_interl_free(&h->interleaver[i]);
} }
bzero(h, sizeof(srslte_tdec_vl_t)); bzero(h, sizeof(srslte_tdec_gen_t));
} }
void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h, srslte_llr_t * input, uint32_t long_cb) void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h, float * input, uint32_t long_cb)
{ {
uint32_t i; uint32_t i;
@ -323,18 +323,18 @@ void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h, srslte_llr_t * input, uint32
h->w[i] += h->llr2[deinter[i]] - h->llr1[i]; h->w[i] += h->llr2[deinter[i]] - h->llr1[i];
} }
} else { } else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_vl_reset() first\n"); fprintf(stderr, "Error CB index not set (call srslte_tdec_gen_reset() first\n");
} }
} }
int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb) int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb)
{ {
if (long_cb > h->max_long_cb) { if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n", fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
h->max_long_cb); h->max_long_cb);
return -1; return -1;
} }
memset(h->w, 0, sizeof(srslte_llr_t) * long_cb); memset(h->w, 0, sizeof(float) * long_cb);
h->current_cbidx = srslte_cbsegm_cbindex(long_cb); h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
if (h->current_cbidx < 0) { if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb); fprintf(stderr, "Invalid CB length %d\n", long_cb);
@ -343,7 +343,7 @@ int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb)
return 0; return 0;
} }
void srslte_tdec_vl_decision(srslte_tdec_vl_t * h, uint8_t *output, uint32_t long_cb) void srslte_tdec_gen_decision(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
{ {
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
uint32_t i; uint32_t i;
@ -352,7 +352,7 @@ void srslte_tdec_vl_decision(srslte_tdec_vl_t * h, uint8_t *output, uint32_t lon
} }
} }
void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h, uint8_t *output, uint32_t long_cb) void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
{ {
uint32_t i; uint32_t i;
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
@ -373,21 +373,21 @@ void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h, uint8_t *output, uint32_
} }
} }
int srslte_tdec_vl_run_all(srslte_tdec_vl_t * h, srslte_llr_t * input, uint8_t *output, int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, float * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb) uint32_t nof_iterations, uint32_t long_cb)
{ {
uint32_t iter = 0; uint32_t iter = 0;
if (srslte_tdec_vl_reset(h, long_cb)) { if (srslte_tdec_gen_reset(h, long_cb)) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
do { do {
srslte_tdec_vl_iteration(h, input, long_cb); srslte_tdec_gen_iteration(h, input, long_cb);
iter++; iter++;
} while (iter < nof_iterations); } while (iter < nof_iterations);
srslte_tdec_vl_decision(h, output, long_cb); srslte_tdec_gen_decision(h, output, long_cb);
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;
} }

@ -32,7 +32,7 @@
#include <strings.h> #include <strings.h>
#include <math.h> #include <math.h>
#include "srslte/fec/turbodecoder.h" #include "srslte/fec/turbodecoder_sse.h"
#include "srslte/utils/vector.h" #include "srslte/utils/vector.h"
#include <inttypes.h> #include <inttypes.h>
@ -62,7 +62,7 @@ static inline int16_t hMax(__m128i buffer)
return (int16_t)(_mm_cvtsi128_si32(tmp3)); return (int16_t)(_mm_cvtsi128_si32(tmp3));
} }
void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb) void srslte_map_gen_beta(srslte_map_gen_t * s, int16_t * output, uint32_t long_cb)
{ {
int k; int k;
uint32_t end = long_cb + 3; uint32_t end = long_cb + 3;
@ -101,7 +101,7 @@ void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10); shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14); shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
__m128i gv; __m128i gv;
llr_t *b = &s->branch[2*long_cb-8]; int16_t *b = &s->branch[2*long_cb-8];
__m128i *gPtr = (__m128i*) b; __m128i *gPtr = (__m128i*) b;
__m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0); __m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
@ -119,8 +119,8 @@ void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
bn = _mm_add_epi16(bn, alpha_k); output[k-d] = hMax(bn) - hMax(bp); bn = _mm_add_epi16(bn, alpha_k); output[k-d] = hMax(bn) - hMax(bp);
for (k=end-1; k>=long_cb; k--) { for (k=end-1; k>=long_cb; k--) {
llr_t g0 = s->branch[2*k]; int16_t g0 = s->branch[2*k];
llr_t g1 = s->branch[2*k+1]; int16_t g1 = s->branch[2*k+1];
g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1); g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1);
BETA_STEP(g); BETA_STEP(g);
@ -147,7 +147,7 @@ void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb) void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
{ {
uint32_t k; uint32_t k;
llr_t *alpha = s->alpha; int16_t *alpha = s->alpha;
uint32_t i; uint32_t i;
alpha[0] = 0; alpha[0] = 0;
@ -221,7 +221,7 @@ void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
} }
} }
void srslte_map_gen_gamma(srslte_map_gen_t * h, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb) void srslte_map_gen_gamma(srslte_map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
{ {
__m128i res10, res20, res11, res21, res1, res2; __m128i res10, res20, res11, res21, res1, res2;
__m128i in, ap, pa, g1, g0; __m128i in, ap, pa, g1, g0;
@ -278,12 +278,12 @@ void srslte_map_gen_gamma(srslte_map_gen_t * h, llr_t *input, llr_t *app, llr_t
int srslte_map_gen_init(srslte_map_gen_t * h, int max_long_cb) int srslte_map_gen_init(srslte_map_gen_t * h, int max_long_cb)
{ {
bzero(h, sizeof(srslte_map_gen_t)); bzero(h, sizeof(srslte_map_gen_t));
h->alpha = srslte_vec_malloc(sizeof(llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES); h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->alpha) { if (!h->alpha) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
return -1; return -1;
} }
h->branch = srslte_vec_malloc(sizeof(llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES); h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->branch) { if (!h->branch) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
return -1; return -1;
@ -303,7 +303,7 @@ void srslte_map_gen_free(srslte_map_gen_t * h)
bzero(h, sizeof(srslte_map_gen_t)); bzero(h, sizeof(srslte_map_gen_t));
} }
void srslte_map_gen_dec(srslte_map_gen_t * h, llr_t * input, llr_t *app, llr_t * parity, llr_t * output, void srslte_map_gen_dec(srslte_map_gen_t * h, int16_t * input, int16_t *app, int16_t * parity, int16_t * output,
uint32_t long_cb) uint32_t long_cb)
{ {
@ -323,45 +323,45 @@ void srslte_map_gen_dec(srslte_map_gen_t * h, llr_t * input, llr_t *app, llr_t *
* TURBO DECODER INTERFACE * TURBO DECODER INTERFACE
* *
************************************************/ ************************************************/
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) int srslte_tdec_sse_init(srslte_tdec_sse_t * h, uint32_t max_long_cb)
{ {
int ret = -1; int ret = -1;
bzero(h, sizeof(srslte_tdec_t)); bzero(h, sizeof(srslte_tdec_sse_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL; uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb; h->max_long_cb = max_long_cb;
h->app1 = srslte_vec_malloc(sizeof(llr_t) * len); h->app1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app1) { if (!h->app1) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->app2 = srslte_vec_malloc(sizeof(llr_t) * len); h->app2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app2) { if (!h->app2) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->ext1 = srslte_vec_malloc(sizeof(llr_t) * len); h->ext1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext1) { if (!h->ext1) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->ext2 = srslte_vec_malloc(sizeof(llr_t) * len); h->ext2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext2) { if (!h->ext2) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->syst = srslte_vec_malloc(sizeof(llr_t) * len); h->syst = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->syst) { if (!h->syst) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->parity0 = srslte_vec_malloc(sizeof(llr_t) * len); h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity0) { if (!h->parity0) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
} }
h->parity1 = srslte_vec_malloc(sizeof(llr_t) * len); h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity1) { if (!h->parity1) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
goto clean_and_exit; goto clean_and_exit;
@ -380,12 +380,12 @@ int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb)
h->current_cbidx = -1; h->current_cbidx = -1;
ret = 0; ret = 0;
clean_and_exit:if (ret == -1) { clean_and_exit:if (ret == -1) {
srslte_tdec_free(h); srslte_tdec_sse_free(h);
} }
return ret; return ret;
} }
void srslte_tdec_free(srslte_tdec_t * h) void srslte_tdec_sse_free(srslte_tdec_sse_t * h)
{ {
if (h->app1) { if (h->app1) {
free(h->app1); free(h->app1);
@ -415,10 +415,10 @@ void srslte_tdec_free(srslte_tdec_t * h)
srslte_tc_interl_free(&h->interleaver[i]); srslte_tc_interl_free(&h->interleaver[i]);
} }
bzero(h, sizeof(srslte_tdec_t)); bzero(h, sizeof(srslte_tdec_sse_t));
} }
void deinterleave_input(srslte_tdec_t *h, short *input, uint32_t long_cb) { void deinterleave_input(srslte_tdec_sse_t *h, int16_t *input, uint32_t long_cb) {
uint32_t i; uint32_t i;
__m128i *inputPtr = (__m128i*) input; __m128i *inputPtr = (__m128i*) input;
@ -502,7 +502,7 @@ void deinterleave_input(srslte_tdec_t *h, short *input, uint32_t long_cb) {
} }
void srslte_tdec_iteration(srslte_tdec_t * h, short * input, uint32_t long_cb) void srslte_tdec_sse_iteration(srslte_tdec_sse_t * h, int16_t * input, uint32_t long_cb)
{ {
if (h->current_cbidx >= 0) { if (h->current_cbidx >= 0) {
@ -541,11 +541,11 @@ void srslte_tdec_iteration(srslte_tdec_t * h, short * input, uint32_t long_cb)
h->n_iter++; h->n_iter++;
} else { } else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_reset() first\n"); fprintf(stderr, "Error CB index not set (call srslte_tdec_sse_reset() first\n");
} }
} }
int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) int srslte_tdec_sse_reset(srslte_tdec_sse_t * h, uint32_t long_cb)
{ {
if (long_cb > h->max_long_cb) { if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n", fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
@ -561,7 +561,7 @@ int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb)
return 0; return 0;
} }
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) void srslte_tdec_sse_decision(srslte_tdec_sse_t * h, uint8_t *output, uint32_t long_cb)
{ {
__m128i zero = _mm_set1_epi16(0); __m128i zero = _mm_set1_epi16(0);
__m128i lsb_mask = _mm_set1_epi16(1); __m128i lsb_mask = _mm_set1_epi16(1);
@ -587,7 +587,7 @@ void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb)
} }
} }
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) void srslte_tdec_sse_decision_byte(srslte_tdec_sse_t * h, uint8_t *output, uint32_t long_cb)
{ {
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
@ -606,18 +606,18 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long
} }
} }
int srslte_tdec_run_all(srslte_tdec_t * h, short * input, uint8_t *output, int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h, int16_t * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb) uint32_t nof_iterations, uint32_t long_cb)
{ {
if (srslte_tdec_reset(h, long_cb)) { if (srslte_tdec_sse_reset(h, long_cb)) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
do { do {
srslte_tdec_iteration(h, input, long_cb); srslte_tdec_sse_iteration(h, input, long_cb);
} while (h->n_iter < nof_iterations); } while (h->n_iter < nof_iterations);
srslte_tdec_decision_byte(h, output, long_cb); srslte_tdec_sse_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;
} }

@ -50,7 +50,7 @@ uint8_t bits[3*6144+12];
uint8_t buff_b[BUFFSZ]; uint8_t buff_b[BUFFSZ];
float buff_f[BUFFSZ]; float buff_f[BUFFSZ];
float bits_f[3*6144+12]; float bits_f[3*6144+12];
float bits2_f[3*6144+12]; short bits2_s[3*6144+12];
void usage(char *prog) { void usage(char *prog) {
printf("Usage: %s -c cb_idx -e nof_e_bits [-i rv_idx]\n", prog); printf("Usage: %s -c cb_idx -e nof_e_bits [-i rv_idx]\n", prog);
@ -84,18 +84,24 @@ void parse_args(int argc, char **argv) {
int main(int argc, char **argv) { int main(int argc, char **argv) {
int i; int i;
uint8_t *rm_bits, *rm_bits2, *rm_bits2_bytes; uint8_t *rm_bits, *rm_bits2, *rm_bits2_bytes;
short *rm_bits_s;
float *rm_bits_f; float *rm_bits_f;
parse_args(argc, argv); parse_args(argc, argv);
srslte_rm_turbo_gentables(); srslte_rm_turbo_gentables();
rm_bits_f = malloc(sizeof(float) * nof_e_bits); rm_bits_s = srslte_vec_malloc(sizeof(short) * nof_e_bits);
if (!rm_bits_s) {
perror("malloc");
exit(-1);
}
rm_bits_f = srslte_vec_malloc(sizeof(float) * nof_e_bits);
if (!rm_bits_f) { if (!rm_bits_f) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
} }
rm_bits = malloc(sizeof(uint8_t) * nof_e_bits); rm_bits = srslte_vec_malloc(sizeof(uint8_t) * nof_e_bits);
if (!rm_bits) { if (!rm_bits) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
@ -171,18 +177,18 @@ int main(int argc, char **argv) {
for (int i=0;i<nof_e_bits;i++) { for (int i=0;i<nof_e_bits;i++) {
rm_bits_f[i] = rand()%10-5; rm_bits_f[i] = rand()%10-5;
rm_bits_s[i] = (short) rm_bits_f[i];
} }
bzero(buff_f, BUFFSZ*sizeof(float)); bzero(buff_f, BUFFSZ*sizeof(float));
srslte_rm_turbo_rx(buff_f, BUFFSZ, rm_bits_f, nof_e_bits, bits_f, long_cb_enc, rv_idx, 0); srslte_rm_turbo_rx(buff_f, BUFFSZ, rm_bits_f, nof_e_bits, bits_f, long_cb_enc, rv_idx, 0);
bzero(bits2_f, long_cb_enc*sizeof(float)); bzero(bits2_s, long_cb_enc*sizeof(short));
srslte_rm_turbo_rx_lut(rm_bits_f, bits2_f, nof_e_bits, cb_idx, rv_idx); srslte_rm_turbo_rx_lut(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx);
for (int i=0;i<long_cb_enc;i++) { for (int i=0;i<long_cb_enc;i++) {
if (bits_f[i] != bits2_f[i]) { if (bits_f[i] != bits2_s[i]) {
printf("error RX in bit %d %f!=%f\n", i, bits_f[i], bits2_f[i]); printf("error RX in bit %d %f!=%d\n", i, bits_f[i], bits2_s[i]);
exit(-1); exit(-1);
} }
} }

@ -36,7 +36,6 @@
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
#include "srslte/srslte.h" #include "srslte/srslte.h"
#include "srslte/fec/turbodecoder_vl.h"
#include "turbodecoder_test.h" #include "turbodecoder_test.h"
@ -119,12 +118,12 @@ int main(int argc, char **argv) {
float var[SNR_POINTS]; float var[SNR_POINTS];
uint32_t snr_points; uint32_t snr_points;
uint32_t errors; uint32_t errors;
uint32_t errors_vl; uint32_t errors_gen;
uint32_t coded_length; uint32_t coded_length;
struct timeval tdata[3]; struct timeval tdata[3];
float mean_usec, mean_usec_vl; float mean_usec, mean_usec_gen;
srslte_tdec_t tdec; srslte_tdec_sse_t tdec;
srslte_tdec_vl_t tdec_vl; srslte_tdec_gen_t tdec_gen;
srslte_tcod_t tcod; srslte_tcod_t tcod;
parse_args(argc, argv); parse_args(argc, argv);
@ -190,12 +189,12 @@ int main(int argc, char **argv) {
exit(-1); exit(-1);
} }
if (srslte_tdec_init(&tdec, frame_length)) { if (srslte_tdec_sse_init(&tdec, frame_length)) {
fprintf(stderr, "Error initiating Turbo decoder\n"); fprintf(stderr, "Error initiating Turbo decoder\n");
exit(-1); exit(-1);
} }
if (srslte_tdec_vl_init(&tdec_vl, frame_length)) { if (srslte_tdec_gen_init(&tdec_gen, frame_length)) {
fprintf(stderr, "Error initiating Turbo decoder\n"); fprintf(stderr, "Error initiating Turbo decoder\n");
exit(-1); exit(-1);
} }
@ -217,9 +216,9 @@ int main(int argc, char **argv) {
for (i = 0; i < snr_points; i++) { for (i = 0; i < snr_points; i++) {
mean_usec = 0; mean_usec = 0;
mean_usec_vl = 0; mean_usec_gen = 0;
errors = 0; errors = 0;
errors_vl = 0; errors_gen = 0;
frame_cnt = 0; frame_cnt = 0;
while (frame_cnt < nof_frames) { while (frame_cnt < nof_frames) {
/* generate data_tx */ /* generate data_tx */
@ -250,8 +249,8 @@ int main(int argc, char **argv) {
llr_s[j] = (int16_t) (100*llr[j]); llr_s[j] = (int16_t) (100*llr[j]);
} }
/* decoder */ /* decoder */
srslte_tdec_reset(&tdec, frame_length); srslte_tdec_sse_reset(&tdec, frame_length);
srslte_tdec_vl_reset(&tdec_vl, frame_length); srslte_tdec_gen_reset(&tdec_gen, frame_length);
uint32_t t; uint32_t t;
if (nof_iterations == -1) { if (nof_iterations == -1) {
@ -262,7 +261,7 @@ int main(int argc, char **argv) {
gettimeofday(&tdata[1], NULL); gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) { for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length); srslte_tdec_sse_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
} }
gettimeofday(&tdata[2], NULL); gettimeofday(&tdata[2], NULL);
get_time_interval(tdata); get_time_interval(tdata);
@ -274,21 +273,21 @@ int main(int argc, char **argv) {
gettimeofday(&tdata[1], NULL); gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) { for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_vl_run_all(&tdec_vl, llr, data_rx, t, frame_length); srslte_tdec_gen_run_all(&tdec_gen, llr, data_rx, t, frame_length);
} }
gettimeofday(&tdata[2], NULL); gettimeofday(&tdata[2], NULL);
get_time_interval(tdata); get_time_interval(tdata);
mean_usec_vl = (float) mean_usec_vl * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1; mean_usec_gen = (float) mean_usec_gen * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
/* check errors */ /* check errors */
errors_vl += srslte_bit_diff(data_tx, data_rx, frame_length); errors_gen += srslte_bit_diff(data_tx, data_rx, frame_length);
frame_cnt++; frame_cnt++;
printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames); printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length)); printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length));
printf("BER_vl: %.2e ", (float) errors_vl / (frame_cnt * frame_length)); printf("BER_gen: %.2e ", (float) errors_gen / (frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec) -- vl: ", (float) frame_length / mean_usec, mean_usec); printf("%3.1f Mbps (%6.2f usec) -- gen: ", (float) frame_length / mean_usec, mean_usec);
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec_vl, mean_usec_vl); printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec_gen, mean_usec_gen);
printf("\r"); printf("\r");
} }
@ -298,10 +297,10 @@ int main(int argc, char **argv) {
printf("\n"); printf("\n");
if (snr_points == 1) { if (snr_points == 1) {
if (errors) { if (errors) {
printf("%d Errors\n", errors); printf("%d Errors in SSE\n", errors);
} }
if (errors_vl) { if (errors_gen) {
printf("%d Errors in VL\n", errors_vl); printf("%d Errors in GEN\n", errors_gen);
} }
} }
@ -312,7 +311,8 @@ int main(int argc, char **argv) {
free(llr_c); free(llr_c);
free(data_rx); free(data_rx);
srslte_tdec_free(&tdec); srslte_tdec_sse_free(&tdec);
srslte_tdec_gen_free(&tdec_gen);
srslte_tcod_free(&tcod); srslte_tcod_free(&tcod);
printf("\n"); printf("\n");

@ -47,7 +47,7 @@ void help()
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{ {
srslte_tdec_t tdec; srslte_tdec_gen_t tdec;
float *input_llr; float *input_llr;
uint8_t *output_data; uint8_t *output_data;
uint32_t nof_bits; uint32_t nof_bits;
@ -86,18 +86,18 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
// allocate memory for output bits // allocate memory for output bits
output_data = srslte_vec_malloc(nof_bits * sizeof(uint8_t)); output_data = srslte_vec_malloc(nof_bits * sizeof(uint8_t));
if (srslte_tdec_init(&tdec, nof_bits)) { if (srslte_tdec_gen_init(&tdec, nof_bits)) {
mexErrMsgTxt("Error initiating Turbo decoder\n"); mexErrMsgTxt("Error initiating Turbo decoder\n");
return; return;
} }
srslte_tdec_run_all(&tdec, input_llr, output_data, nof_iterations, nof_bits); srslte_tdec_gen_run_all(&tdec, input_llr, output_data, nof_iterations, nof_bits);
if (nlhs >= 1) { if (nlhs >= 1) {
mexutils_write_uint8(output_data, &plhs[0], nof_bits, 1); mexutils_write_uint8(output_data, &plhs[0], nof_bits, 1);
} }
srslte_tdec_free(&tdec); srslte_tdec_gen_free(&tdec);
free(input_llr); free(input_llr);
free(output_data); free(output_data);

@ -58,7 +58,6 @@ void demod_bpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
} }
void demod_qpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) { void demod_qpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
srslte_vec_fprint_f(stdout, (float*) symbols, nsymbols*2);
srslte_vec_convert_fi((float*) symbols, llr, -SCALE_SHORT_CONV*sqrt(2), nsymbols*2); srslte_vec_convert_fi((float*) symbols, llr, -SCALE_SHORT_CONV*sqrt(2), nsymbols*2);
} }

@ -229,8 +229,8 @@ int srslte_pdsch_init(srslte_pdsch_t *q, srslte_cell_t cell) {
q->rnti_is_set = false; q->rnti_is_set = false;
// Allocate floats for reception (LLRs) // Allocate int16_t for reception (LLRs)
q->e = srslte_vec_malloc(sizeof(float) * q->max_re * srslte_mod_bits_x_symbol(SRSLTE_MOD_64QAM)); q->e = srslte_vec_malloc(sizeof(int16_t) * q->max_re * srslte_mod_bits_x_symbol(SRSLTE_MOD_64QAM));
if (!q->e) { if (!q->e) {
goto clean; goto clean;
} }
@ -417,7 +417,7 @@ int srslte_pdsch_decode_rnti(srslte_pdsch_t *q,
* The MAX-log-MAP algorithm used in turbo decoding is insensitive to SNR estimation, * The MAX-log-MAP algorithm used in turbo decoding is insensitive to SNR estimation,
* thus we don't need to set it in the LLRs normalization * thus we don't need to set it in the LLRs normalization
*/ */
srslte_demod_soft_demodulate(cfg->grant.mcs.mod, q->d, q->e, cfg->nbits.nof_re); srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->e, cfg->nbits.nof_re);
/* descramble */ /* descramble */
if (rnti != q->rnti) { if (rnti != q->rnti) {
@ -425,10 +425,10 @@ int srslte_pdsch_decode_rnti(srslte_pdsch_t *q,
if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) {
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
srslte_scrambling_f_offset(&seq, q->e, 0, cfg->nbits.nof_bits); srslte_scrambling_s_offset(&seq, q->e, 0, cfg->nbits.nof_bits);
srslte_sequence_free(&seq); srslte_sequence_free(&seq);
} else { } else {
srslte_scrambling_f_offset(&q->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits); srslte_scrambling_s_offset(&q->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits);
} }
return srslte_dlsch_decode(&q->dl_sch, cfg, softbuffer, q->e, data); return srslte_dlsch_decode(&q->dl_sch, cfg, softbuffer, q->e, data);

@ -109,14 +109,14 @@ int srslte_sch_init(srslte_sch_t *q) {
fprintf(stderr, "Error initiating Turbo Coder\n"); fprintf(stderr, "Error initiating Turbo Coder\n");
goto clean; goto clean;
} }
if (srslte_tdec_init(&q->decoder, SRSLTE_TCOD_MAX_LEN_CB)) { if (srslte_tdec_sse_init(&q->decoder, SRSLTE_TCOD_MAX_LEN_CB)) {
fprintf(stderr, "Error initiating Turbo Decoder\n"); fprintf(stderr, "Error initiating Turbo Decoder\n");
goto clean; goto clean;
} }
srslte_rm_turbo_gentables(); srslte_rm_turbo_gentables();
// Allocate floats for reception (LLRs) // Allocate int16 for reception (LLRs)
q->cb_in = srslte_vec_malloc(sizeof(uint8_t) * (SRSLTE_TCOD_MAX_LEN_CB+8)/8); q->cb_in = srslte_vec_malloc(sizeof(uint8_t) * (SRSLTE_TCOD_MAX_LEN_CB+8)/8);
if (!q->cb_in) { if (!q->cb_in) {
goto clean; goto clean;
@ -161,7 +161,7 @@ void srslte_sch_free(srslte_sch_t *q) {
if (q->ul_interleaver) { if (q->ul_interleaver) {
free(q->ul_interleaver); free(q->ul_interleaver);
} }
srslte_tdec_free(&q->decoder); srslte_tdec_sse_free(&q->decoder);
srslte_tcod_free(&q->encoder); srslte_tcod_free(&q->encoder);
srslte_uci_cqi_free(&q->uci_cqi); srslte_uci_cqi_free(&q->uci_cqi);
bzero(q, sizeof(srslte_sch_t)); bzero(q, sizeof(srslte_sch_t));
@ -329,7 +329,7 @@ static int encode_tb(srslte_sch_t *q,
static int decode_tb(srslte_sch_t *q, static int decode_tb(srslte_sch_t *q,
srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm, srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm,
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits, uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
float *e_bits, uint8_t *data) int16_t *e_bits, uint8_t *data)
{ {
uint8_t parity[3] = {0, 0, 0}; uint8_t parity[3] = {0, 0, 0};
uint32_t par_rx, par_tx; uint32_t par_rx, par_tx;
@ -408,7 +408,7 @@ static int decode_tb(srslte_sch_t *q,
if (SRSLTE_VERBOSE_ISDEBUG()) { if (SRSLTE_VERBOSE_ISDEBUG()) {
DEBUG("CB#%d RMOUT: ", i); DEBUG("CB#%d RMOUT: ", i);
srslte_vec_fprint_f(stdout, softbuffer->buffer_f[i], 3*cb_len+12); srslte_vec_fprint_s(stdout, softbuffer->buffer_f[i], 3*cb_len+12);
} }
/* Turbo Decoding with CRC-based early stopping */ /* Turbo Decoding with CRC-based early stopping */
@ -418,10 +418,10 @@ static int decode_tb(srslte_sch_t *q,
srslte_crc_t *crc_ptr; srslte_crc_t *crc_ptr;
early_stop = false; early_stop = false;
srslte_tdec_reset(&q->decoder, cb_len); srslte_tdec_sse_reset(&q->decoder, cb_len);
do { do {
srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len); srslte_tdec_sse_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len);
q->nof_iterations++; q->nof_iterations++;
if (cb_segm->C > 1) { if (cb_segm->C > 1) {
@ -434,7 +434,7 @@ static int decode_tb(srslte_sch_t *q,
crc_ptr = &q->crc_tb; crc_ptr = &q->crc_tb;
} }
srslte_tdec_decision_byte(&q->decoder, q->cb_in, cb_len); srslte_tdec_sse_decision_byte(&q->decoder, q->cb_in, cb_len);
/* Check Codeblock CRC and stop early if incorrect */ /* Check Codeblock CRC and stop early if incorrect */
if (!srslte_crc_checksum_byte(crc_ptr, cb_in_ptr, len_crc)) { if (!srslte_crc_checksum_byte(crc_ptr, cb_in_ptr, len_crc)) {
@ -504,7 +504,7 @@ static int decode_tb(srslte_sch_t *q,
} }
int srslte_dlsch_decode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuffer_rx_t *softbuffer, int srslte_dlsch_decode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuffer_rx_t *softbuffer,
float *e_bits, uint8_t *data) int16_t *e_bits, uint8_t *data)
{ {
return decode_tb(q, return decode_tb(q,
softbuffer, &cfg->cb_segm, softbuffer, &cfg->cb_segm,
@ -522,7 +522,7 @@ int srslte_dlsch_encode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuf
} }
int srslte_ulsch_decode(srslte_sch_t *q, srslte_pusch_cfg_t *cfg, srslte_softbuffer_rx_t *softbuffer, int srslte_ulsch_decode(srslte_sch_t *q, srslte_pusch_cfg_t *cfg, srslte_softbuffer_rx_t *softbuffer,
float *e_bits, uint8_t *data) int16_t *e_bits, uint8_t *data)
{ {
return decode_tb(q, return decode_tb(q,
softbuffer, &cfg->cb_segm, softbuffer, &cfg->cb_segm,

@ -93,7 +93,6 @@ BuildMex(MEXNAME pdcch SOURCES pdcch_test_mex.c LIBRARIES srslte srslte_mex)
ADD_EXECUTABLE(pdsch_test pdsch_test.c) ADD_EXECUTABLE(pdsch_test pdsch_test.c)
TARGET_LINK_LIBRARIES(pdsch_test srslte) TARGET_LINK_LIBRARIES(pdsch_test srslte)
ADD_TEST(pdsch_test_bpsk pdsch_test -m 0 -n 50 -r 2)
ADD_TEST(pdsch_test_qpsk pdsch_test -m 10 -n 50 -r 1) ADD_TEST(pdsch_test_qpsk pdsch_test -m 10 -n 50 -r 1)
ADD_TEST(pdsch_test_qam16 pdsch_test -m 20 -n 100) ADD_TEST(pdsch_test_qam16 pdsch_test -m 20 -n 100)
ADD_TEST(pdsch_test_qam64 pdsch_test -m 28 -n 100 -r 0) ADD_TEST(pdsch_test_qam64 pdsch_test -m 28 -n 100 -r 0)

@ -162,7 +162,7 @@ int main(int argc, char **argv) {
} }
} }
data = malloc(sizeof(uint8_t) * grant.mcs.tbs/8); data = srslte_vec_malloc(sizeof(uint8_t) * grant.mcs.tbs/8);
if (!data) { if (!data) {
perror("malloc"); perror("malloc");
goto quit; goto quit;

@ -41,6 +41,15 @@ void srslte_scrambling_f_offset(srslte_sequence_t *s, float *data, int offset, i
srslte_vec_prod_fff(data, &s->c_float[offset], data, len); srslte_vec_prod_fff(data, &s->c_float[offset], data, len);
} }
/* Scrambles a buffer of 16-bit samples in place over the whole sequence:
 * forwards to srslte_scrambling_s_offset() starting at index 0 and covering
 * s->len entries. */
void srslte_scrambling_s(srslte_sequence_t *s, short *data) {
  const int start = 0;
  srslte_scrambling_s_offset(s, data, start, s->len);
}
/**
 * Multiplies `len` 16-bit samples of `data`, in place, by the entries of the
 * sequence's c_short table starting at index `offset`.
 *
 * @param s      Sequence providing the c_short table and its length (s->len).
 * @param data   Samples to scramble; overwritten with the products.
 * @param offset First index of c_short to use. Must be >= 0.
 * @param len    Number of samples to process. Must be >= 0 and
 *               offset + len must not exceed s->len.
 *
 * Violations of the range preconditions abort through assert().
 */
void srslte_scrambling_s_offset(srslte_sequence_t *s, short *data, int offset, int len) {
  /* Reject negative values explicitly: a range check evaluated against the
   * unsigned s->len would accept e.g. offset=-2, len=2 (sum 0) and then index
   * before the start of c_short. */
  assert(offset >= 0 && len >= 0);
  /* Both operands are non-negative ints, so the unsigned sum cannot wrap. */
  assert((uint32_t) offset + (uint32_t) len <= s->len);
  srslte_vec_prod_sss(data, &s->c_short[offset], data, len);
}
void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data) { void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data) {
srslte_scrambling_c_offset(s, data, 0, s->len); srslte_scrambling_c_offset(s, data, 0, s->len);
} }

@ -457,6 +457,17 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
#endif #endif
} }
/**
 * Element-wise product of two vectors of 16-bit integers:
 * z[i] = x[i] * y[i] for 0 <= i < len.
 *
 * When HAVE_VECTOR_SIMD is defined the work is delegated to
 * srslte_vec_prod_sss_simd(); otherwise a scalar loop is used.
 *
 * @param x   First input vector (len elements).
 * @param y   Second input vector (len elements).
 * @param z   Output vector (len elements); the scalar loop reads each element
 *            before writing it, so z may be the same buffer as x or y.
 * @param len Number of elements to process.
 */
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
  /* The index shares the unsigned type of len: no signed/unsigned comparison
   * and no signed overflow for lengths above INT_MAX. */
  uint32_t i;
  for (i = 0; i < len; i++) {
    /* Operands are promoted to int; the product is narrowed back to short. */
    z[i] = (short) (x[i] * y[i]);
  }
#else
  srslte_vec_prod_sss_simd(x,y,z,len);
#endif
}
void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT2_FUNCTION #ifndef HAVE_VOLK_MULT2_FUNCTION
int i; int i;

@ -107,6 +107,36 @@ void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
} }
} }
/**
 * SSE2 element-wise product of two vectors of 16-bit integers.
 * Each output element receives the low 16 bits of x[i] * y[i].
 *
 * Blocks of 8 elements are processed with 128-bit vectors; the remaining
 * len % 8 elements are handled by a scalar loop.
 *
 * The buffers are accessed with unaligned loads and stores, so none of the
 * pointers needs to be 16-byte aligned. This matters because callers pass
 * addresses such as &c_short[offset] with an arbitrary offset.
 *
 * @param x   First input vector (len elements).
 * @param y   Second input vector (len elements).
 * @param z   Output vector (len elements); may be the same buffer as x or y.
 * @param len Number of elements to process.
 */
void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
{
  const uint32_t nof_vectors = len / 8;

  const __m128i *xPtr = (const __m128i*) x;
  const __m128i *yPtr = (const __m128i*) y;
  __m128i *zPtr = (__m128i*) z;

  for (uint32_t n = 0; n < nof_vectors; n++) {
    /* Unaligned variants: an aligned load/store faults on addresses that are
     * not a multiple of 16 bytes. */
    __m128i xVal = _mm_loadu_si128(xPtr + n);
    __m128i yVal = _mm_loadu_si128(yPtr + n);

    /* Keeps the low 16 bits of each 16x16-bit product. */
    __m128i zVal = _mm_mullo_epi16(xVal, yVal);

    _mm_storeu_si128(zPtr + n, zVal);
  }

  /* Scalar tail for the elements that do not fill a whole vector. */
  for (uint32_t i = nof_vectors * 8; i < len; i++) {
    z[i] = (short) (x[i] * y[i]);
  }
}
void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len) void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
{ {
unsigned int number = 0; unsigned int number = 0;
@ -190,9 +220,6 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
intInputVal1 = _mm_cvtps_epi32(ret1); intInputVal1 = _mm_cvtps_epi32(ret1);
intInputVal2 = _mm_cvtps_epi32(ret2); intInputVal2 = _mm_cvtps_epi32(ret2);
printf("intinput: "); print128_num(intInputVal1);
printf("intinput2: "); print128_num(intInputVal2);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);

Loading…
Cancel
Save