Integrated SSE decoder into pdsch receiver

master
ismagom 9 years ago
parent f2b40c57ae
commit f735268202

@ -412,7 +412,7 @@ int update_control() {
}
}
#define DATA_BUFF_SZ 1000
#define DATA_BUFF_SZ 1024*128
uint8_t data[8*DATA_BUFF_SZ], data_unpacked[DATA_BUFF_SZ];
uint8_t data_tmp[DATA_BUFF_SZ];
@ -511,9 +511,9 @@ int main(int argc, char **argv) {
sigaddset(&sigset, SIGINT);
sigprocmask(SIG_UNBLOCK, &sigset, NULL);
signal(SIGINT, sig_int_handler);
cuhd_set_master_clock_rate(uhd, 30.72e6);
if (!output_file_name) {
cuhd_set_master_clock_rate(uhd, 30.72e6);
printf("Set TX rate: %.2f MHz\n",
cuhd_set_tx_srate(uhd, srslte_sampling_freq_hz(cell.nof_prb)) / 1000000);
printf("Set TX gain: %.1f dB\n", cuhd_set_tx_gain(uhd, uhd_gain));

@ -44,6 +44,7 @@ typedef struct SRSLTE_API {
uint8_t *c;
uint8_t *c_bytes;
float *c_float;
short *c_short;
uint32_t len;
} srslte_sequence_t;

@ -77,8 +77,8 @@ SRSLTE_API int srslte_rm_turbo_rx(float *w_buff,
uint32_t rv_idx,
uint32_t nof_filler_bits);
SRSLTE_API int srslte_rm_turbo_rx_lut(float *input,
float *output,
SRSLTE_API int srslte_rm_turbo_rx_lut(int16_t *input,
int16_t *output,
uint32_t in_len,
uint32_t cb_idx,
uint32_t rv_idx);

@ -42,7 +42,7 @@
typedef struct SRSLTE_API {
uint32_t max_cb;
float **buffer_f;
int16_t **buffer_f;
} srslte_softbuffer_rx_t;
typedef struct SRSLTE_API {

@ -1,104 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_
#define TURBODECODER_
#include "srslte/config.h"
#include "srslte/fec/tc_interl.h"
#include "srslte/fec/cbsegm.h"
#define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef short llr_t;
/* Working state of the MAP constituent decoder embedded in srslte_tdec_t. */
typedef struct SRSLTE_API {
/* NOTE(review): presumably the maximum code block length the buffers were
 * sized for -- confirm that srslte_map_gen_init() stores it here. */
int max_long_cb;
/* Buffer allocated by srslte_map_gen_init() with
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES llr_t elements;
 * srslte_map_gen_alpha() writes into it starting at alpha[0]. */
llr_t *alpha;
/* Buffer allocated by srslte_map_gen_init() with the same element count as
 * alpha; srslte_map_gen_beta() reads it in pairs (branch[2*k], branch[2*k+1]). */
llr_t *branch;
} srslte_map_gen_t;
/* Turbo decoder object: owns the MAP decoder state, the llr_t work buffers and
 * one interleaver per supported code block size. Set up with srslte_tdec_init()
 * and released with srslte_tdec_free(). */
typedef struct SRSLTE_API {
/* Copied from the max_long_cb argument of srslte_tdec_init();
 * srslte_tdec_reset() rejects any long_cb larger than this value. */
int max_long_cb;
/* Embedded MAP decoder state (see srslte_map_gen_t). */
srslte_map_gen_t dec;
/* The seven buffers below are each allocated by srslte_tdec_init() with
 * (max_long_cb + SRSLTE_TCOD_TOTALTAIL) llr_t elements.
 * NOTE(review): the names suggest a-priori (app), extrinsic (ext), systematic
 * and parity values for the two constituent decoders -- confirm against the
 * implementation. */
llr_t *app1;
llr_t *app2;
llr_t *ext1;
llr_t *ext2;
llr_t *syst;
llr_t *parity0;
llr_t *parity1;
/* Set to -1 by srslte_tdec_init(); srslte_tdec_iteration() only runs when
 * this is >= 0 and otherwise reports that reset must be called first. */
int current_cbidx;
/* One interleaver per code block size index; each entry is released with
 * srslte_tc_interl_free() in srslte_tdec_free(). */
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
/* Incremented at the end of every srslte_tdec_iteration();
 * srslte_tdec_run_all() keeps iterating while n_iter < nof_iterations. */
int n_iter;
} srslte_tdec_t;
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
llr_t * input,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
llr_t * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);
#endif

@ -37,8 +37,8 @@
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_VL_
#define TURBODECODER_VL_
#ifndef TURBODECODER_GEN_
#define TURBODECODER_GEN_
#include "srslte/config.h"
#include "srslte/fec/tc_interl.h"
@ -50,11 +50,9 @@
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef float srslte_llr_t;
typedef struct SRSLTE_API {
int max_long_cb;
srslte_llr_t *beta;
float *beta;
} srslte_map_gen_vl_t;
typedef struct SRSLTE_API {
@ -62,37 +60,37 @@ typedef struct SRSLTE_API {
srslte_map_gen_vl_t dec;
srslte_llr_t *llr1;
srslte_llr_t *llr2;
srslte_llr_t *w;
srslte_llr_t *syst;
srslte_llr_t *parity;
float *llr1;
float *llr2;
float *w;
float *syst;
float *parity;
int current_cbidx;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
} srslte_tdec_vl_t;
} srslte_tdec_gen_t;
SRSLTE_API int srslte_tdec_vl_init(srslte_tdec_vl_t * h,
SRSLTE_API int srslte_tdec_gen_init(srslte_tdec_gen_t * h,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_vl_free(srslte_tdec_vl_t * h);
SRSLTE_API void srslte_tdec_gen_free(srslte_tdec_gen_t * h);
SRSLTE_API int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb);
SRSLTE_API int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h,
srslte_llr_t * input,
SRSLTE_API void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h,
float * input,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_vl_decision(srslte_tdec_vl_t * h,
SRSLTE_API void srslte_tdec_gen_decision(srslte_tdec_gen_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h,
SRSLTE_API void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_vl_run_all(srslte_tdec_vl_t * h,
srslte_llr_t * input,
SRSLTE_API int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h,
float * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);

@ -40,7 +40,8 @@
#include "srslte/common/phy_common.h"
#include "srslte/fec/rm_turbo.h"
#include "srslte/fec/turbocoder.h"
#include "srslte/fec/turbodecoder.h"
#include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/turbodecoder_sse.h"
#include "srslte/fec/crc.h"
#include "srslte/phch/pdsch_cfg.h"
#include "srslte/phch/pusch_cfg.h"
@ -73,7 +74,7 @@ typedef struct SRSLTE_API {
uint32_t nof_ri_ack_bits;
srslte_tcod_t encoder;
srslte_tdec_t decoder;
srslte_tdec_sse_t decoder;
srslte_crc_t crc_tb;
srslte_crc_t crc_cb;
@ -99,7 +100,7 @@ SRSLTE_API int srslte_dlsch_encode(srslte_sch_t *q,
SRSLTE_API int srslte_dlsch_decode(srslte_sch_t *q,
srslte_pdsch_cfg_t *cfg,
srslte_softbuffer_rx_t *softbuffer,
float *e_bits,
int16_t *e_bits,
uint8_t *data);
SRSLTE_API int srslte_ulsch_encode(srslte_sch_t *q,
@ -120,7 +121,7 @@ SRSLTE_API int srslte_ulsch_uci_encode(srslte_sch_t *q,
SRSLTE_API int srslte_ulsch_decode(srslte_sch_t *q,
srslte_pusch_cfg_t *cfg,
srslte_softbuffer_rx_t *softbuffer,
float *e_bits,
int16_t *e_bits,
uint8_t *data);
SRSLTE_API float srslte_sch_beta_cqi(uint32_t I_cqi);

@ -65,6 +65,14 @@ SRSLTE_API void srslte_scrambling_f_offset(srslte_sequence_t *s,
int offset,
int len);
SRSLTE_API void srslte_scrambling_s(srslte_sequence_t *s,
short *data);
SRSLTE_API void srslte_scrambling_s_offset(srslte_sequence_t *s,
short *data,
int offset,
int len);
SRSLTE_API void srslte_scrambling_c(srslte_sequence_t *s,
cf_t *data);

@ -63,7 +63,8 @@
#include "srslte/fec/crc.h"
#include "srslte/fec/tc_interl.h"
#include "srslte/fec/turbocoder.h"
#include "srslte/fec/turbodecoder.h"
#include "srslte/fec/turbodecoder_sse.h"
#include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/cbsegm.h"
#include "srslte/fec/rm_conv.h"
#include "srslte/fec/rm_turbo.h"

@ -130,6 +130,7 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len
/* real vector product (element-wise) */
SRSLTE_API void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len);
/* Dot-product */
SRSLTE_API cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len);

@ -41,6 +41,8 @@ SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t l
SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len);

@ -85,6 +85,7 @@ int srslte_sequence_LTE_pr(srslte_sequence_t *q, uint32_t len, uint32_t seed) {
srslte_bit_pack_vector(q->c, q->c_bytes, len);
for (int i=0;i<len;i++) {
q->c_float[i] = (1-2*q->c[i]);
q->c_short[i] = (int16_t) q->c_float[i];
}
return SRSLTE_SUCCESS;
}
@ -98,13 +99,16 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) {
if (q->c_float) {
free(q->c_float);
}
if (q->c_short) {
free(q->c_short);
}
}
if (!q->c) {
q->c = srslte_vec_malloc(len * sizeof(uint8_t));
if (!q->c) {
return SRSLTE_ERROR;
}
q->c_bytes = srslte_vec_malloc(len * sizeof(uint8_t)/8);
q->c_bytes = srslte_vec_malloc(len * sizeof(uint8_t)/8+8);
if (!q->c_bytes) {
return SRSLTE_ERROR;
}
@ -112,6 +116,10 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) {
if (!q->c_float) {
return SRSLTE_ERROR;
}
q->c_short = srslte_vec_malloc(len * sizeof(short));
if (!q->c_short) {
return SRSLTE_ERROR;
}
q->len = len;
}
return SRSLTE_SUCCESS;

@ -28,7 +28,7 @@
#include <strings.h>
#include <math.h>
#include "srslte/fec/turbodecoder.h"
#include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/cbsegm.h"
#include "srslte/utils/debug.h"

@ -272,7 +272,7 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
}
}
int srslte_rm_turbo_rx_lut(float *input, float *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;

@ -36,7 +36,7 @@
#include "srslte/common/phy_common.h"
#include "srslte/phch/ra.h"
#include "srslte/fec/turbodecoder.h"
#include "srslte/fec/turbodecoder_gen.h"
#include "srslte/fec/rm_turbo.h"
#include "srslte/fec/softbuffer.h"
#include "srslte/utils/vector.h"

@ -32,7 +32,7 @@
#include <strings.h>
#include <math.h>
#include "srslte/fec/turbodecoder_vl.h"
#include "srslte/fec/turbodecoder_gen.h"
#include "srslte/utils/vector.h"
#define NUMSTATES 8
@ -49,14 +49,14 @@
* Decoder
*
************************************************/
static void map_gen_beta(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_llr_t * parity,
static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity,
uint32_t long_cb)
{
srslte_llr_t m_b[8], new[8], old[8];
srslte_llr_t x, y, xy;
float m_b[8], new[8], old[8];
float x, y, xy;
int k;
uint32_t end = long_cb + SRSLTE_TCOD_RATE;
srslte_llr_t *beta = s->beta;
float *beta = s->beta;
uint32_t i;
for (i = 0; i < 8; i++) {
@ -96,16 +96,16 @@ static void map_gen_beta(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_l
}
}
static void map_gen_alpha(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output,
static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity, float * output,
uint32_t long_cb)
{
srslte_llr_t m_b[8], new[8], old[8], max1[8], max0[8];
srslte_llr_t m1, m0;
srslte_llr_t x, y, xy;
srslte_llr_t out;
float m_b[8], new[8], old[8], max1[8], max0[8];
float m1, m0;
float x, y, xy;
float out;
uint32_t k;
uint32_t end = long_cb;
srslte_llr_t *beta = s->beta;
float *beta = s->beta;
uint32_t i;
old[0] = 0;
@ -166,7 +166,7 @@ static void map_gen_alpha(srslte_map_gen_vl_t * s, srslte_llr_t * input, srslte_
static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb)
{
bzero(h, sizeof(srslte_map_gen_vl_t));
h->beta = srslte_vec_malloc(sizeof(srslte_llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
h->beta = srslte_vec_malloc(sizeof(float) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->beta) {
perror("srslte_vec_malloc");
return -1;
@ -183,7 +183,7 @@ static void map_gen_free(srslte_map_gen_vl_t * h)
bzero(h, sizeof(srslte_map_gen_vl_t));
}
static void map_gen_dec(srslte_map_gen_vl_t * h, srslte_llr_t * input, srslte_llr_t * parity, srslte_llr_t * output,
static void map_gen_dec(srslte_map_gen_vl_t * h, float * input, float * parity, float * output,
uint32_t long_cb)
{
uint32_t k;
@ -201,35 +201,35 @@ static void map_gen_dec(srslte_map_gen_vl_t * h, srslte_llr_t * input, srslte_ll
* TURBO DECODER INTERFACE
*
************************************************/
int srslte_tdec_vl_init(srslte_tdec_vl_t * h, uint32_t max_long_cb)
int srslte_tdec_gen_init(srslte_tdec_gen_t * h, uint32_t max_long_cb)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_vl_t));
bzero(h, sizeof(srslte_tdec_gen_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb;
h->llr1 = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
h->llr1 = srslte_vec_malloc(sizeof(float) * len);
if (!h->llr1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->llr2 = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
h->llr2 = srslte_vec_malloc(sizeof(float) * len);
if (!h->llr2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->w = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
h->w = srslte_vec_malloc(sizeof(float) * len);
if (!h->w) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
h->syst = srslte_vec_malloc(sizeof(float) * len);
if (!h->syst) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity = srslte_vec_malloc(sizeof(srslte_llr_t) * len);
h->parity = srslte_vec_malloc(sizeof(float) * len);
if (!h->parity) {
perror("srslte_vec_malloc");
goto clean_and_exit;
@ -248,12 +248,12 @@ int srslte_tdec_vl_init(srslte_tdec_vl_t * h, uint32_t max_long_cb)
h->current_cbidx = -1;
ret = 0;
clean_and_exit:if (ret == -1) {
srslte_tdec_vl_free(h);
srslte_tdec_gen_free(h);
}
return ret;
}
void srslte_tdec_vl_free(srslte_tdec_vl_t * h)
void srslte_tdec_gen_free(srslte_tdec_gen_t * h)
{
if (h->llr1) {
free(h->llr1);
@ -277,10 +277,10 @@ void srslte_tdec_vl_free(srslte_tdec_vl_t * h)
srslte_tc_interl_free(&h->interleaver[i]);
}
bzero(h, sizeof(srslte_tdec_vl_t));
bzero(h, sizeof(srslte_tdec_gen_t));
}
void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h, srslte_llr_t * input, uint32_t long_cb)
void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h, float * input, uint32_t long_cb)
{
uint32_t i;
@ -323,18 +323,18 @@ void srslte_tdec_vl_iteration(srslte_tdec_vl_t * h, srslte_llr_t * input, uint32
h->w[i] += h->llr2[deinter[i]] - h->llr1[i];
}
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_vl_reset() first\n");
fprintf(stderr, "Error CB index not set (call srslte_tdec_gen_reset() first\n");
}
}
int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb)
int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb)
{
if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
h->max_long_cb);
return -1;
}
memset(h->w, 0, sizeof(srslte_llr_t) * long_cb);
memset(h->w, 0, sizeof(float) * long_cb);
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb);
@ -343,7 +343,7 @@ int srslte_tdec_vl_reset(srslte_tdec_vl_t * h, uint32_t long_cb)
return 0;
}
void srslte_tdec_vl_decision(srslte_tdec_vl_t * h, uint8_t *output, uint32_t long_cb)
void srslte_tdec_gen_decision(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
{
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
uint32_t i;
@ -352,7 +352,7 @@ void srslte_tdec_vl_decision(srslte_tdec_vl_t * h, uint8_t *output, uint32_t lon
}
}
void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h, uint8_t *output, uint32_t long_cb)
void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
{
uint32_t i;
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
@ -373,21 +373,21 @@ void srslte_tdec_vl_decision_byte(srslte_tdec_vl_t * h, uint8_t *output, uint32_
}
}
int srslte_tdec_vl_run_all(srslte_tdec_vl_t * h, srslte_llr_t * input, uint8_t *output,
int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, float * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb)
{
uint32_t iter = 0;
if (srslte_tdec_vl_reset(h, long_cb)) {
if (srslte_tdec_gen_reset(h, long_cb)) {
return SRSLTE_ERROR;
}
do {
srslte_tdec_vl_iteration(h, input, long_cb);
srslte_tdec_gen_iteration(h, input, long_cb);
iter++;
} while (iter < nof_iterations);
srslte_tdec_vl_decision(h, output, long_cb);
srslte_tdec_gen_decision(h, output, long_cb);
return SRSLTE_SUCCESS;
}

@ -32,7 +32,7 @@
#include <strings.h>
#include <math.h>
#include "srslte/fec/turbodecoder.h"
#include "srslte/fec/turbodecoder_sse.h"
#include "srslte/utils/vector.h"
#include <inttypes.h>
@ -62,7 +62,7 @@ static inline int16_t hMax(__m128i buffer)
return (int16_t)(_mm_cvtsi128_si32(tmp3));
}
void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
void srslte_map_gen_beta(srslte_map_gen_t * s, int16_t * output, uint32_t long_cb)
{
int k;
uint32_t end = long_cb + 3;
@ -101,7 +101,7 @@ void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
__m128i gv;
llr_t *b = &s->branch[2*long_cb-8];
int16_t *b = &s->branch[2*long_cb-8];
__m128i *gPtr = (__m128i*) b;
__m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
@ -119,8 +119,8 @@ void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
bn = _mm_add_epi16(bn, alpha_k); output[k-d] = hMax(bn) - hMax(bp);
for (k=end-1; k>=long_cb; k--) {
llr_t g0 = s->branch[2*k];
llr_t g1 = s->branch[2*k+1];
int16_t g0 = s->branch[2*k];
int16_t g1 = s->branch[2*k+1];
g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1);
BETA_STEP(g);
@ -147,7 +147,7 @@ void srslte_map_gen_beta(srslte_map_gen_t * s, llr_t * output, uint32_t long_cb)
void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
{
uint32_t k;
llr_t *alpha = s->alpha;
int16_t *alpha = s->alpha;
uint32_t i;
alpha[0] = 0;
@ -221,7 +221,7 @@ void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
}
}
void srslte_map_gen_gamma(srslte_map_gen_t * h, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb)
void srslte_map_gen_gamma(srslte_map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
{
__m128i res10, res20, res11, res21, res1, res2;
__m128i in, ap, pa, g1, g0;
@ -278,12 +278,12 @@ void srslte_map_gen_gamma(srslte_map_gen_t * h, llr_t *input, llr_t *app, llr_t
int srslte_map_gen_init(srslte_map_gen_t * h, int max_long_cb)
{
bzero(h, sizeof(srslte_map_gen_t));
h->alpha = srslte_vec_malloc(sizeof(llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->alpha) {
perror("srslte_vec_malloc");
return -1;
}
h->branch = srslte_vec_malloc(sizeof(llr_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->branch) {
perror("srslte_vec_malloc");
return -1;
@ -303,7 +303,7 @@ void srslte_map_gen_free(srslte_map_gen_t * h)
bzero(h, sizeof(srslte_map_gen_t));
}
void srslte_map_gen_dec(srslte_map_gen_t * h, llr_t * input, llr_t *app, llr_t * parity, llr_t * output,
void srslte_map_gen_dec(srslte_map_gen_t * h, int16_t * input, int16_t *app, int16_t * parity, int16_t * output,
uint32_t long_cb)
{
@ -323,45 +323,45 @@ void srslte_map_gen_dec(srslte_map_gen_t * h, llr_t * input, llr_t *app, llr_t *
* TURBO DECODER INTERFACE
*
************************************************/
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb)
int srslte_tdec_sse_init(srslte_tdec_sse_t * h, uint32_t max_long_cb)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_t));
bzero(h, sizeof(srslte_tdec_sse_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb;
h->app1 = srslte_vec_malloc(sizeof(llr_t) * len);
h->app1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->app2 = srslte_vec_malloc(sizeof(llr_t) * len);
h->app2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext1 = srslte_vec_malloc(sizeof(llr_t) * len);
h->ext1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext2 = srslte_vec_malloc(sizeof(llr_t) * len);
h->ext2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst = srslte_vec_malloc(sizeof(llr_t) * len);
h->syst = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->syst) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity0 = srslte_vec_malloc(sizeof(llr_t) * len);
h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity1 = srslte_vec_malloc(sizeof(llr_t) * len);
h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
@ -380,12 +380,12 @@ int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb)
h->current_cbidx = -1;
ret = 0;
clean_and_exit:if (ret == -1) {
srslte_tdec_free(h);
srslte_tdec_sse_free(h);
}
return ret;
}
void srslte_tdec_free(srslte_tdec_t * h)
void srslte_tdec_sse_free(srslte_tdec_sse_t * h)
{
if (h->app1) {
free(h->app1);
@ -415,10 +415,10 @@ void srslte_tdec_free(srslte_tdec_t * h)
srslte_tc_interl_free(&h->interleaver[i]);
}
bzero(h, sizeof(srslte_tdec_t));
bzero(h, sizeof(srslte_tdec_sse_t));
}
void deinterleave_input(srslte_tdec_t *h, short *input, uint32_t long_cb) {
void deinterleave_input(srslte_tdec_sse_t *h, int16_t *input, uint32_t long_cb) {
uint32_t i;
__m128i *inputPtr = (__m128i*) input;
@ -502,7 +502,7 @@ void deinterleave_input(srslte_tdec_t *h, short *input, uint32_t long_cb) {
}
void srslte_tdec_iteration(srslte_tdec_t * h, short * input, uint32_t long_cb)
void srslte_tdec_sse_iteration(srslte_tdec_sse_t * h, int16_t * input, uint32_t long_cb)
{
if (h->current_cbidx >= 0) {
@ -541,11 +541,11 @@ void srslte_tdec_iteration(srslte_tdec_t * h, short * input, uint32_t long_cb)
h->n_iter++;
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_reset() first\n");
fprintf(stderr, "Error CB index not set (call srslte_tdec_sse_reset() first\n");
}
}
int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb)
int srslte_tdec_sse_reset(srslte_tdec_sse_t * h, uint32_t long_cb)
{
if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
@ -561,7 +561,7 @@ int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb)
return 0;
}
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb)
void srslte_tdec_sse_decision(srslte_tdec_sse_t * h, uint8_t *output, uint32_t long_cb)
{
__m128i zero = _mm_set1_epi16(0);
__m128i lsb_mask = _mm_set1_epi16(1);
@ -587,7 +587,7 @@ void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb)
}
}
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb)
void srslte_tdec_sse_decision_byte(srslte_tdec_sse_t * h, uint8_t *output, uint32_t long_cb)
{
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
@ -606,18 +606,18 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long
}
}
int srslte_tdec_run_all(srslte_tdec_t * h, short * input, uint8_t *output,
int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h, int16_t * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb)
{
if (srslte_tdec_reset(h, long_cb)) {
if (srslte_tdec_sse_reset(h, long_cb)) {
return SRSLTE_ERROR;
}
do {
srslte_tdec_iteration(h, input, long_cb);
srslte_tdec_sse_iteration(h, input, long_cb);
} while (h->n_iter < nof_iterations);
srslte_tdec_decision_byte(h, output, long_cb);
srslte_tdec_sse_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS;
}

@ -50,7 +50,7 @@ uint8_t bits[3*6144+12];
uint8_t buff_b[BUFFSZ];
float buff_f[BUFFSZ];
float bits_f[3*6144+12];
float bits2_f[3*6144+12];
short bits2_s[3*6144+12];
void usage(char *prog) {
printf("Usage: %s -c cb_idx -e nof_e_bits [-i rv_idx]\n", prog);
@ -84,18 +84,24 @@ void parse_args(int argc, char **argv) {
int main(int argc, char **argv) {
int i;
uint8_t *rm_bits, *rm_bits2, *rm_bits2_bytes;
short *rm_bits_s;
float *rm_bits_f;
parse_args(argc, argv);
srslte_rm_turbo_gentables();
rm_bits_f = malloc(sizeof(float) * nof_e_bits);
rm_bits_s = srslte_vec_malloc(sizeof(short) * nof_e_bits);
if (!rm_bits_s) {
perror("malloc");
exit(-1);
}
rm_bits_f = srslte_vec_malloc(sizeof(float) * nof_e_bits);
if (!rm_bits_f) {
perror("malloc");
exit(-1);
}
rm_bits = malloc(sizeof(uint8_t) * nof_e_bits);
rm_bits = srslte_vec_malloc(sizeof(uint8_t) * nof_e_bits);
if (!rm_bits) {
perror("malloc");
exit(-1);
@ -171,18 +177,18 @@ int main(int argc, char **argv) {
for (int i=0;i<nof_e_bits;i++) {
rm_bits_f[i] = rand()%10-5;
rm_bits_s[i] = (short) rm_bits_f[i];
}
bzero(buff_f, BUFFSZ*sizeof(float));
srslte_rm_turbo_rx(buff_f, BUFFSZ, rm_bits_f, nof_e_bits, bits_f, long_cb_enc, rv_idx, 0);
bzero(bits2_f, long_cb_enc*sizeof(float));
srslte_rm_turbo_rx_lut(rm_bits_f, bits2_f, nof_e_bits, cb_idx, rv_idx);
bzero(bits2_s, long_cb_enc*sizeof(short));
srslte_rm_turbo_rx_lut(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx);
for (int i=0;i<long_cb_enc;i++) {
if (bits_f[i] != bits2_f[i]) {
printf("error RX in bit %d %f!=%f\n", i, bits_f[i], bits2_f[i]);
if (bits_f[i] != bits2_s[i]) {
printf("error RX in bit %d %f!=%d\n", i, bits_f[i], bits2_s[i]);
exit(-1);
}
}

@ -36,7 +36,6 @@
#include <sys/time.h>
#include <time.h>
#include "srslte/srslte.h"
#include "srslte/fec/turbodecoder_vl.h"
#include "turbodecoder_test.h"
@ -119,12 +118,12 @@ int main(int argc, char **argv) {
float var[SNR_POINTS];
uint32_t snr_points;
uint32_t errors;
uint32_t errors_vl;
uint32_t errors_gen;
uint32_t coded_length;
struct timeval tdata[3];
float mean_usec, mean_usec_vl;
srslte_tdec_t tdec;
srslte_tdec_vl_t tdec_vl;
float mean_usec, mean_usec_gen;
srslte_tdec_sse_t tdec;
srslte_tdec_gen_t tdec_gen;
srslte_tcod_t tcod;
parse_args(argc, argv);
@ -190,12 +189,12 @@ int main(int argc, char **argv) {
exit(-1);
}
if (srslte_tdec_init(&tdec, frame_length)) {
if (srslte_tdec_sse_init(&tdec, frame_length)) {
fprintf(stderr, "Error initiating Turbo decoder\n");
exit(-1);
}
if (srslte_tdec_vl_init(&tdec_vl, frame_length)) {
if (srslte_tdec_gen_init(&tdec_gen, frame_length)) {
fprintf(stderr, "Error initiating Turbo decoder\n");
exit(-1);
}
@ -217,9 +216,9 @@ int main(int argc, char **argv) {
for (i = 0; i < snr_points; i++) {
mean_usec = 0;
mean_usec_vl = 0;
mean_usec_gen = 0;
errors = 0;
errors_vl = 0;
errors_gen = 0;
frame_cnt = 0;
while (frame_cnt < nof_frames) {
/* generate data_tx */
@ -250,8 +249,8 @@ int main(int argc, char **argv) {
llr_s[j] = (int16_t) (100*llr[j]);
}
/* decoder */
srslte_tdec_reset(&tdec, frame_length);
srslte_tdec_vl_reset(&tdec_vl, frame_length);
srslte_tdec_sse_reset(&tdec, frame_length);
srslte_tdec_gen_reset(&tdec_gen, frame_length);
uint32_t t;
if (nof_iterations == -1) {
@ -262,7 +261,7 @@ int main(int argc, char **argv) {
gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
srslte_tdec_sse_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
}
gettimeofday(&tdata[2], NULL);
get_time_interval(tdata);
@ -274,21 +273,21 @@ int main(int argc, char **argv) {
gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_vl_run_all(&tdec_vl, llr, data_rx, t, frame_length);
srslte_tdec_gen_run_all(&tdec_gen, llr, data_rx, t, frame_length);
}
gettimeofday(&tdata[2], NULL);
get_time_interval(tdata);
mean_usec_vl = (float) mean_usec_vl * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
mean_usec_gen = (float) mean_usec_gen * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
/* check errors */
errors_vl += srslte_bit_diff(data_tx, data_rx, frame_length);
errors_gen += srslte_bit_diff(data_tx, data_rx, frame_length);
frame_cnt++;
printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length));
printf("BER_vl: %.2e ", (float) errors_vl / (frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec) -- vl: ", (float) frame_length / mean_usec, mean_usec);
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec_vl, mean_usec_vl);
printf("BER_gen: %.2e ", (float) errors_gen / (frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec) -- gen: ", (float) frame_length / mean_usec, mean_usec);
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec_gen, mean_usec_gen);
printf("\r");
}
@ -298,10 +297,10 @@ int main(int argc, char **argv) {
printf("\n");
if (snr_points == 1) {
if (errors) {
printf("%d Errors\n", errors);
printf("%d Errors in SSE\n", errors);
}
if (errors_vl) {
printf("%d Errors in VL\n", errors_vl);
if (errors_gen) {
printf("%d Errors in GEN\n", errors_gen);
}
}
@ -312,7 +311,8 @@ int main(int argc, char **argv) {
free(llr_c);
free(data_rx);
srslte_tdec_free(&tdec);
srslte_tdec_sse_free(&tdec);
srslte_tdec_gen_free(&tdec_gen);
srslte_tcod_free(&tcod);
printf("\n");

@ -47,7 +47,7 @@ void help()
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
srslte_tdec_t tdec;
srslte_tdec_gen_t tdec;
float *input_llr;
uint8_t *output_data;
uint32_t nof_bits;
@ -86,18 +86,18 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
// allocate memory for output bits
output_data = srslte_vec_malloc(nof_bits * sizeof(uint8_t));
if (srslte_tdec_init(&tdec, nof_bits)) {
if (srslte_tdec_gen_init(&tdec, nof_bits)) {
mexErrMsgTxt("Error initiating Turbo decoder\n");
return;
}
srslte_tdec_run_all(&tdec, input_llr, output_data, nof_iterations, nof_bits);
srslte_tdec_gen_run_all(&tdec, input_llr, output_data, nof_iterations, nof_bits);
if (nlhs >= 1) {
mexutils_write_uint8(output_data, &plhs[0], nof_bits, 1);
}
srslte_tdec_free(&tdec);
srslte_tdec_gen_free(&tdec);
free(input_llr);
free(output_data);

@ -58,7 +58,6 @@ void demod_bpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
}
void demod_qpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
srslte_vec_fprint_f(stdout, (float*) symbols, nsymbols*2);
srslte_vec_convert_fi((float*) symbols, llr, -SCALE_SHORT_CONV*sqrt(2), nsymbols*2);
}

@ -229,8 +229,8 @@ int srslte_pdsch_init(srslte_pdsch_t *q, srslte_cell_t cell) {
q->rnti_is_set = false;
// Allocate floats for reception (LLRs)
q->e = srslte_vec_malloc(sizeof(float) * q->max_re * srslte_mod_bits_x_symbol(SRSLTE_MOD_64QAM));
// Allocate int16_t for reception (LLRs)
q->e = srslte_vec_malloc(sizeof(int16_t) * q->max_re * srslte_mod_bits_x_symbol(SRSLTE_MOD_64QAM));
if (!q->e) {
goto clean;
}
@ -417,7 +417,7 @@ int srslte_pdsch_decode_rnti(srslte_pdsch_t *q,
* The MAX-log-MAP algorithm used in turbo decoding is insensitive to SNR estimation,
* thus we don't need to set it in the LLR normalization
*/
srslte_demod_soft_demodulate(cfg->grant.mcs.mod, q->d, q->e, cfg->nbits.nof_re);
srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->e, cfg->nbits.nof_re);
/* descramble */
if (rnti != q->rnti) {
@ -425,10 +425,10 @@ int srslte_pdsch_decode_rnti(srslte_pdsch_t *q,
if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) {
return SRSLTE_ERROR;
}
srslte_scrambling_f_offset(&seq, q->e, 0, cfg->nbits.nof_bits);
srslte_scrambling_s_offset(&seq, q->e, 0, cfg->nbits.nof_bits);
srslte_sequence_free(&seq);
} else {
srslte_scrambling_f_offset(&q->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits);
srslte_scrambling_s_offset(&q->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits);
}
return srslte_dlsch_decode(&q->dl_sch, cfg, softbuffer, q->e, data);

@ -109,14 +109,14 @@ int srslte_sch_init(srslte_sch_t *q) {
fprintf(stderr, "Error initiating Turbo Coder\n");
goto clean;
}
if (srslte_tdec_init(&q->decoder, SRSLTE_TCOD_MAX_LEN_CB)) {
if (srslte_tdec_sse_init(&q->decoder, SRSLTE_TCOD_MAX_LEN_CB)) {
fprintf(stderr, "Error initiating Turbo Decoder\n");
goto clean;
}
srslte_rm_turbo_gentables();
// Allocate floats for reception (LLRs)
// Allocate int16 for reception (LLRs)
q->cb_in = srslte_vec_malloc(sizeof(uint8_t) * (SRSLTE_TCOD_MAX_LEN_CB+8)/8);
if (!q->cb_in) {
goto clean;
@ -161,7 +161,7 @@ void srslte_sch_free(srslte_sch_t *q) {
if (q->ul_interleaver) {
free(q->ul_interleaver);
}
srslte_tdec_free(&q->decoder);
srslte_tdec_sse_free(&q->decoder);
srslte_tcod_free(&q->encoder);
srslte_uci_cqi_free(&q->uci_cqi);
bzero(q, sizeof(srslte_sch_t));
@ -329,7 +329,7 @@ static int encode_tb(srslte_sch_t *q,
static int decode_tb(srslte_sch_t *q,
srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm,
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
float *e_bits, uint8_t *data)
int16_t *e_bits, uint8_t *data)
{
uint8_t parity[3] = {0, 0, 0};
uint32_t par_rx, par_tx;
@ -408,7 +408,7 @@ static int decode_tb(srslte_sch_t *q,
if (SRSLTE_VERBOSE_ISDEBUG()) {
DEBUG("CB#%d RMOUT: ", i);
srslte_vec_fprint_f(stdout, softbuffer->buffer_f[i], 3*cb_len+12);
srslte_vec_fprint_s(stdout, softbuffer->buffer_f[i], 3*cb_len+12);
}
/* Turbo Decoding with CRC-based early stopping */
@ -418,10 +418,10 @@ static int decode_tb(srslte_sch_t *q,
srslte_crc_t *crc_ptr;
early_stop = false;
srslte_tdec_reset(&q->decoder, cb_len);
srslte_tdec_sse_reset(&q->decoder, cb_len);
do {
srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len);
srslte_tdec_sse_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len);
q->nof_iterations++;
if (cb_segm->C > 1) {
@ -434,7 +434,7 @@ static int decode_tb(srslte_sch_t *q,
crc_ptr = &q->crc_tb;
}
srslte_tdec_decision_byte(&q->decoder, q->cb_in, cb_len);
srslte_tdec_sse_decision_byte(&q->decoder, q->cb_in, cb_len);
/* Check Codeblock CRC and stop early if incorrect */
if (!srslte_crc_checksum_byte(crc_ptr, cb_in_ptr, len_crc)) {
@ -504,7 +504,7 @@ static int decode_tb(srslte_sch_t *q,
}
int srslte_dlsch_decode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuffer_rx_t *softbuffer,
float *e_bits, uint8_t *data)
int16_t *e_bits, uint8_t *data)
{
return decode_tb(q,
softbuffer, &cfg->cb_segm,
@ -522,7 +522,7 @@ int srslte_dlsch_encode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuf
}
int srslte_ulsch_decode(srslte_sch_t *q, srslte_pusch_cfg_t *cfg, srslte_softbuffer_rx_t *softbuffer,
float *e_bits, uint8_t *data)
int16_t *e_bits, uint8_t *data)
{
return decode_tb(q,
softbuffer, &cfg->cb_segm,

@ -93,7 +93,6 @@ BuildMex(MEXNAME pdcch SOURCES pdcch_test_mex.c LIBRARIES srslte srslte_mex)
ADD_EXECUTABLE(pdsch_test pdsch_test.c)
TARGET_LINK_LIBRARIES(pdsch_test srslte)
ADD_TEST(pdsch_test_bpsk pdsch_test -m 0 -n 50 -r 2)
ADD_TEST(pdsch_test_qpsk pdsch_test -m 10 -n 50 -r 1)
ADD_TEST(pdsch_test_qam16 pdsch_test -m 20 -n 100)
ADD_TEST(pdsch_test_qam64 pdsch_test -m 28 -n 100 -r 0)

@ -162,7 +162,7 @@ int main(int argc, char **argv) {
}
}
data = malloc(sizeof(uint8_t) * grant.mcs.tbs/8);
data = srslte_vec_malloc(sizeof(uint8_t) * grant.mcs.tbs/8);
if (!data) {
perror("malloc");
goto quit;

@ -41,6 +41,15 @@ void srslte_scrambling_f_offset(srslte_sequence_t *s, float *data, int offset, i
srslte_vec_prod_fff(data, &s->c_float[offset], data, len);
}
/**
 * Scrambles a buffer of shorts in place using the whole sequence.
 *
 * Equivalent to srslte_scrambling_s_offset() with offset 0 and a length equal
 * to the sequence length (s->len).
 *
 * @param s    scrambling sequence
 * @param data buffer of s->len shorts, modified in place
 */
void srslte_scrambling_s(srslte_sequence_t *s, short *data)
{
  uint32_t seq_len = s->len;

  srslte_scrambling_s_offset(s, data, 0, seq_len);
}
/**
 * Scrambles len shorts of data in place, multiplying them element-wise by the
 * short representation of the sequence (s->c_short) starting at offset.
 *
 * Asserts that the requested window [offset, offset+len) does not run past the
 * end of the sequence.
 *
 * @param s      scrambling sequence
 * @param data   buffer of len shorts, modified in place
 * @param offset index of the first sequence element to use
 * @param len    number of elements to scramble
 */
void srslte_scrambling_s_offset(srslte_sequence_t *s, short *data, int offset, int len)
{
  short *seq_window;

  assert (len + offset <= s->len);

  seq_window = s->c_short + offset;
  srslte_vec_prod_sss(data, seq_window, data, len);
}
void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data) {
srslte_scrambling_c_offset(s, data, 0, s->len);
}

@ -457,6 +457,17 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
#endif
}
/**
 * Element-wise product of two vectors of shorts: z[i] = x[i] * y[i].
 *
 * When HAVE_VECTOR_SIMD is defined the work is delegated to
 * srslte_vec_prod_sss_simd(); otherwise a plain scalar loop is used.
 * z may be the same buffer as x or y (in-place operation).
 *
 * @param x   first input vector (len elements)
 * @param y   second input vector (len elements)
 * @param z   output vector (len elements)
 * @param len number of elements to process
 */
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
  /* The index has the same unsigned type as len, which avoids a
   * signed/unsigned comparison and signed overflow for large lengths. */
  uint32_t i;
  for (i = 0; i < len; i++) {
    /* The product is computed as int and stored back truncated to short. */
    z[i] = (short) (x[i] * y[i]);
  }
#else
  srslte_vec_prod_sss_simd(x, y, z, len);
#endif
}
void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT2_FUNCTION
int i;

@ -107,6 +107,36 @@ void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
}
}
/**
 * SSE2 element-wise product of two vectors of shorts: z[i] = x[i] * y[i],
 * keeping the low 16 bits of each product.
 *
 * Processes 8 elements per iteration with 128-bit registers and finishes the
 * remaining (len % 8) elements with a scalar loop.
 *
 * Unaligned loads/stores are used so the buffers need not be 16-byte aligned;
 * callers such as srslte_scrambling_s_offset() pass a pointer into the middle
 * of a sequence (&s->c_short[offset]), which is not aligned in general.
 *
 * @param x   first input vector (len elements)
 * @param y   second input vector (len elements)
 * @param z   output vector (len elements); may alias x or y
 * @param len number of elements to process
 */
void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
{
  unsigned int number = 0;
  const unsigned int points = len / 8;

  const __m128i *xPtr = (const __m128i *) x;
  const __m128i *yPtr = (const __m128i *) y;
  __m128i *zPtr = (__m128i *) z;

  __m128i xVal, yVal, zVal;

  for (; number < points; number++) {
    xVal = _mm_loadu_si128(xPtr);
    yVal = _mm_loadu_si128(yPtr);

    /* Low 16 bits of each 16x16 product. */
    zVal = _mm_mullo_epi16(xVal, yVal);

    _mm_storeu_si128(zPtr, zVal);

    xPtr++;
    yPtr++;
    zPtr++;
  }

  /* Scalar tail for the elements that do not fill a whole 128-bit register. */
  for (number = points * 8; number < len; number++) {
    z[number] = (short) (x[number] * y[number]);
  }
}
void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
{
unsigned int number = 0;
@ -190,9 +220,6 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
intInputVal1 = _mm_cvtps_epi32(ret1);
intInputVal2 = _mm_cvtps_epi32(ret2);
printf("intinput: "); print128_num(intInputVal1);
printf("intinput2: "); print128_num(intInputVal2);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);

Loading…
Cancel
Save