Turbo decoder working with hard decision

master
ismagom 9 years ago
parent ad06998d91
commit 19256c261e

@ -43,6 +43,10 @@ SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t l
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len);
#ifdef __cplusplus
}
#endif

@ -207,7 +207,7 @@ void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
__m128i gv;
__m128i *gPtr = (__m128i*) s->branch;
__m128i g, ap, an;
__m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
#define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \
@ -564,13 +564,13 @@ void srslte_tdec_iteration(srslte_tdec_t * h, float * input, uint32_t long_cb)
}
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
srslte_vec_lut_sss(h->ext1, inter, h->app2, long_cb);
srslte_vec_lut_sss(h->ext1, deinter, h->app2, long_cb);
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
srslte_map_gen_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, long_cb);
// Deinterleaved extrinsic bits become apriori info for decoder 1
srslte_vec_lut_sss(h->ext2, deinter, h->app1, long_cb);
srslte_vec_lut_sss(h->ext2, inter, h->app1, long_cb);
h->n_iter++;
} else {
@ -626,14 +626,14 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long
// long_cb is always byte aligned
for (uint32_t i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->app1[i+0]>0?mask[0]:0;
uint8_t out1 = h->app1[i+1]>0?mask[1]:0;
uint8_t out2 = h->app1[i+2]>0?mask[2]:0;
uint8_t out3 = h->app1[i+3]>0?mask[3]:0;
uint8_t out4 = h->app1[i+4]>0?mask[4]:0;
uint8_t out5 = h->app1[i+5]>0?mask[5]:0;
uint8_t out6 = h->app1[i+6]>0?mask[6]:0;
uint8_t out7 = h->app1[i+7]>0?mask[7]:0;
uint8_t out0 = h->app1[8*i+0]>0?mask[0]:0;
uint8_t out1 = h->app1[8*i+1]>0?mask[1]:0;
uint8_t out2 = h->app1[8*i+2]>0?mask[2]:0;
uint8_t out3 = h->app1[8*i+3]>0?mask[3]:0;
uint8_t out4 = h->app1[8*i+4]>0?mask[4]:0;
uint8_t out5 = h->app1[8*i+5]>0?mask[5]:0;
uint8_t out6 = h->app1[8*i+6]>0?mask[6]:0;
uint8_t out7 = h->app1[8*i+7]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
}
@ -650,7 +650,7 @@ int srslte_tdec_run_all(srslte_tdec_t * h, float * input, uint8_t *output,
srslte_tdec_iteration(h, input, long_cb);
} while (h->n_iter < nof_iterations);
srslte_tdec_decision(h, output, long_cb);
srslte_tdec_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS;
}

@ -113,7 +113,7 @@ int main(int argc, char **argv) {
uint32_t frame_cnt;
float *llr;
uint8_t *llr_c;
uint8_t *data_tx, *data_rx, *symbols;
uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
uint32_t i, j;
float var[SNR_POINTS];
uint32_t snr_points;
@ -157,6 +157,11 @@ int main(int argc, char **argv) {
perror("malloc");
exit(-1);
}
data_rx_bytes = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes) {
perror("malloc");
exit(-1);
}
symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t));
if (!symbols) {
@ -248,12 +253,14 @@ int main(int argc, char **argv) {
gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all(&tdec, llr, data_rx, t, frame_length);
srslte_tdec_run_all(&tdec, llr, data_rx_bytes, t, frame_length);
}
gettimeofday(&tdata[2], NULL);
get_time_interval(tdata);
mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
errors += srslte_bit_diff(data_tx, data_rx, frame_length);
gettimeofday(&tdata[1], NULL);

@ -228,7 +228,7 @@ int main(int argc, char **argv) {
int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data);
gettimeofday(&t[2], NULL);
get_time_interval(t);
printf("DECODED %d in %d:%d (%.2f Mbps)\n", r?"Error":"OK",
printf("DECODED %s in %d:%d (%.2f Mbps)\n", r?"Error":"OK",
(int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec);
if (r) {
ret = -1;

@ -241,26 +241,30 @@ void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
}
/**
 * Scales a float vector and converts it to 16-bit integers (z[i] <- x[i] * scale).
 *
 * The implementation is chosen at compile time:
 *  - HAVE_VOLK_CONVERT_FI_FUNCTION defined: delegates to volk_32f_s32f_convert_16i();
 *  - else, HAVE_VECTOR_SIMD defined:        delegates to srslte_vec_convert_fi_simd();
 *  - else:                                   plain C loop using a float-to-int16_t cast,
 *                                            i.e. truncation towards zero, no clipping.
 *
 * NOTE(review): the three paths may round/saturate differently; confirm callers
 * do not depend on bit-exact results across build configurations.
 *
 * @param x     input samples, len elements
 * @param z     output samples, len elements
 * @param scale factor applied to every sample before the conversion
 * @param len   number of samples to convert
 */
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
#ifdef HAVE_VOLK_CONVERT_FI_FUNCTION
  volk_32f_s32f_convert_16i(z, x, scale, len);
#else /* !HAVE_VOLK_CONVERT_FI_FUNCTION */
#ifndef HAVE_VECTOR_SIMD
  /* Unsigned index: len is uint32_t, so avoid a signed/unsigned comparison. */
  for (uint32_t i = 0; i < len; i++) {
    z[i] = (int16_t) (x[i]*scale);
  }
#else /* HAVE_VECTOR_SIMD */
  srslte_vec_convert_fi_simd(x, z, scale, len);
#endif /* HAVE_VECTOR_SIMD */
#endif /* HAVE_VOLK_CONVERT_FI_FUNCTION */
}
/**
 * Scatters a float vector through a look-up table: y[lut[i]] = x[i].
 *
 * Only the scatter store is performed. Mixing it with a gather store
 * (y[i] = x[lut[i]]) in the same loop lets later iterations overwrite
 * already-placed elements, so the output would no longer be the permutation
 * described by lut.
 *
 * @param x   input samples, len elements
 * @param lut destination index for each input sample, len entries; every entry
 *            must be a valid index into y
 * @param y   output buffer, written at the positions named by lut
 * @param len number of input samples
 */
void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
  /* Unsigned index: len is uint32_t, so avoid a signed/unsigned comparison. */
  for (uint32_t i = 0; i < len; i++) {
    y[lut[i]] = x[i];
  }
}
/**
 * Scatters a vector of shorts through a look-up table: y[lut[i]] = x[i].
 *
 * Without HAVE_VECTOR_SIMD a plain C loop is used; otherwise the work is
 * delegated to srslte_vec_lut_sss_simd(). The C loop performs the scatter
 * store only, so both build configurations produce the same permutation.
 *
 * @param x   input samples, len elements
 * @param lut destination index for each input sample, len entries; every entry
 *            must be a valid index into y
 * @param y   output buffer, written at the positions named by lut
 * @param len number of input samples
 */
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
  /* Unsigned index: len is uint32_t, so avoid a signed/unsigned comparison. */
  for (uint32_t i = 0; i < len; i++) {
    y[lut[i]] = x[i];
  }
#else
  srslte_vec_lut_sss_simd(x, lut, y, len);
#endif
}
void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {

@ -135,3 +135,82 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
}
}
/**
 * Scatters a vector of shorts through a look-up table: y[lut[i]] = x[i].
 *
 * Input and table are read eight elements at a time with aligned 128-bit
 * loads, so x and lut must be 16-byte aligned. The scatter itself is done
 * element by element because SSE has no scatter store. The len % 8 trailing
 * elements are handled by a scalar loop.
 *
 * @param x   input samples, len elements, 16-byte aligned
 * @param lut destination index for each input sample, len entries, 16-byte
 *            aligned; every entry must be a valid index into y
 * @param y   output buffer, written at the positions named by lut
 * @param len number of input samples
 */
void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len)
{
  const uint32_t points = len / 8;
  const __m128i *xPtr   = (const __m128i *) x;
  const __m128i *lutPtr = (const __m128i *) lut;

  for (uint32_t number = 0; number < points; number++) {
    short          xLanes[8];
    unsigned short lutLanes[8];

    /* Spill the lanes to memory instead of using _mm_extract_epi16 with a
     * loop index: the extract selector must be a compile-time constant, which
     * only holds if the compiler happens to fully unroll the loop. */
    _mm_storeu_si128((__m128i *) xLanes,   _mm_load_si128(xPtr));
    _mm_storeu_si128((__m128i *) lutLanes, _mm_load_si128(lutPtr));

    for (int i = 0; i < 8; i++) {
      y[lutLanes[i]] = xLanes[i];
    }

    xPtr++;
    lutPtr++;
  }

  /* Remaining elements that do not fill a whole 128-bit register. */
  for (uint32_t number = points * 8; number < len; number++) {
    y[lut[number]] = x[number];
  }
}
/* Modified from volk_32f_s32f_convert_16i_a_sse2. */
/**
 * Scales a float vector and converts it to 16-bit integers using SSE2.
 *
 * Eight samples are processed per iteration: scaled, converted to 32-bit
 * integers with _mm_cvtps_epi32 and narrowed with _mm_packs_epi32, which
 * saturates to the int16_t range. The aligned load/store intrinsics require
 * x and z to be 16-byte aligned. The len % 8 trailing samples are converted
 * one by one, clipped to [-32768, 32767] and rounded with the same SSE
 * conversion as the vector body so that all samples round identically.
 *
 * @param x     input samples, len elements, 16-byte aligned
 * @param z     output samples, len elements, 16-byte aligned
 * @param scale factor applied to every sample before the conversion
 * @param len   number of samples to convert
 */
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
{
  const uint32_t eighthPoints = len / 8;

  const float *inputVectorPtr  = (const float *) x;
  int16_t     *outputVectorPtr = z;

  const float min_val = -32768;
  const float max_val = 32767;

  const __m128 vScalar = _mm_set_ps1(scale);

  for (uint32_t number = 0; number < eighthPoints; number++) {
    __m128 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    __m128 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;

    /* Scale */
    __m128 ret1 = _mm_mul_ps(inputVal1, vScalar);
    __m128 ret2 = _mm_mul_ps(inputVal2, vScalar);

    /* Convert to int32 and narrow to int16 with signed saturation */
    __m128i intInputVal1 = _mm_cvtps_epi32(ret1);
    __m128i intInputVal2 = _mm_cvtps_epi32(ret2);
    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

    _mm_store_si128((__m128i *) outputVectorPtr, intInputVal1);
    outputVectorPtr += 8;
  }

  /* Tail: samples that do not fill a whole group of eight. */
  for (uint32_t number = eighthPoints * 8; number < len; number++) {
    float r = x[number] * scale;
    if (r > max_val) {
      r = max_val;
    } else if (r < min_val) {
      r = min_val;
    }
    /* Scalar SSE conversion: same rounding mode as _mm_cvtps_epi32 above. */
    z[number] = (int16_t) _mm_cvtss_si32(_mm_set_ss(r));
  }
}
Loading…
Cancel
Save