Turbo decoder working with hard decision

master
ismagom 9 years ago
parent ad06998d91
commit 19256c261e

@ -43,6 +43,10 @@ SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t l
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len); SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

@ -207,7 +207,7 @@ void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
__m128i gv; __m128i gv;
__m128i *gPtr = (__m128i*) s->branch; __m128i *gPtr = (__m128i*) s->branch;
__m128i g, ap, an; __m128i g, ap, an;
__m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0); __m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
#define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \ #define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \
@ -564,13 +564,13 @@ void srslte_tdec_iteration(srslte_tdec_t * h, float * input, uint32_t long_cb)
} }
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2 // Interleave extrinsic output of DEC1 to form apriori info for decoder 2
srslte_vec_lut_sss(h->ext1, inter, h->app2, long_cb); srslte_vec_lut_sss(h->ext1, deinter, h->app2, long_cb);
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits // Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
srslte_map_gen_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, long_cb); srslte_map_gen_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, long_cb);
// Deinterleaved extrinsic bits become apriori info for decoder 1 // Deinterleaved extrinsic bits become apriori info for decoder 1
srslte_vec_lut_sss(h->ext2, deinter, h->app1, long_cb); srslte_vec_lut_sss(h->ext2, inter, h->app1, long_cb);
h->n_iter++; h->n_iter++;
} else { } else {
@ -626,14 +626,14 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long
// long_cb is always byte aligned // long_cb is always byte aligned
for (uint32_t i = 0; i < long_cb/8; i++) { for (uint32_t i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->app1[i+0]>0?mask[0]:0; uint8_t out0 = h->app1[8*i+0]>0?mask[0]:0;
uint8_t out1 = h->app1[i+1]>0?mask[1]:0; uint8_t out1 = h->app1[8*i+1]>0?mask[1]:0;
uint8_t out2 = h->app1[i+2]>0?mask[2]:0; uint8_t out2 = h->app1[8*i+2]>0?mask[2]:0;
uint8_t out3 = h->app1[i+3]>0?mask[3]:0; uint8_t out3 = h->app1[8*i+3]>0?mask[3]:0;
uint8_t out4 = h->app1[i+4]>0?mask[4]:0; uint8_t out4 = h->app1[8*i+4]>0?mask[4]:0;
uint8_t out5 = h->app1[i+5]>0?mask[5]:0; uint8_t out5 = h->app1[8*i+5]>0?mask[5]:0;
uint8_t out6 = h->app1[i+6]>0?mask[6]:0; uint8_t out6 = h->app1[8*i+6]>0?mask[6]:0;
uint8_t out7 = h->app1[i+7]>0?mask[7]:0; uint8_t out7 = h->app1[8*i+7]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7; output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
} }
@ -650,7 +650,7 @@ int srslte_tdec_run_all(srslte_tdec_t * h, float * input, uint8_t *output,
srslte_tdec_iteration(h, input, long_cb); srslte_tdec_iteration(h, input, long_cb);
} while (h->n_iter < nof_iterations); } while (h->n_iter < nof_iterations);
srslte_tdec_decision(h, output, long_cb); srslte_tdec_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;
} }

@ -113,7 +113,7 @@ int main(int argc, char **argv) {
uint32_t frame_cnt; uint32_t frame_cnt;
float *llr; float *llr;
uint8_t *llr_c; uint8_t *llr_c;
uint8_t *data_tx, *data_rx, *symbols; uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
uint32_t i, j; uint32_t i, j;
float var[SNR_POINTS]; float var[SNR_POINTS];
uint32_t snr_points; uint32_t snr_points;
@ -157,6 +157,11 @@ int main(int argc, char **argv) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
} }
data_rx_bytes = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes) {
perror("malloc");
exit(-1);
}
symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t)); symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t));
if (!symbols) { if (!symbols) {
@ -248,12 +253,14 @@ int main(int argc, char **argv) {
gettimeofday(&tdata[1], NULL); gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) { for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all(&tdec, llr, data_rx, t, frame_length); srslte_tdec_run_all(&tdec, llr, data_rx_bytes, t, frame_length);
} }
gettimeofday(&tdata[2], NULL); gettimeofday(&tdata[2], NULL);
get_time_interval(tdata); get_time_interval(tdata);
mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1; mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
errors += srslte_bit_diff(data_tx, data_rx, frame_length); errors += srslte_bit_diff(data_tx, data_rx, frame_length);
gettimeofday(&tdata[1], NULL); gettimeofday(&tdata[1], NULL);

@ -228,7 +228,7 @@ int main(int argc, char **argv) {
int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data); int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data);
gettimeofday(&t[2], NULL); gettimeofday(&t[2], NULL);
get_time_interval(t); get_time_interval(t);
printf("DECODED %d in %d:%d (%.2f Mbps)\n", r?"Error":"OK", printf("DECODED %s in %d:%d (%.2f Mbps)\n", r?"Error":"OK",
(int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec); (int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec);
if (r) { if (r) {
ret = -1; ret = -1;

@ -241,26 +241,30 @@ void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
} }
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) { void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
#ifdef HAVE_VOLK_CONVERT_FI_FUNCTION #ifndef HAVE_VECTOR_SIMD
volk_32f_s32f_convert_16i(z, x, scale, len);
#else
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = (int16_t) (x[i]*scale); z[i] = (int16_t) (x[i]*scale);
} }
#else
srslte_vec_convert_fi_simd(x, z, scale, len);
#endif #endif
} }
void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) { void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
for (int i=0;i<len;i++) { for (int i=0;i<len;i++) {
y[i] = x[lut[i]]; y[lut[i]] = x[i];
} }
} }
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) { void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
#ifndef HAVE_VECTOR_SIMD
for (int i=0;i<len;i++) { for (int i=0;i<len;i++) {
y[i] = x[lut[i]]; y[lut[i]] = x[i];
} }
#else
srslte_vec_lut_sss_simd(x, lut, y, len);
#endif
} }
void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) { void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {

@ -135,3 +135,82 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
} }
} }
/**
 * Scatter x into y through a look-up table: y[lut[i]] = x[i] for 0 <= i < len.
 *
 * Samples are fetched eight at a time with aligned 128-bit loads, so x and
 * lut must be 16-byte aligned whenever len >= 8. The remaining len % 8
 * samples are handled by a plain scalar loop.
 *
 * @param x    input samples
 * @param lut  destination index in y for each input sample
 * @param y    output buffer; must be large enough for every index in lut
 * @param len  number of samples in x and entries in lut
 */
void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len)
{
  const unsigned int points = len / 8;
  const __m128i *xPtr = (const __m128i *) x;
  const __m128i *lutPtr = (const __m128i *) lut;

  /* _mm_extract_epi16() only accepts a compile-time constant lane index, so
   * it cannot be driven by a loop counter. Spill both vectors to small
   * arrays instead and scatter from there. */
  short xLanes[8];
  unsigned short lutLanes[8];

  for (unsigned int number = 0; number < points; number++) {
    _mm_storeu_si128((__m128i *) xLanes, _mm_load_si128(xPtr));
    _mm_storeu_si128((__m128i *) lutLanes, _mm_load_si128(lutPtr));

    /* Lanes are written in ascending order so that, if lut contains
     * duplicate indices, the last sample wins as in the scalar version. */
    for (int i = 0; i < 8; i++) {
      y[lutLanes[i]] = xLanes[i];
    }

    xPtr++;
    lutPtr++;
  }

  /* Tail: samples that do not fill a complete group of eight. */
  for (unsigned int number = points * 8; number < len; number++) {
    y[lut[number]] = x[number];
  }
}
/**
 * Scale a float vector and convert it to 16-bit integers: z[i] = x[i] * scale.
 *
 * Adapted from volk_32f_s32f_convert_16i_a_sse2. The explicit float min/max
 * clipping of the VOLK kernel is dropped; results are still saturated to the
 * int16 range by the 32->16 bit pack instruction. Rounding follows the
 * current SSE rounding mode (round-to-nearest-even by default).
 *
 * Samples are processed eight at a time with aligned loads/stores, so x and z
 * must be 16-byte aligned whenever len >= 8. The remaining len % 8 samples
 * are converted one by one with the same rounding and saturation.
 *
 * @param x      input float samples
 * @param z      output int16 samples
 * @param scale  factor applied to every sample before conversion
 * @param len    number of samples
 */
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
{
  const unsigned int eighthPoints = len / 8;

  const float *inputVectorPtr = (const float *) x;
  int16_t *outputVectorPtr = z;

  __m128 vScalar = _mm_set_ps1(scale);
  __m128 inputVal1, inputVal2;
  __m128i intInputVal1, intInputVal2;
  __m128 ret1, ret2;

  for (unsigned int number = 0; number < eighthPoints; number++) {
    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;

    // Scale
    ret1 = _mm_mul_ps(inputVal1, vScalar);
    ret2 = _mm_mul_ps(inputVal2, vScalar);

    // Convert to 32-bit integers, then pack to 16 bit with signed saturation
    intInputVal1 = _mm_cvtps_epi32(ret1);
    intInputVal2 = _mm_cvtps_epi32(ret2);
    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

    _mm_store_si128((__m128i *) outputVectorPtr, intInputVal1);
    outputVectorPtr += 8;
  }

  /* Tail: use the scalar SSE multiply/convert followed by an integer clamp so
   * that these samples get the same result as a lane of the vector path. */
  for (unsigned int number = eighthPoints * 8; number < len; number++) {
    int v = _mm_cvtss_si32(_mm_mul_ss(_mm_set_ss(x[number]), vScalar));
    if (v > INT16_MAX) {
      v = INT16_MAX;
    } else if (v < INT16_MIN) {
      v = INT16_MIN;
    }
    z[number] = (int16_t) v;
  }
}
Loading…
Cancel
Save