From 08c67573edb87881ec39213a8070cd2a34ba93c9 Mon Sep 17 00:00:00 2001 From: ismagom Date: Sun, 22 Nov 2015 20:12:06 +0100 Subject: [PATCH] Added viterbi SIMD --- srslte/include/srslte/fec/viterbi.h | 12 ++ srslte/lib/fec/src/viterbi.c | 130 ++++++++++-- srslte/lib/fec/src/viterbi37.h | 21 +- srslte/lib/fec/src/viterbi37_port.c | 36 +++- srslte/lib/fec/src/viterbi37_sse.c | 294 ++++++++++++++++++++++++++++ srslte/lib/fec/test/CMakeLists.txt | 18 +- srslte/lib/fec/test/viterbi_test.c | 246 +++++++++-------------- srslte/lib/fec/test/viterbi_test.h | 41 ++-- srslte/lib/phch/test/pdsch_test.c | 43 ++-- 9 files changed, 637 insertions(+), 204 deletions(-) create mode 100644 srslte/lib/fec/src/viterbi37_sse.c diff --git a/srslte/include/srslte/fec/viterbi.h b/srslte/include/srslte/fec/viterbi.h index d31252e90..7493730f5 100644 --- a/srslte/include/srslte/fec/viterbi.h +++ b/srslte/include/srslte/fec/viterbi.h @@ -39,6 +39,8 @@ #include #include "srslte/config.h" + + typedef enum { SRSLTE_VITERBI_27 = 0, SRSLTE_VITERBI_29, @@ -82,4 +84,14 @@ SRSLTE_API int srslte_viterbi_decode_uc(srslte_viterbi_t *q, uint8_t *data, uint32_t frame_length); + + +SRSLTE_API int srslte_viterbi_init_sse(srslte_viterbi_t *q, + srslte_viterbi_type_t type, + uint32_t poly[3], + uint32_t max_frame_length, + bool tail_bitting); + + + #endif diff --git a/srslte/lib/fec/src/viterbi.c b/srslte/lib/fec/src/viterbi.c index 4731e1f5b..f960c334d 100644 --- a/srslte/lib/fec/src/viterbi.c +++ b/srslte/lib/fec/src/viterbi.c @@ -38,6 +38,8 @@ #define DEB 0 +#undef LV_HAVE_SSE + int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { srslte_viterbi_t *q = o; uint32_t i; @@ -54,16 +56,17 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { init_viterbi37_port(q->ptr, q->tail_biting ? -1 : 0); /* Decode block */ + uint8_t *tmp = q->tmp; if (q->tail_biting) { - memcpy(q->tmp, symbols, 3 * frame_length * sizeof(uint8_t)); + memcpy(tmp, symbols, 3 * frame_length * sizeof(uint8_t)); for (i = 0; i < 3 * (q->K - 1); i++) { q->tmp[i + 3 * frame_length] = q->tmp[i]; } } else { - q->tmp = symbols; + tmp = symbols; } - update_viterbi37_blk_port(q->ptr, q->tmp, frame_length + q->K - 1, + update_viterbi37_blk_port(q->ptr, tmp, frame_length + q->K - 1, q->tail_biting ? &best_state : NULL); /* Do Viterbi chainback */ @@ -73,6 +76,57 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { return q->framebits; } + +#ifdef LV_HAVE_SSE +int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { + srslte_viterbi_t *q = o; + uint32_t i; + + uint32_t best_state; + + if (frame_length > q->framebits) { + fprintf(stderr, "Initialized decoder for max frame length %d bits\n", + q->framebits); + return -1; + } + + /* Initialize Viterbi decoder */ + init_viterbi37_sse(q->ptr, q->tail_biting ? -1 : 0); + + /* Decode block */ + uint8_t *tmp = q->tmp; + if (q->tail_biting) { + memcpy(tmp, symbols, 3 * frame_length * sizeof(uint8_t)); + for (i = 0; i < 3 * (q->K - 1); i++) { + q->tmp[i + 3 * frame_length] = q->tmp[i]; + } + } else { + tmp = symbols; + } + + update_viterbi37_blk_sse(q->ptr, tmp, frame_length + q->K - 1, + q->tail_biting ? &best_state : NULL); + + /* Do Viterbi chainback */ + chainback_viterbi37_sse(q->ptr, data, frame_length, + q->tail_biting ? best_state : 0); + + return q->framebits; +} + +void free37_sse(void *o) { + srslte_viterbi_t *q = o; + if (q->symbols_uc) { + free(q->symbols_uc); + } + if (q->tmp) { + free(q->tmp); + } + delete_viterbi37_sse(q->ptr); +} + +#endif + void free37(void *o) { srslte_viterbi_t *q = o; if (q->symbols_uc) { @@ -108,7 +162,7 @@ int init37(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_ } else { q->tmp = NULL; } - + if ((q->ptr = create_viterbi37_port(poly, framebits)) == NULL) { fprintf(stderr, "create_viterbi37 failed\n"); free37(q); @@ -118,21 +172,68 @@ int init37(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_ } } +#ifdef LV_HAVE_SSE +int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_biting) { + q->K = 7; + q->R = 3; + q->framebits = framebits; + q->gain_quant = 10; + q->tail_biting = tail_biting; + q->decode = decode37_sse; + q->free = free37_sse; + q->decode_f = NULL; + q->symbols_uc = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); + if (!q->symbols_uc) { + perror("malloc"); + return -1; + } + if (q->tail_biting) { + q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); + if (!q->tmp) { + perror("malloc"); + free37(q); + return -1; + } + } else { + q->tmp = NULL; + } + + if ((q->ptr = create_viterbi37_sse(poly, framebits)) == NULL) { + fprintf(stderr, "create_viterbi37 failed\n"); + free37(q); + return -1; + } else { + return 0; + } +} +#endif + void srslte_viterbi_set_gain_quant(srslte_viterbi_t *q, float gain_quant) { q->gain_quant = gain_quant; } -int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, uint32_t poly[3], - uint32_t max_frame_length, bool tail_bitting) { +int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, uint32_t poly[3], uint32_t max_frame_length, bool tail_bitting) +{ switch (type) { case SRSLTE_VITERBI_37: +#ifdef LV_HAVE_SSE + return init37_sse(q, poly, max_frame_length, tail_bitting); +#else return init37(q, poly, max_frame_length, tail_bitting); +#endif default: fprintf(stderr, "Decoder not implemented\n"); return -1; } } +#ifdef LV_HAVE_SSE +int srslte_viterbi_init_sse(srslte_viterbi_t *q, srslte_viterbi_type_t type, uint32_t poly[3], uint32_t max_frame_length, bool tail_bitting) +{ + return init37_sse(q, poly, max_frame_length, tail_bitting); +} +#endif + void srslte_viterbi_free(srslte_viterbi_t *q) { if (q->free) { q->free(q); @@ -141,7 +242,8 @@ void srslte_viterbi_free(srslte_viterbi_t *q) { } /* symbols are real-valued */ -int srslte_viterbi_decode_f(srslte_viterbi_t *q, float *symbols, uint8_t *data, uint32_t frame_length) { +int srslte_viterbi_decode_f(srslte_viterbi_t *q, float *symbols, uint8_t *data, uint32_t frame_length) +{ uint32_t len; if (frame_length > q->framebits) { fprintf(stderr, "Initialized decoder for max frame length %d bits\n", @@ -154,16 +256,16 @@ int srslte_viterbi_decode_f(srslte_viterbi_t *q, float *symbols, uint8_t *data, len = 3 * (frame_length + q->K - 1); } if (!q->decode_f) { - srslte_vec_quant_fuc(symbols, q->symbols_uc, q->gain_quant, 127.5, 255, len); - return q->decode(q, q->symbols_uc, data, frame_length); + srslte_vec_quant_fuc(symbols, q->symbols_uc, q->gain_quant, 127.5, 255, len); + srslte_vec_fprint_b(stdout, q->symbols_uc, len); + return srslte_viterbi_decode_uc(q, q->symbols_uc, data, frame_length); } else { return q->decode_f(q, symbols, data, frame_length); - } - - + } } -int srslte_viterbi_decode_uc(srslte_viterbi_t *q, uint8_t *symbols, uint8_t *data, - uint32_t frame_length) { + +int srslte_viterbi_decode_uc(srslte_viterbi_t *q, uint8_t *symbols, uint8_t *data, uint32_t frame_length) +{ return q->decode(q, symbols, data, frame_length); } diff --git a/srslte/lib/fec/src/viterbi37.h b/srslte/lib/fec/src/viterbi37.h index 28f9a60d0..dd325d7b4 100644 --- a/srslte/lib/fec/src/viterbi37.h +++ b/srslte/lib/fec/src/viterbi37.h @@ -30,7 +30,7 @@ void *create_viterbi37_port(uint32_t polys[3], uint32_t len); int init_viterbi37_port(void *p, - uint32_t starting_state); + int starting_state); int chainback_viterbi37_port(void *p, uint8_t *data, @@ -43,3 +43,22 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t *best_state); + + +void *create_viterbi37_sse(uint32_t polys[3], + uint32_t len); + +int init_viterbi37_sse(void *p, + int starting_state); + +int chainback_viterbi37_sse(void *p, + uint8_t *data, + uint32_t nbits, + uint32_t endstate); + +void delete_viterbi37_sse(void *p); + +int update_viterbi37_blk_sse(void *p, + uint8_t *syms, + uint32_t nbits, + uint32_t *best_state); diff --git a/srslte/lib/fec/src/viterbi37_port.c b/srslte/lib/fec/src/viterbi37_port.c index f7644f345..c55d95b4e 100644 --- a/srslte/lib/fec/src/viterbi37_port.c +++ b/srslte/lib/fec/src/viterbi37_port.c @@ -1,8 +1,8 @@ -/* Adapted Viterbi Phil Karn's r=1/3 k=9 viterbi decoder to r=1/3 k=7 +/* Adapted Phil Karn's r=1/3 k=9 viterbi decoder to r=1/3 k=7 * * K=9 r=1/3 Viterbi decoder in portable C * Copyright Aug 2006, Phil Karn, KA9Q - * May be used under the terms of the GNU Affero General Public License (LGPL) + * May be used under the terms of the GNU Lesser General Public License (LGPL) */ #include #include @@ -13,6 +13,8 @@ #include "parity.h" #include +//#define DEBUG + typedef union { uint32_t w[64]; } metric_t; @@ -21,7 +23,7 @@ typedef union { } decision_t; static union { - uint8_t c[128]; + uint8_t c[32]; } Branchtab37[3]; /* State info for instance of Viterbi decoder */ @@ -34,7 +36,7 @@ struct v37 { }; /* Initialize Viterbi decoder for start of new frame */ -int init_viterbi37_port(void *p, uint32_t starting_state) { +int init_viterbi37_port(void *p, int starting_state) { struct v37 *vp = p; uint32_t i; @@ -112,6 +114,9 @@ int chainback_viterbi37_port(void *p, uint8_t *data, /* Decoded output data */ k = (d[nbits].w[(endstate >> 2) / 32] >> ((endstate >> 2) % 32)) & 1; endstate = (endstate >> 1) | (k << 7); data[nbits] = k; +#ifdef DEBUG + // printf("endstate=%3d, k=%d, w[0]=%d, w[1]=%d\n", endstate, k, d[nbits].w[0]&1, d[nbits].w[1]&1); +#endif } return 0; } @@ -156,6 +161,11 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t * return -1; uint32_t k=0; d = (decision_t *) vp->dp; + +#ifdef DEBUG + printf("["); +#endif + while (nbits--) { void *tmp; uint8_t sym0, sym1, sym2; @@ -170,7 +180,20 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t * k++; for (i = 0; i < 32; i++) BFLY(i); + +#ifdef DEBUG + uint32_t wmin=UINT_MAX; + int minstate = 0; + for (int j=0;j<64;j++) { + if (vp->new_metrics->w[j] <= wmin) { + wmin = vp->new_metrics->w[j]; + minstate = j; + } + } + printf("%3d, ", minstate); +#endif + d++; tmp = vp->old_metrics; vp->old_metrics = vp->new_metrics; @@ -188,5 +211,10 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t * *best_state = bst; } vp->dp = d; + +#ifdef DEBUG + printf("];\n"); +#endif + return 0; } diff --git a/srslte/lib/fec/src/viterbi37_sse.c b/srslte/lib/fec/src/viterbi37_sse.c new file mode 100644 index 000000000..39c4bf5e2 --- /dev/null +++ b/srslte/lib/fec/src/viterbi37_sse.c @@ -0,0 +1,294 @@ +/* Adapted Phil Karn's r=1/3 k=9 viterbi decoder to r=1/3 k=7 + * + * K=15 r=1/6 Viterbi decoder for x86 SSE2 + * Copyright Mar 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#include +#include +#include +#include +#include +#include "parity.h" + +//#define DEBUG + +#ifdef LV_HAVE_SSE + +#include + +typedef union { + unsigned char c[64]; + __m128i v[4]; +} metric_t; +typedef union { + unsigned long w[2]; + unsigned char c[8]; + unsigned short s[4]; + __m64 v[1]; +} decision_t; + +union branchtab27 { + unsigned char c[32]; + __m128i v[2]; +} Branchtab37_sse2[3]; + + +/* State info for instance of Viterbi decoder */ +struct v37 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +void set_viterbi37_polynomial_sse(uint32_t polys[3]) { + int state; + + for(state=0;state < 32;state++){ + Branchtab37_sse2[0].c[state] = (polys[0] < 0) ^ parity((2*state) & polys[0]) ? 255:0; + Branchtab37_sse2[1].c[state] = (polys[1] < 0) ^ parity((2*state) & polys[1]) ? 255:0; + Branchtab37_sse2[2].c[state] = (polys[2] < 0) ^ parity((2*state) & polys[2]) ? 255:0; + } +} + + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi37_sse(void *p, int starting_state) { + struct v37 *vp = p; + uint32_t i; + + for(i=0;i<64;i++) + vp->metrics1.c[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + if (starting_state != -1) { + vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ + } + return 0; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi37_sse(uint32_t polys[3], uint32_t len) { + void *p; + struct v37 *vp; + + set_viterbi37_polynomial_sse(polys); + + /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ + if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v37))) + return NULL; + + vp = (struct v37 *)p; + if(posix_memalign(&p, sizeof(__m128i),(len+6)*sizeof(decision_t))) { + free(vp); + return NULL; + } + vp->decisions = (decision_t *)p; + return vp; +} + + +/* Viterbi chainback */ +int chainback_viterbi37_sse( + void *p, + uint8_t *data, /* Decoded output data */ + uint32_t nbits, /* Number of data bits */ + uint32_t endstate) { /* Terminal encoder state */ + struct v37 *vp = p; + + if (p == NULL) + return -1; + + decision_t *d = (decision_t *)vp->decisions; + + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 64; + endstate <<= 2; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 6; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; + endstate = (endstate >> 1) | (k << 7); + data[nbits] = k; +#ifdef DEBUG +// printf("endstate=%3d, k=%d, w[0]=%d, w[1]=%d\n", endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1); +#endif + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi37_sse(void *p){ + struct v37 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +void print_128i(char *s, __m128i val) { + + printf("%s: ", s); + + uint8_t *x = (uint8_t*) &val; + for (int i=0;i<16;i++) { + printf("%3d, ", x[i]); + } + printf("\n"); +} + +void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *best_state) { + struct v37 *vp = p; + decision_t *d; + + if(p == NULL) + return; + +#ifdef DEBUG + printf("["); +#endif + + d = (decision_t *) vp->dp; + while(nbits--) { + __m128i sym0v,sym1v,sym2v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_epi8(syms[0]); + sym1v = _mm_set1_epi8(syms[1]); + sym2v = _mm_set1_epi8(syms[2]); + syms += 3; + + for(i=0;i<2;i++){ + __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics */ + m0 = _mm_avg_epu8(_mm_xor_si128(Branchtab37_sse2[0].v[i],sym0v),_mm_xor_si128(Branchtab37_sse2[1].v[i],sym1v)); + metric = _mm_avg_epu8(_mm_xor_si128(Branchtab37_sse2[2].v[i],sym2v),m0); + +#ifdef DEBUG + print_128i("metric_initial", metric); +#endif + /* There's no packed bytes right shift in SSE2, so we use the word version and mask + * (I'm *really* starting to like Altivec...) + */ + metric = _mm_srli_epi16(metric,3); + metric = _mm_and_si128(metric,_mm_set1_epi8(31)); + m_metric = _mm_sub_epi8(_mm_set1_epi8(31),metric); + +#ifdef DEBUG + print_128i("metric ", metric); + print_128i("m_metric ", m_metric); +#endif + + /* Add branch metrics to path metrics */ + m0 = _mm_add_epi8(vp->old_metrics->v[i],metric); + m3 = _mm_add_epi8(vp->old_metrics->v[2+i],metric); + m1 = _mm_add_epi8(vp->old_metrics->v[2+i],m_metric); + m2 = _mm_add_epi8(vp->old_metrics->v[i],m_metric); + + /* Compare and select, using modulo arithmetic */ + decision0 = _mm_cmpgt_epi8(_mm_sub_epi8(m0,m1),_mm_setzero_si128()); + decision1 = _mm_cmpgt_epi8(_mm_sub_epi8(m2,m3),_mm_setzero_si128()); + survivor0 = _mm_or_si128(_mm_and_si128(decision0,m1),_mm_andnot_si128(decision0,m0)); + survivor1 = _mm_or_si128(_mm_and_si128(decision1,m3),_mm_andnot_si128(decision1,m2)); + + /* Pack each set of decisions into 16 bits */ + d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1)); + d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1)); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_epi8(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi8(survivor0,survivor1); + + } + + +#ifdef DEBUG + uint8_t wmin=UINT8_MAX; + int minstate = 0; + printf("[%d]: ", nbits); + for (int j=0;j<64;j++) { + printf("%d, ", vp->new_metrics->c[j]); + if (vp->new_metrics->c[j] <= wmin) { + wmin = vp->new_metrics->c[j]; + minstate = j; + } + } + printf("\n"); + printf("%3d, ",minstate); +#endif + + // See if we need to normalize + if (vp->new_metrics->c[0] > 100) { + int i; + uint8_t adjust; + __m128i adjustv; + union { __m128i v; signed short w[8]; } t; + + adjustv = vp->new_metrics->v[0]; + for(i=1;i<4;i++) { + adjustv = _mm_min_epu8(adjustv,vp->new_metrics->v[i]); + } + + adjustv = _mm_min_epu8(adjustv,_mm_srli_si128(adjustv,8)); + adjustv = _mm_min_epu8(adjustv,_mm_srli_si128(adjustv,4)); + adjustv = _mm_min_epu8(adjustv,_mm_srli_si128(adjustv,2)); + + t.v = adjustv; + adjust = t.w[0]; + adjustv = _mm_set1_epi8(adjust); + + /* We cannot use a saturated subtract, because we often have to adjust by more than SHRT_MAX + * This is okay since it can't overflow anyway + */ + for(i=0;i<4;i++) + vp->new_metrics->v[i] = _mm_sub_epi8(vp->new_metrics->v[i],adjustv); + } + + + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + + if (best_state) { + uint32_t i, bst=0; + uint8_t minmetric=UINT8_MAX; + for (i=0;i<64;i++) { + if (vp->old_metrics->c[i] <= minmetric) { + bst = i; + minmetric = vp->old_metrics->c[i]; + } + } + *best_state = bst; + } + + #ifdef DEBUG + printf("];\n===========================================\n"); +#endif + + vp->dp = d; +} + +#endif + + + diff --git a/srslte/lib/fec/test/CMakeLists.txt b/srslte/lib/fec/test/CMakeLists.txt index b76a67152..b87076ec9 100644 --- a/srslte/lib/fec/test/CMakeLists.txt +++ b/srslte/lib/fec/test/CMakeLists.txt @@ -61,15 +61,15 @@ ADD_TEST(turbocoder_test_all turbocoder_test) ADD_EXECUTABLE(viterbi_test viterbi_test.c) TARGET_LINK_LIBRARIES(viterbi_test srslte) -ADD_TEST(viterbi_40_0 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 0.0) -ADD_TEST(viterbi_40_2 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 2.0) -ADD_TEST(viterbi_40_3 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 3.0) -ADD_TEST(viterbi_40_4 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 4.5) - -ADD_TEST(viterbi_1000_0 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 0.0) -ADD_TEST(viterbi_1000_2 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 2.0) -ADD_TEST(viterbi_1000_3 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 3.0) -ADD_TEST(viterbi_1000_4 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 4.5) +ADD_TEST(viterbi_40_0 viterbi_test -n 1000 -s 1 -l 40 -t -e 0.0) +ADD_TEST(viterbi_40_2 viterbi_test -n 1000 -s 1 -l 40 -t -e 2.0) +ADD_TEST(viterbi_40_3 viterbi_test -n 1000 -s 1 -l 40 -t -e 3.0) +ADD_TEST(viterbi_40_4 viterbi_test -n 1000 -s 1 -l 40 -t -e 4.5) + +ADD_TEST(viterbi_1000_0 viterbi_test -n 100 -s 1 -l 1000 -t -e 0.0) +ADD_TEST(viterbi_1000_2 viterbi_test -n 100 -s 1 -l 1000 -t -e 2.0) +ADD_TEST(viterbi_1000_3 viterbi_test -n 100 -s 1 -l 1000 -t -e 3.0) +ADD_TEST(viterbi_1000_4 viterbi_test -n 100 -s 1 -l 1000 -t -e 4.5) BuildMex(MEXNAME viterbi SOURCES viterbi_test_mex.c LIBRARIES srslte srslte_mex) diff --git a/srslte/lib/fec/test/viterbi_test.c b/srslte/lib/fec/test/viterbi_test.c index 06c47436f..9354e226e 100644 --- a/srslte/lib/fec/test/viterbi_test.c +++ b/srslte/lib/fec/test/viterbi_test.c @@ -37,33 +37,27 @@ #include "viterbi_test.h" - int frame_length = 1000, nof_frames = 128; float ebno_db = 100.0; uint32_t seed = 0; bool tail_biting = false; -int K = -1; #define SNR_POINTS 10 #define SNR_MIN 0.0 #define SNR_MAX 5.0 -#define NCODS 3 -#define NTYPES 1+NCODS - void usage(char *prog) { - printf("Usage: %s [nlestk]\n", prog); + printf("Usage: %s [nlest]\n", prog); printf("\t-n nof_frames [Default %d]\n", nof_frames); printf("\t-l frame_length [Default %d]\n", frame_length); printf("\t-e ebno in dB [Default scan]\n"); printf("\t-s seed [Default 0=time]\n"); printf("\t-t tail_bitting [Default %s]\n", tail_biting ? "yes" : "no"); - printf("\t-k constraint length [Default both]\n", K); } void parse_args(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "nlstek")) != -1) { + while ((opt = getopt(argc, argv, "nlste")) != -1) { switch (opt) { case 'n': nof_frames = atoi(argv[optind]); @@ -80,9 +74,6 @@ void parse_args(int argc, char **argv) { case 't': tail_biting = true; break; - case 'k': - K = atoi(argv[optind]); - break; default: usage(argv[0]); exit(-1); @@ -90,49 +81,22 @@ void parse_args(int argc, char **argv) { } } -void output_matlab(float ber[NTYPES][SNR_POINTS], int snr_points, - srslte_convcoder_t cod[NCODS], int ncods) { - int i, j, n; - FILE *f = fopen("srslte_viterbi_snr.m", "w"); - if (!f) { - perror("fopen"); - exit(-1); - } - fprintf(f, "ber=["); - for (j = 0; j < NTYPES; j++) { - for (i = 0; i < snr_points; i++) { - fprintf(f, "%g ", ber[j][i]); - } - fprintf(f, "; "); - } - fprintf(f, "];\n"); - fprintf(f, "snr=linspace(%g,%g-%g/%d,%d);\n", SNR_MIN, SNR_MAX, SNR_MAX, - snr_points, snr_points); - fprintf(f, "semilogy(snr,ber,snr,0.5*erfc(sqrt(10.^(snr/10))));\n"); - fprintf(f, "legend('uncoded',"); - for (n=0;n max_coded_length) { - max_coded_length = coded_length[i]; - } - srslte_viterbi_init(&dec[i], srslte_viterbi_type[i], cod[i].poly, frame_length, cod[i].tail_biting); - printf("Convolutional Code 1/3 K=%d Tail bitting: %s\n", cod[i].K, cod[i].tail_biting ? "yes" : "no"); - } - + cod.poly[0] = 0x6D; + cod.poly[1] = 0x4F; + cod.poly[2] = 0x57; + cod.K = 7; + cod.tail_biting = tail_biting; + + cod.R = 3; + coded_length = cod.R * (frame_length + ((cod.tail_biting) ? 0 : cod.K - 1)); + srslte_viterbi_init(&dec, SRSLTE_VITERBI_37, cod.poly, frame_length, cod.tail_biting); + printf("Convolutional Code 1/3 K=%d Tail bitting: %s\n", cod.K, cod.tail_biting ? "yes" : "no"); + +#ifdef TEST_SSE + srslte_viterbi_init_sse(&dec_sse, SRSLTE_VITERBI_37, cod.poly, frame_length, cod.tail_biting); +#endif + printf(" Frame length: %d\n", frame_length); if (ebno_db < 100.0) { printf(" EbNo: %.2f\n", ebno_db); @@ -205,25 +132,29 @@ int main(int argc, char **argv) { exit(-1); } - for (i = 0; i < NTYPES; i++) { - data_rx[i] = malloc(frame_length * sizeof(uint8_t)); - if (!data_rx[i]) { - perror("malloc"); - exit(-1); - } + data_rx = malloc(frame_length * sizeof(uint8_t)); + if (!data_rx) { + perror("malloc"); + exit(-1); } - symbols = malloc(max_coded_length * sizeof(uint8_t)); + data_rx2 = malloc(frame_length * sizeof(uint8_t)); + if (!data_rx2) { + perror("malloc"); + exit(-1); + } + + symbols = malloc(coded_length * sizeof(uint8_t)); if (!symbols) { perror("malloc"); exit(-1); } - llr = malloc(max_coded_length * sizeof(float)); + llr = malloc(coded_length * sizeof(float)); if (!llr) { perror("malloc"); exit(-1); } - llr_c = malloc(2 * max_coded_length * sizeof(uint8_t)); + llr_c = malloc(2 * coded_length * sizeof(uint8_t)); if (!llr_c) { perror("malloc"); exit(-1); @@ -250,9 +181,10 @@ int main(int argc, char **argv) { for (i = 0; i < snr_points; i++) { frame_cnt = 0; - for (j = 0; j < NTYPES; j++) { - errors[j] = 0; - } + errors = 0; +#ifdef TEST_SSE + errors2 = 0; +#endif while (frame_cnt < nof_frames) { /* generate data_tx */ @@ -265,76 +197,84 @@ int main(int argc, char **argv) { llr[j] = data_tx[j] ? sqrt(2) : -sqrt(2); } srslte_ch_awgn_f(llr, llr, varunc[i], frame_length); - for (j = 0; j < frame_length; j++) { - data_rx[0][j] = llr[j] > 0 ? 1 : 0; - } /* coded BER */ - for (n=0;n expected_errors); + printf("errors =%d, expected =%d\n", errors, expected_errors); + exit(errors > expected_errors); } } else { printf("\n"); - output_matlab(ber, snr_points, cod, ncods); printf("Done\n"); exit(0); } diff --git a/srslte/lib/fec/test/viterbi_test.h b/srslte/lib/fec/test/viterbi_test.h index 9c9e5d6ab..7d94e7c7e 100644 --- a/srslte/lib/fec/test/viterbi_test.h +++ b/srslte/lib/fec/test/viterbi_test.h @@ -30,35 +30,52 @@ typedef struct { int n; uint32_t s; int len; - int k; bool tail; float ebno; int errors; }expected_errors_t; +/* The SSE implementation uses 5-bit metrics and has 0.75 dB loss approximation */ +#ifdef LV_HAVE_SSE static expected_errors_t expected_errors[] = { - {1000, 1, 40, 7, true, 0.0, 5363}, - {1000, 1, 40, 7, true, 2.0, 356}, - {1000, 1, 40, 7, true, 3.0, 48}, - {1000, 1, 40, 7, true, 4.5, 0}, + {1000, 1, 40, true, 0.0, 7282}, + {1000, 1, 40, true, 2.0, 725}, + {1000, 1, 40, true, 3.0, 176}, + {1000, 1, 40, true, 4.5, 24}, - {100, 1, 1000, 7, true, 0.0, 8753}, - {100, 1, 1000, 7, true, 2.0, 350}, - {100, 1, 1000, 7, true, 3.0, 33}, - {100, 1, 1000, 7, true, 4.5, 0}, + {100, 1, 1000, true, 0.0, 13208}, + {100, 1, 1000, true, 2.0, 939}, + {100, 1, 1000, true, 3.0, 110}, + {100, 1, 1000, true, 4.5, 5}, - {-1, -1, -1, -1, true, -1.0, -1} + {-1, -1, -1, true, -1.0, -1} }; -int get_expected_errors(int n, uint32_t s, int len, int k, bool tail, float ebno) { +#else + +static expected_errors_t expected_errors[] = { + {1000, 1, 40, true, 0.0, 5363}, + {1000, 1, 40, true, 2.0, 356}, + {1000, 1, 40, true, 3.0, 48}, + {1000, 1, 40, true, 4.5, 0}, + + {100, 1, 1000, true, 0.0, 8753}, + {100, 1, 1000, true, 2.0, 350}, + {100, 1, 1000, true, 3.0, 33}, + {100, 1, 1000, true, 4.5, 0}, + + {-1, -1, -1, true, -1.0, -1} +}; + +#endif +int get_expected_errors(int n, uint32_t s, int len, bool tail, float ebno) { int i; i=0; while(expected_errors[i].n != -1) { if (expected_errors[i].n == n && expected_errors[i].s == s && expected_errors[i].len == len - && expected_errors[i].k == k && expected_errors[i].tail == tail && expected_errors[i].ebno == ebno) { break; diff --git a/srslte/lib/phch/test/pdsch_test.c b/srslte/lib/phch/test/pdsch_test.c index afd48ac7c..8ad2737ab 100644 --- a/srslte/lib/phch/test/pdsch_test.c +++ b/srslte/lib/phch/test/pdsch_test.c @@ -105,17 +105,27 @@ void parse_args(int argc, char **argv) { } } +uint8_t *data = NULL; +cf_t *ce[SRSLTE_MAX_PORTS]; +srslte_softbuffer_rx_t softbuffer_rx; +srslte_ra_dl_grant_t grant; +srslte_pdsch_cfg_t pdsch_cfg; +cf_t *sf_symbols; +cf_t *slot_symbols[SRSLTE_MAX_PORTS]; +srslte_pdsch_t pdsch; +srslte_ofdm_t ofdm_tx, ofdm_rx; + +int dummy_function() { + srslte_ofdm_rx_sf(&ofdm_rx, sf_symbols, slot_symbols[1]); + srslte_softbuffer_rx_reset_tbs(&softbuffer_rx, grant.mcs.tbs); + return srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data); +} + int main(int argc, char **argv) { - srslte_pdsch_t pdsch; uint32_t i, j; - uint8_t *data = NULL; - cf_t *ce[SRSLTE_MAX_PORTS]; - cf_t *slot_symbols[SRSLTE_MAX_PORTS]; int ret = -1; struct timeval t[3]; - srslte_pdsch_cfg_t pdsch_cfg; srslte_softbuffer_tx_t softbuffer_tx; - srslte_softbuffer_rx_t softbuffer_rx; uint32_t rv; parse_args(argc,argv); @@ -132,12 +142,16 @@ int main(int argc, char **argv) { dci.mcs_idx = mcs; dci.rv_idx = rv_idx; dci.type0_alloc.rbg_bitmask = 0xffffffff; - srslte_ra_dl_grant_t grant; if (srslte_ra_dl_dci_to_grant(&dci, cell.nof_prb, true, &grant)) { fprintf(stderr, "Error computing resource allocation\n"); return ret; } + + srslte_ofdm_tx_init(&ofdm_tx, cell.cp, cell.nof_prb); + srslte_ofdm_rx_init(&ofdm_rx, cell.cp, cell.nof_prb); + sf_symbols=srslte_vec_malloc(sizeof(cf_t)*SRSLTE_SF_LEN_PRB(cell.nof_prb)); + /* Configure PDSCH */ if (srslte_pdsch_cfg(&pdsch_cfg, cell, &grant, cfi, subframe, 0)) { fprintf(stderr, "Error configuring PDSCH\n"); @@ -145,7 +159,7 @@ int main(int argc, char **argv) { } /* init memory */ - for (i=0;i