Added viterbi SIMD

master
ismagom 9 years ago
parent 13d6926737
commit 08c67573ed

@ -39,6 +39,8 @@
#include <stdbool.h> #include <stdbool.h>
#include "srslte/config.h" #include "srslte/config.h"
typedef enum { typedef enum {
SRSLTE_VITERBI_27 = 0, SRSLTE_VITERBI_27 = 0,
SRSLTE_VITERBI_29, SRSLTE_VITERBI_29,
@ -82,4 +84,14 @@ SRSLTE_API int srslte_viterbi_decode_uc(srslte_viterbi_t *q,
uint8_t *data, uint8_t *data,
uint32_t frame_length); uint32_t frame_length);
/* Initialise `q` with the SSE implementation of the K=7, rate-1/3 decoder.
 * Returns 0 on success and -1 on failure.
 * NOTE(review): the definition visible in viterbi.c is compiled only when
 * LV_HAVE_SSE is defined, while this declaration is unconditional -- confirm
 * that callers are only built with SSE enabled. */
SRSLTE_API int srslte_viterbi_init_sse(srslte_viterbi_t *q,
srslte_viterbi_type_t type,
uint32_t poly[3],
uint32_t max_frame_length,
bool tail_bitting);
#endif #endif

@ -38,6 +38,8 @@
#define DEB 0 #define DEB 0
#undef LV_HAVE_SSE
int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
srslte_viterbi_t *q = o; srslte_viterbi_t *q = o;
uint32_t i; uint32_t i;
@ -54,16 +56,17 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
init_viterbi37_port(q->ptr, q->tail_biting ? -1 : 0); init_viterbi37_port(q->ptr, q->tail_biting ? -1 : 0);
/* Decode block */ /* Decode block */
uint8_t *tmp = q->tmp;
if (q->tail_biting) { if (q->tail_biting) {
memcpy(q->tmp, symbols, 3 * frame_length * sizeof(uint8_t)); memcpy(tmp, symbols, 3 * frame_length * sizeof(uint8_t));
for (i = 0; i < 3 * (q->K - 1); i++) { for (i = 0; i < 3 * (q->K - 1); i++) {
q->tmp[i + 3 * frame_length] = q->tmp[i]; q->tmp[i + 3 * frame_length] = q->tmp[i];
} }
} else { } else {
q->tmp = symbols; tmp = symbols;
} }
update_viterbi37_blk_port(q->ptr, q->tmp, frame_length + q->K - 1, update_viterbi37_blk_port(q->ptr, tmp, frame_length + q->K - 1,
q->tail_biting ? &best_state : NULL); q->tail_biting ? &best_state : NULL);
/* Do Viterbi chainback */ /* Do Viterbi chainback */
@ -73,6 +76,57 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
return q->framebits; return q->framebits;
} }
#ifdef LV_HAVE_SSE
/* Decode one frame of 8-bit soft symbols with the SSE K=7, rate-1/3 decoder.
 *
 * o            - decoder object (srslte_viterbi_t *)
 * symbols      - 3 soft symbols per trellis step
 * data         - output, one decoded bit per byte, frame_length entries
 * frame_length - number of information bits; must not exceed q->framebits
 *
 * Returns q->framebits on success, -1 if frame_length is too large.
 */
int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
  srslte_viterbi_t *q = o;
  uint32_t best_state;
  uint8_t *input = symbols;

  if (frame_length > q->framebits) {
    fprintf(stderr, "Initialized decoder for max frame length %d bits\n",
            q->framebits);
    return -1;
  }

  /* Tail-biting frames start from an unknown state (-1), others from state 0 */
  init_viterbi37_sse(q->ptr, q->tail_biting ? -1 : 0);

  if (q->tail_biting) {
    /* Work on a private copy and append the first 3*(K-1) symbols after the
     * frame so the trellis wraps around. The copy is done byte by byte in
     * increasing order, exactly as the regions are laid out in q->tmp. */
    uint32_t k;
    input = q->tmp;
    memcpy(input, symbols, 3 * frame_length * sizeof(uint8_t));
    for (k = 0; k < 3 * (q->K - 1); k++) {
      input[k + 3 * frame_length] = input[k];
    }
  }

  update_viterbi37_blk_sse(q->ptr, input, frame_length + q->K - 1,
                           q->tail_biting ? &best_state : NULL);

  /* Chain back from the best final state (tail-biting) or from state 0 */
  chainback_viterbi37_sse(q->ptr, data, frame_length,
                          q->tail_biting ? best_state : 0);

  return q->framebits;
}
/* Release everything owned by an SSE decoder object: the quantised-symbol
 * buffer, the tail-biting scratch buffer and the decoder instance itself. */
void free37_sse(void *o) {
  srslte_viterbi_t *dec = o;

  /* free(NULL) is a no-op, so unset buffers need no guard */
  free(dec->symbols_uc);
  free(dec->tmp);
  delete_viterbi37_sse(dec->ptr);
}
#endif
void free37(void *o) { void free37(void *o) {
srslte_viterbi_t *q = o; srslte_viterbi_t *q = o;
if (q->symbols_uc) { if (q->symbols_uc) {
@ -118,21 +172,68 @@ int init37(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_
} }
} }
#ifdef LV_HAVE_SSE
/* Set up `q` as an SSE K=7, rate-1/3 Viterbi decoder.
 *
 * q           - decoder object to initialise
 * poly        - the three generator polynomials
 * framebits   - maximum number of information bits per frame
 * tail_biting - true to decode tail-biting frames (allocates a scratch buffer)
 *
 * Returns 0 on success, -1 on allocation or decoder-creation failure. On
 * failure everything allocated here is released and the owning pointers are
 * cleared.
 */
int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_biting) {
  q->K = 7;
  q->R = 3;
  q->framebits = framebits;
  q->gain_quant = 10;
  q->tail_biting = tail_biting;
  q->decode = decode37_sse;
  q->free = free37_sse;
  q->decode_f = NULL;

  /* Clear owned pointers first so that the cleanup path never sees garbage */
  q->ptr = NULL;
  q->tmp = NULL;

  q->symbols_uc = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
  if (!q->symbols_uc) {
    perror("malloc");
    return -1;
  }

  if (q->tail_biting) {
    q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
    if (!q->tmp) {
      perror("malloc");
      goto fail;
    }
  }

  q->ptr = create_viterbi37_sse(poly, framebits);
  if (q->ptr == NULL) {
    fprintf(stderr, "create_viterbi37 failed\n");
    goto fail;
  }
  return 0;

fail:
  /* Use the SSE cleanup routine: q->ptr belongs to the SSE implementation */
  free37_sse(q);
  q->symbols_uc = NULL;
  q->tmp = NULL;
  q->ptr = NULL;
  return -1;
}
#endif
void srslte_viterbi_set_gain_quant(srslte_viterbi_t *q, float gain_quant) { void srslte_viterbi_set_gain_quant(srslte_viterbi_t *q, float gain_quant) {
q->gain_quant = gain_quant; q->gain_quant = gain_quant;
} }
int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, uint32_t poly[3], int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, uint32_t poly[3], uint32_t max_frame_length, bool tail_bitting)
uint32_t max_frame_length, bool tail_bitting) { {
switch (type) { switch (type) {
case SRSLTE_VITERBI_37: case SRSLTE_VITERBI_37:
#ifdef LV_HAVE_SSE
return init37_sse(q, poly, max_frame_length, tail_bitting);
#else
return init37(q, poly, max_frame_length, tail_bitting); return init37(q, poly, max_frame_length, tail_bitting);
#endif
default: default:
fprintf(stderr, "Decoder not implemented\n"); fprintf(stderr, "Decoder not implemented\n");
return -1; return -1;
} }
} }
#ifdef LV_HAVE_SSE
/* Initialise `q` with the SSE decoder for the requested code type.
 *
 * Only SRSLTE_VITERBI_37 has an SSE implementation; any other type is
 * rejected the same way srslte_viterbi_init() rejects unsupported types.
 *
 * Returns 0 on success, -1 on failure or unsupported type.
 */
int srslte_viterbi_init_sse(srslte_viterbi_t *q, srslte_viterbi_type_t type, uint32_t poly[3], uint32_t max_frame_length, bool tail_bitting)
{
  switch (type) {
  case SRSLTE_VITERBI_37:
    return init37_sse(q, poly, max_frame_length, tail_bitting);
  default:
    fprintf(stderr, "Decoder not implemented\n");
    return -1;
  }
}
#endif
void srslte_viterbi_free(srslte_viterbi_t *q) { void srslte_viterbi_free(srslte_viterbi_t *q) {
if (q->free) { if (q->free) {
q->free(q); q->free(q);
@ -141,7 +242,8 @@ void srslte_viterbi_free(srslte_viterbi_t *q) {
} }
/* symbols are real-valued */ /* symbols are real-valued */
int srslte_viterbi_decode_f(srslte_viterbi_t *q, float *symbols, uint8_t *data, uint32_t frame_length) { int srslte_viterbi_decode_f(srslte_viterbi_t *q, float *symbols, uint8_t *data, uint32_t frame_length)
{
uint32_t len; uint32_t len;
if (frame_length > q->framebits) { if (frame_length > q->framebits) {
fprintf(stderr, "Initialized decoder for max frame length %d bits\n", fprintf(stderr, "Initialized decoder for max frame length %d bits\n",
@ -155,15 +257,15 @@ int srslte_viterbi_decode_f(srslte_viterbi_t *q, float *symbols, uint8_t *data,
} }
if (!q->decode_f) { if (!q->decode_f) {
srslte_vec_quant_fuc(symbols, q->symbols_uc, q->gain_quant, 127.5, 255, len); srslte_vec_quant_fuc(symbols, q->symbols_uc, q->gain_quant, 127.5, 255, len);
return q->decode(q, q->symbols_uc, data, frame_length); srslte_vec_fprint_b(stdout, q->symbols_uc, len);
return srslte_viterbi_decode_uc(q, q->symbols_uc, data, frame_length);
} else { } else {
return q->decode_f(q, symbols, data, frame_length); return q->decode_f(q, symbols, data, frame_length);
} }
} }
int srslte_viterbi_decode_uc(srslte_viterbi_t *q, uint8_t *symbols, uint8_t *data,
uint32_t frame_length) { int srslte_viterbi_decode_uc(srslte_viterbi_t *q, uint8_t *symbols, uint8_t *data, uint32_t frame_length)
{
return q->decode(q, symbols, data, frame_length); return q->decode(q, symbols, data, frame_length);
} }

@ -30,7 +30,7 @@ void *create_viterbi37_port(uint32_t polys[3],
uint32_t len); uint32_t len);
int init_viterbi37_port(void *p, int init_viterbi37_port(void *p,
uint32_t starting_state); int starting_state);
int chainback_viterbi37_port(void *p, int chainback_viterbi37_port(void *p,
uint8_t *data, uint8_t *data,
@ -43,3 +43,22 @@ int update_viterbi37_blk_port(void *p,
uint8_t *syms, uint8_t *syms,
uint32_t nbits, uint32_t nbits,
uint32_t *best_state); uint32_t *best_state);
/* SSE2 counterparts of the *_port decoder primitives declared above. */

/* Allocate a decoder instance for frames of up to `len` bits.
 * Returns NULL if allocation fails. */
void *create_viterbi37_sse(uint32_t polys[3],
uint32_t len);
/* Reset the path metrics for a new frame. A starting_state of -1 means no
 * start state is favoured; otherwise that state's metric is biased to 0. */
int init_viterbi37_sse(void *p,
int starting_state);
/* Trace back through the stored decisions from `endstate`, writing `nbits`
 * decoded bits (one per byte) to `data`. */
int chainback_viterbi37_sse(void *p,
uint8_t *data,
uint32_t nbits,
uint32_t endstate);
/* Free a decoder instance; NULL is accepted. */
void delete_viterbi37_sse(void *p);
/* Run `nbits` trellis steps over `syms` (3 symbols per step). When
 * `best_state` is non-NULL it receives the state with the smallest final
 * path metric. */
int update_viterbi37_blk_sse(void *p,
uint8_t *syms,
uint32_t nbits,
uint32_t *best_state);

@ -1,8 +1,8 @@
/* Adapted Viterbi Phil Karn's r=1/3 k=9 viterbi decoder to r=1/3 k=7 /* Adapted Phil Karn's r=1/3 k=9 viterbi decoder to r=1/3 k=7
* *
* K=9 r=1/3 Viterbi decoder in portable C * K=9 r=1/3 Viterbi decoder in portable C
* Copyright Aug 2006, Phil Karn, KA9Q * Copyright Aug 2006, Phil Karn, KA9Q
* May be used under the terms of the GNU Affero General Public License (LGPL) * May be used under the terms of the GNU Lesser General Public License (LGPL)
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -13,6 +13,8 @@
#include "parity.h" #include "parity.h"
#include <limits.h> #include <limits.h>
//#define DEBUG
typedef union { typedef union {
uint32_t w[64]; uint32_t w[64];
} metric_t; } metric_t;
@ -21,7 +23,7 @@ typedef union {
} decision_t; } decision_t;
static union { static union {
uint8_t c[128]; uint8_t c[32];
} Branchtab37[3]; } Branchtab37[3];
/* State info for instance of Viterbi decoder */ /* State info for instance of Viterbi decoder */
@ -34,7 +36,7 @@ struct v37 {
}; };
/* Initialize Viterbi decoder for start of new frame */ /* Initialize Viterbi decoder for start of new frame */
int init_viterbi37_port(void *p, uint32_t starting_state) { int init_viterbi37_port(void *p, int starting_state) {
struct v37 *vp = p; struct v37 *vp = p;
uint32_t i; uint32_t i;
@ -112,6 +114,9 @@ int chainback_viterbi37_port(void *p, uint8_t *data, /* Decoded output data */
k = (d[nbits].w[(endstate >> 2) / 32] >> ((endstate >> 2) % 32)) & 1; k = (d[nbits].w[(endstate >> 2) / 32] >> ((endstate >> 2) % 32)) & 1;
endstate = (endstate >> 1) | (k << 7); endstate = (endstate >> 1) | (k << 7);
data[nbits] = k; data[nbits] = k;
#ifdef DEBUG
// printf("endstate=%3d, k=%d, w[0]=%d, w[1]=%d\n", endstate, k, d[nbits].w[0]&1, d[nbits].w[1]&1);
#endif
} }
return 0; return 0;
} }
@ -156,6 +161,11 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t *
return -1; return -1;
uint32_t k=0; uint32_t k=0;
d = (decision_t *) vp->dp; d = (decision_t *) vp->dp;
#ifdef DEBUG
printf("[");
#endif
while (nbits--) { while (nbits--) {
void *tmp; void *tmp;
uint8_t sym0, sym1, sym2; uint8_t sym0, sym1, sym2;
@ -171,6 +181,19 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t *
for (i = 0; i < 32; i++) for (i = 0; i < 32; i++)
BFLY(i); BFLY(i);
#ifdef DEBUG
uint32_t wmin=UINT_MAX;
int minstate = 0;
for (int j=0;j<64;j++) {
if (vp->new_metrics->w[j] <= wmin) {
wmin = vp->new_metrics->w[j];
minstate = j;
}
}
printf("%3d, ", minstate);
#endif
d++; d++;
tmp = vp->old_metrics; tmp = vp->old_metrics;
vp->old_metrics = vp->new_metrics; vp->old_metrics = vp->new_metrics;
@ -188,5 +211,10 @@ int update_viterbi37_blk_port(void *p, uint8_t *syms, uint32_t nbits, uint32_t *
*best_state = bst; *best_state = bst;
} }
vp->dp = d; vp->dp = d;
#ifdef DEBUG
printf("];\n");
#endif
return 0; return 0;
} }

@ -0,0 +1,294 @@
/* Adapted Phil Karn's r=1/3 k=9 viterbi decoder to r=1/3 k=7
*
* K=15 r=1/6 Viterbi decoder for x86 SSE2
* Copyright Mar 2004, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <memory.h>
#include <limits.h>
#include "parity.h"
//#define DEBUG
#ifdef LV_HAVE_SSE
#include <emmintrin.h>
/* Path metrics for the 64 trellis states: one unsigned byte per state,
 * also addressable as four 128-bit SSE vectors of 16 states each. */
typedef union {
unsigned char c[64];
__m128i v[4];
} metric_t;
/* Survivor decisions for one trellis step, one bit per state.
 * The update routine stores them as four 16-bit masks through s[];
 * the chainback routine reads individual bits through c[].
 * NOTE(review): w[] and v[] do not appear to be used in the code visible
 * here, and `unsigned long w[2]` makes the union 16 bytes on LP64 targets
 * although only 8 bytes are written -- confirm whether that is intended. */
typedef union {
unsigned long w[2];
unsigned char c[8];
unsigned short s[4];
__m64 v[1];
} decision_t;
/* Branch table, one entry per generator polynomial: 32 bytes that are each
 * 0 or 255, filled in by set_viterbi37_polynomial_sse(). It is a single
 * file-scope object, so every decoder instance shares the same table.
 * NOTE(review): the tag name `branchtab27` looks like a leftover from the
 * decoder this file was adapted from; the object is also not `static`. */
union branchtab27 {
unsigned char c[32];
__m128i v[2];
} Branchtab37_sse2[3];
/* State info for instance of Viterbi decoder.
 * The two metric buffers are used alternately: each trellis step reads
 * old_metrics, writes new_metrics, then swaps the two pointers.
 * NOTE(review): the portable decoder source also declares a `struct v37`;
 * presumably the two live in separate translation units -- confirm. */
struct v37 {
metric_t metrics1; /* path metric buffer 1 */
metric_t metrics2; /* path metric buffer 2 */
decision_t *dp; /* Pointer to current decision */
metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
decision_t *decisions; /* Beginning of decisions for block */
};
/* Fill the shared branch table from the three generator polynomials.
 *
 * For each of the 32 table entries and each polynomial, the entry is 255
 * when parity((2*state) & poly) is non-zero and 0 otherwise.
 *
 * The polynomials are unsigned, so the former `polys[i] < 0` term was
 * always false and has been dropped; the stored values are unchanged.
 */
void set_viterbi37_polynomial_sse(uint32_t polys[3]) {
  int state;
  int p;

  for (state = 0; state < 32; state++) {
    for (p = 0; p < 3; p++) {
      Branchtab37_sse2[p].c[state] = parity((2 * state) & polys[p]) ? 255 : 0;
    }
  }
}
/* Prepare a decoder instance for a new frame.
 *
 * Every path metric in buffer 1 is set to 63, the metric pointers are reset
 * so buffer 1 is "old" and buffer 2 is "new", and the decision pointer is
 * rewound to the start of the decision array. Unless starting_state is -1,
 * the metric of state (starting_state & 63) is set to 0 to favour it.
 *
 * Always returns 0.
 */
int init_viterbi37_sse(void *p, int starting_state) {
  struct v37 *dec = p;
  uint32_t state;

  for (state = 0; state < 64; state++) {
    dec->metrics1.c[state] = 63;
  }

  dec->old_metrics = &dec->metrics1;
  dec->new_metrics = &dec->metrics2;
  dec->dp = dec->decisions;

  if (starting_state == -1) {
    return 0; /* unknown start state: leave all metrics equal */
  }

  dec->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */
  return 0;
}
/* Create a new decoder instance for frames of up to `len` bits.
 *
 * Installs `polys` in the shared branch table, then allocates the decoder
 * state and a decision array of len+6 entries, both aligned to the size of
 * an SSE vector. Returns the instance, or NULL if either allocation fails.
 */
void *create_viterbi37_sse(uint32_t polys[3], uint32_t len) {
  void *mem;
  struct v37 *dec;

  set_viterbi37_polynomial_sse(polys);

  /* The state holds __m128i members, so request vector-sized alignment */
  if (posix_memalign(&mem, sizeof(__m128i), sizeof(struct v37)) != 0) {
    return NULL;
  }
  dec = (struct v37 *) mem;

  if (posix_memalign(&mem, sizeof(__m128i), (len + 6) * sizeof(decision_t)) != 0) {
    free(dec);
    return NULL;
  }
  dec->decisions = (decision_t *) mem;

  return dec;
}
/* Viterbi chainback.
 *
 * p        - decoder instance
 * data     - output, one decoded bit (0 or 1) per byte, nbits entries
 * nbits    - number of data bits to recover
 * endstate - terminal encoder state (taken modulo 64)
 *
 * Walks the stored decisions backwards, skipping the first 6 entries of the
 * decision array. Returns 0 on success, -1 if p is NULL.
 */
int chainback_viterbi37_sse(
    void *p,
    uint8_t *data,
    uint32_t nbits,
    uint32_t endstate) {
  struct v37 *dec = p;
  decision_t *steps;
  uint32_t bit;

  if (p == NULL) {
    return -1;
  }

  steps = (decision_t *) dec->decisions + 6; /* Look past tail */

  /* Keep the 6-bit state in bits 2..7 so the new decision bit can be
   * shifted in at bit 7 on every step */
  endstate = (endstate % 64) << 2;

  for (bit = nbits; bit-- != 0; ) {
    uint32_t state = endstate >> 2;
    int k = (steps[bit].c[state / 8] >> (state % 8)) & 1;

    endstate = (endstate >> 1) | (k << 7);
    data[bit] = k;
  }
  return 0;
}
/* Destroy a decoder instance: frees the decision array and the state.
 * A NULL pointer is accepted and ignored. */
void delete_viterbi37_sse(void *p) {
  struct v37 *dec = p;

  if (dec == NULL) {
    return;
  }
  free(dec->decisions);
  free(dec);
}
/* Debug helper: print label `s` followed by the 16 bytes of `val` as
 * unsigned decimal values on one line. */
void print_128i(char *s, __m128i val) {
  uint8_t *bytes = (uint8_t *) &val;
  int lane;

  printf("%s: ", s);
  for (lane = 0; lane < 16; lane++) {
    printf("%3d, ", bytes[lane]);
  }
  printf("\n");
}
/* Run the add-compare-select recursion over a block of soft symbols.
 *
 * p          - decoder instance
 * syms       - 8-bit soft symbols, 3 per trellis step
 * nbits      - number of trellis steps to process
 * best_state - if non-NULL, receives the state with the smallest path metric
 *              after the last step (the highest-numbered state wins ties)
 *
 * One decision_t is written per step starting at the instance's current
 * decision pointer, which is advanced accordingly.
 *
 * The signature matches the prototype `int update_viterbi37_blk_sse(void *,
 * uint8_t *, uint32_t, uint32_t *)` used by callers.
 * Returns 0 on success, -1 if p is NULL.
 */
int update_viterbi37_blk_sse(void *p, uint8_t *syms, uint32_t nbits, uint32_t *best_state) {
  struct v37 *vp = p;
  decision_t *d;

  if (p == NULL)
    return -1;

#ifdef DEBUG
  printf("[");
#endif

  d = (decision_t *) vp->dp;
  while (nbits--) {
    __m128i sym0v, sym1v, sym2v;
    void *tmp;
    int i;

    /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
    sym0v = _mm_set1_epi8(syms[0]);
    sym1v = _mm_set1_epi8(syms[1]);
    sym2v = _mm_set1_epi8(syms[2]);
    syms += 3;

    for (i = 0; i < 2; i++) {
      __m128i decision0, decision1, metric, m_metric, m0, m1, m2, m3, survivor0, survivor1;

      /* Form branch metrics */
      m0 = _mm_avg_epu8(_mm_xor_si128(Branchtab37_sse2[0].v[i], sym0v), _mm_xor_si128(Branchtab37_sse2[1].v[i], sym1v));
      metric = _mm_avg_epu8(_mm_xor_si128(Branchtab37_sse2[2].v[i], sym2v), m0);

#ifdef DEBUG
      print_128i("metric_initial", metric);
#endif
      /* There's no packed bytes right shift in SSE2, so we use the word version and mask.
       * The result is a 5-bit branch metric (0..31) per state. */
      metric = _mm_srli_epi16(metric, 3);
      metric = _mm_and_si128(metric, _mm_set1_epi8(31));
      m_metric = _mm_sub_epi8(_mm_set1_epi8(31), metric);

#ifdef DEBUG
      print_128i("metric        ", metric);
      print_128i("m_metric      ", m_metric);
#endif

      /* Add branch metrics to path metrics */
      m0 = _mm_add_epi8(vp->old_metrics->v[i], metric);
      m3 = _mm_add_epi8(vp->old_metrics->v[2 + i], metric);
      m1 = _mm_add_epi8(vp->old_metrics->v[2 + i], m_metric);
      m2 = _mm_add_epi8(vp->old_metrics->v[i], m_metric);

      /* Compare and select, using modulo arithmetic */
      decision0 = _mm_cmpgt_epi8(_mm_sub_epi8(m0, m1), _mm_setzero_si128());
      decision1 = _mm_cmpgt_epi8(_mm_sub_epi8(m2, m3), _mm_setzero_si128());
      survivor0 = _mm_or_si128(_mm_and_si128(decision0, m1), _mm_andnot_si128(decision0, m0));
      survivor1 = _mm_or_si128(_mm_and_si128(decision1, m3), _mm_andnot_si128(decision1, m2));

      /* Pack each set of decisions into 16 bits */
      d->s[2 * i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0, decision1));
      d->s[2 * i + 1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0, decision1));

      /* Store surviving metrics */
      vp->new_metrics->v[2 * i] = _mm_unpacklo_epi8(survivor0, survivor1);
      vp->new_metrics->v[2 * i + 1] = _mm_unpackhi_epi8(survivor0, survivor1);
    }

#ifdef DEBUG
    uint8_t wmin = UINT8_MAX;
    int minstate = 0;
    printf("[%u]: ", (unsigned) nbits);
    for (int j = 0; j < 64; j++) {
      printf("%d, ", vp->new_metrics->c[j]);
      if (vp->new_metrics->c[j] <= wmin) {
        wmin = vp->new_metrics->c[j];
        minstate = j;
      }
    }
    printf("\n");
    printf("%3d, ", minstate);
#endif

    // See if we need to normalize
    if (vp->new_metrics->c[0] > 100) {
      int j;
      uint8_t adjust;
      __m128i adjustv;
      union { __m128i v; signed short w[8]; } t;

      /* Lane-wise minimum over the four metric vectors */
      adjustv = vp->new_metrics->v[0];
      for (j = 1; j < 4; j++) {
        adjustv = _mm_min_epu8(adjustv, vp->new_metrics->v[j]);
      }

      /* Horizontal minimum over the 16 lanes: fold by 8, 4, 2 and finally
       * 1 byte. Without the last fold, lane 0 would only hold the minimum
       * of the even lanes and smaller odd-lane metrics would wrap around
       * in the subtraction below. */
      adjustv = _mm_min_epu8(adjustv, _mm_srli_si128(adjustv, 8));
      adjustv = _mm_min_epu8(adjustv, _mm_srli_si128(adjustv, 4));
      adjustv = _mm_min_epu8(adjustv, _mm_srli_si128(adjustv, 2));
      adjustv = _mm_min_epu8(adjustv, _mm_srli_si128(adjustv, 1));

      t.v = adjustv;
      adjust = (uint8_t) t.w[0]; /* low byte = lane 0 = smallest metric */
      adjustv = _mm_set1_epi8(adjust);

      /* A plain (non-saturating) subtract is fine here: adjust is the
       * smallest metric, so no lane can go below zero */
      for (j = 0; j < 4; j++)
        vp->new_metrics->v[j] = _mm_sub_epi8(vp->new_metrics->v[j], adjustv);
    }

    d++;
    /* Swap pointers to old and new metrics */
    tmp = vp->old_metrics;
    vp->old_metrics = vp->new_metrics;
    vp->new_metrics = tmp;
  }

  if (best_state) {
    uint32_t i, bst = 0;
    uint8_t minmetric = UINT8_MAX;
    for (i = 0; i < 64; i++) {
      if (vp->old_metrics->c[i] <= minmetric) {
        bst = i;
        minmetric = vp->old_metrics->c[i];
      }
    }
    *best_state = bst;
  }

#ifdef DEBUG
  printf("];\n===========================================\n");
#endif

  vp->dp = d;
  return 0;
}
#endif

@ -61,15 +61,15 @@ ADD_TEST(turbocoder_test_all turbocoder_test)
ADD_EXECUTABLE(viterbi_test viterbi_test.c) ADD_EXECUTABLE(viterbi_test viterbi_test.c)
TARGET_LINK_LIBRARIES(viterbi_test srslte) TARGET_LINK_LIBRARIES(viterbi_test srslte)
ADD_TEST(viterbi_40_0 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 0.0) ADD_TEST(viterbi_40_0 viterbi_test -n 1000 -s 1 -l 40 -t -e 0.0)
ADD_TEST(viterbi_40_2 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 2.0) ADD_TEST(viterbi_40_2 viterbi_test -n 1000 -s 1 -l 40 -t -e 2.0)
ADD_TEST(viterbi_40_3 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 3.0) ADD_TEST(viterbi_40_3 viterbi_test -n 1000 -s 1 -l 40 -t -e 3.0)
ADD_TEST(viterbi_40_4 viterbi_test -n 1000 -s 1 -l 40 -k 7 -t -e 4.5) ADD_TEST(viterbi_40_4 viterbi_test -n 1000 -s 1 -l 40 -t -e 4.5)
ADD_TEST(viterbi_1000_0 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 0.0) ADD_TEST(viterbi_1000_0 viterbi_test -n 100 -s 1 -l 1000 -t -e 0.0)
ADD_TEST(viterbi_1000_2 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 2.0) ADD_TEST(viterbi_1000_2 viterbi_test -n 100 -s 1 -l 1000 -t -e 2.0)
ADD_TEST(viterbi_1000_3 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 3.0) ADD_TEST(viterbi_1000_3 viterbi_test -n 100 -s 1 -l 1000 -t -e 3.0)
ADD_TEST(viterbi_1000_4 viterbi_test -n 100 -s 1 -l 1000 -k 7 -t -e 4.5) ADD_TEST(viterbi_1000_4 viterbi_test -n 100 -s 1 -l 1000 -t -e 4.5)
BuildMex(MEXNAME viterbi SOURCES viterbi_test_mex.c LIBRARIES srslte srslte_mex) BuildMex(MEXNAME viterbi SOURCES viterbi_test_mex.c LIBRARIES srslte srslte_mex)

@ -37,33 +37,27 @@
#include "viterbi_test.h" #include "viterbi_test.h"
int frame_length = 1000, nof_frames = 128; int frame_length = 1000, nof_frames = 128;
float ebno_db = 100.0; float ebno_db = 100.0;
uint32_t seed = 0; uint32_t seed = 0;
bool tail_biting = false; bool tail_biting = false;
int K = -1;
#define SNR_POINTS 10 #define SNR_POINTS 10
#define SNR_MIN 0.0 #define SNR_MIN 0.0
#define SNR_MAX 5.0 #define SNR_MAX 5.0
#define NCODS 3
#define NTYPES 1+NCODS
void usage(char *prog) { void usage(char *prog) {
printf("Usage: %s [nlestk]\n", prog); printf("Usage: %s [nlest]\n", prog);
printf("\t-n nof_frames [Default %d]\n", nof_frames); printf("\t-n nof_frames [Default %d]\n", nof_frames);
printf("\t-l frame_length [Default %d]\n", frame_length); printf("\t-l frame_length [Default %d]\n", frame_length);
printf("\t-e ebno in dB [Default scan]\n"); printf("\t-e ebno in dB [Default scan]\n");
printf("\t-s seed [Default 0=time]\n"); printf("\t-s seed [Default 0=time]\n");
printf("\t-t tail_bitting [Default %s]\n", tail_biting ? "yes" : "no"); printf("\t-t tail_bitting [Default %s]\n", tail_biting ? "yes" : "no");
printf("\t-k constraint length [Default both]\n", K);
} }
void parse_args(int argc, char **argv) { void parse_args(int argc, char **argv) {
int opt; int opt;
while ((opt = getopt(argc, argv, "nlstek")) != -1) { while ((opt = getopt(argc, argv, "nlste")) != -1) {
switch (opt) { switch (opt) {
case 'n': case 'n':
nof_frames = atoi(argv[optind]); nof_frames = atoi(argv[optind]);
@ -80,9 +74,6 @@ void parse_args(int argc, char **argv) {
case 't': case 't':
tail_biting = true; tail_biting = true;
break; break;
case 'k':
K = atoi(argv[optind]);
break;
default: default:
usage(argv[0]); usage(argv[0]);
exit(-1); exit(-1);
@ -90,49 +81,22 @@ void parse_args(int argc, char **argv) {
} }
} }
void output_matlab(float ber[NTYPES][SNR_POINTS], int snr_points,
srslte_convcoder_t cod[NCODS], int ncods) {
int i, j, n;
FILE *f = fopen("srslte_viterbi_snr.m", "w");
if (!f) {
perror("fopen");
exit(-1);
}
fprintf(f, "ber=[");
for (j = 0; j < NTYPES; j++) {
for (i = 0; i < snr_points; i++) {
fprintf(f, "%g ", ber[j][i]);
}
fprintf(f, "; ");
}
fprintf(f, "];\n");
fprintf(f, "snr=linspace(%g,%g-%g/%d,%d);\n", SNR_MIN, SNR_MAX, SNR_MAX,
snr_points, snr_points);
fprintf(f, "semilogy(snr,ber,snr,0.5*erfc(sqrt(10.^(snr/10))));\n");
fprintf(f, "legend('uncoded',");
for (n=0;n<ncods;n++) {
fprintf(f,"'1/3 K=%d%s',",cod[n].K,cod[n].tail_biting?" tb":"");
}
fprintf(f,"'theory-uncoded');");
fprintf(f, "grid on;\n");
fclose(f);
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
int frame_cnt; int frame_cnt;
float *llr; float *llr;
uint8_t *llr_c; uint8_t *llr_c;
uint8_t *data_tx, *data_rx[NTYPES], *symbols; uint8_t *data_tx, *data_rx, *data_rx2, *symbols;
int i, j; int i, j;
float var[SNR_POINTS], varunc[SNR_POINTS]; float var[SNR_POINTS], varunc[SNR_POINTS];
int snr_points; int snr_points;
float ber[NTYPES][SNR_POINTS]; uint32_t errors;
uint32_t errors[NTYPES]; #ifdef TEST_SSE
srslte_viterbi_type_t srslte_viterbi_type[NCODS]; uint32_t errors2;
srslte_viterbi_t dec[NCODS]; srslte_viterbi_t dec_sse;
srslte_convcoder_t cod[NCODS]; #endif
int coded_length[NCODS]; srslte_viterbi_t dec;
int n, ncods, max_coded_length; srslte_convcoder_t cod;
int coded_length;
parse_args(argc, argv); parse_args(argc, argv);
@ -142,57 +106,20 @@ int main(int argc, char **argv) {
} }
srand(seed); srand(seed);
switch (K) { cod.poly[0] = 0x6D;
case 9: cod.poly[1] = 0x4F;
cod[0].poly[0] = 0x1ed; cod.poly[2] = 0x57;
cod[0].poly[1] = 0x19b; cod.K = 7;
cod[0].poly[2] = 0x127; cod.tail_biting = tail_biting;
cod[0].tail_biting = false;
cod[0].K = 9;
srslte_viterbi_type[0] = SRSLTE_VITERBI_39;
ncods=1;
break;
case 7:
cod[0].poly[0] = 0x6D;
cod[0].poly[1] = 0x4F;
cod[0].poly[2] = 0x57;
cod[0].K = 7;
cod[0].tail_biting = tail_biting;
srslte_viterbi_type[0] = SRSLTE_VITERBI_37;
ncods=1;
break;
default:
cod[0].poly[0] = 0x1ed;
cod[0].poly[1] = 0x19b;
cod[0].poly[2] = 0x127;
cod[0].tail_biting = false;
cod[0].K = 9;
srslte_viterbi_type[0] = SRSLTE_VITERBI_39;
cod[1].poly[0] = 0x6D;
cod[1].poly[1] = 0x4F;
cod[1].poly[2] = 0x57;
cod[1].tail_biting = false;
cod[1].K = 7;
srslte_viterbi_type[1] = SRSLTE_VITERBI_37;
cod[2].poly[0] = 0x6D;
cod[2].poly[1] = 0x4F;
cod[2].poly[2] = 0x57;
cod[2].tail_biting = true;
cod[2].K = 7;
srslte_viterbi_type[2] = SRSLTE_VITERBI_37;
ncods=3;
}
max_coded_length = 0; cod.R = 3;
for (i=0;i<ncods;i++) { coded_length = cod.R * (frame_length + ((cod.tail_biting) ? 0 : cod.K - 1));
cod[i].R = 3; srslte_viterbi_init(&dec, SRSLTE_VITERBI_37, cod.poly, frame_length, cod.tail_biting);
coded_length[i] = cod[i].R * (frame_length + ((cod[i].tail_biting) ? 0 : cod[i].K - 1)); printf("Convolutional Code 1/3 K=%d Tail bitting: %s\n", cod.K, cod.tail_biting ? "yes" : "no");
if (coded_length[i] > max_coded_length) {
max_coded_length = coded_length[i]; #ifdef TEST_SSE
} srslte_viterbi_init_sse(&dec_sse, SRSLTE_VITERBI_37, cod.poly, frame_length, cod.tail_biting);
srslte_viterbi_init(&dec[i], srslte_viterbi_type[i], cod[i].poly, frame_length, cod[i].tail_biting); #endif
printf("Convolutional Code 1/3 K=%d Tail bitting: %s\n", cod[i].K, cod[i].tail_biting ? "yes" : "no");
}
printf(" Frame length: %d\n", frame_length); printf(" Frame length: %d\n", frame_length);
if (ebno_db < 100.0) { if (ebno_db < 100.0) {
@ -205,25 +132,29 @@ int main(int argc, char **argv) {
exit(-1); exit(-1);
} }
for (i = 0; i < NTYPES; i++) { data_rx = malloc(frame_length * sizeof(uint8_t));
data_rx[i] = malloc(frame_length * sizeof(uint8_t)); if (!data_rx) {
if (!data_rx[i]) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
} }
data_rx2 = malloc(frame_length * sizeof(uint8_t));
if (!data_rx2) {
perror("malloc");
exit(-1);
} }
symbols = malloc(max_coded_length * sizeof(uint8_t)); symbols = malloc(coded_length * sizeof(uint8_t));
if (!symbols) { if (!symbols) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
} }
llr = malloc(max_coded_length * sizeof(float)); llr = malloc(coded_length * sizeof(float));
if (!llr) { if (!llr) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
} }
llr_c = malloc(2 * max_coded_length * sizeof(uint8_t)); llr_c = malloc(2 * coded_length * sizeof(uint8_t));
if (!llr_c) { if (!llr_c) {
perror("malloc"); perror("malloc");
exit(-1); exit(-1);
@ -250,9 +181,10 @@ int main(int argc, char **argv) {
for (i = 0; i < snr_points; i++) { for (i = 0; i < snr_points; i++) {
frame_cnt = 0; frame_cnt = 0;
for (j = 0; j < NTYPES; j++) { errors = 0;
errors[j] = 0; #ifdef TEST_SSE
} errors2 = 0;
#endif
while (frame_cnt < nof_frames) { while (frame_cnt < nof_frames) {
/* generate data_tx */ /* generate data_tx */
@ -265,76 +197,84 @@ int main(int argc, char **argv) {
llr[j] = data_tx[j] ? sqrt(2) : -sqrt(2); llr[j] = data_tx[j] ? sqrt(2) : -sqrt(2);
} }
srslte_ch_awgn_f(llr, llr, varunc[i], frame_length); srslte_ch_awgn_f(llr, llr, varunc[i], frame_length);
for (j = 0; j < frame_length; j++) {
data_rx[0][j] = llr[j] > 0 ? 1 : 0;
}
/* coded BER */ /* coded BER */
for (n=0;n<ncods;n++) { srslte_convcoder_encode(&cod, data_tx, symbols, frame_length);
srslte_convcoder_encode(&cod[n], data_tx, symbols, frame_length);
for (j = 0; j < coded_length[n]; j++) { for (j = 0; j < coded_length; j++) {
llr[j] = symbols[j] ? sqrt(2) : -sqrt(2); llr[j] = symbols[j] ? sqrt(2) : -sqrt(2);
} }
srslte_ch_awgn_f(llr, llr, var[i], coded_length[n]); srslte_ch_awgn_f(llr, llr, var[i], coded_length);
srslte_vec_quant_fuc(llr, llr_c, Gain, 127.5, 255, coded_length[n]); srslte_vec_quant_fuc(llr, llr_c, Gain, 127.5, 255, coded_length);
/* decoder 1 */
srslte_viterbi_decode_uc(&dec[n], llr_c, data_rx[1+n], frame_length); struct timeval t[3];
gettimeofday(&t[1], NULL);
int M = 1;
for (int i=0;i<M;i++) {
srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length);
} }
/* check errors */ #ifdef TEST_SSE
for (j = 0; j < 1+ncods; j++) { gettimeofday(&t[2], NULL);
errors[j] += srslte_bit_diff(data_tx, data_rx[j], frame_length); get_time_interval(t);
//printf("Execution time:\t\t%.1f us\n", (float) t[0].tv_usec/M);
gettimeofday(&t[1], NULL);
for (int i=0;i<M;i++) {
srslte_viterbi_decode_uc(&dec_sse, llr_c, data_rx2, frame_length);
} }
gettimeofday(&t[2], NULL);
get_time_interval(t);
//printf("Execution time SIMD:\t%.1f us\n", (float) t[0].tv_usec/M);
#endif
/* check errors */
errors += srslte_bit_diff(data_tx, data_rx, frame_length);
#ifdef TEST_SSE
errors2 += srslte_bit_diff(data_tx, data_rx2, frame_length);
#endif
frame_cnt++; frame_cnt++;
printf("Eb/No: %3.2f %10d/%d ", printf("Eb/No: %3.2f %10d/%d ", SNR_MIN + i * ebno_inc,frame_cnt,nof_frames);
SNR_MIN + i * ebno_inc,frame_cnt,nof_frames); printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length));
for (n=0;n<1+ncods;n++) { #ifdef TEST_SSE
printf("BER: %.2e ",(float) errors[n] / (frame_cnt * frame_length)); printf("BER2: %.2e ", (float) errors2 / (frame_cnt * frame_length));
} #endif
printf("\r"); printf("\r");
} }
printf("\n"); printf("\n");
for (j = 0; j < 1+ncods; j++) {
ber[j][i] = (float) errors[j] / (frame_cnt * frame_length);
}
if (snr_points == 1) { if (snr_points == 1) {
printf("BER uncoded: %g\t%u errors\n", printf("BER : %g\t%u errors\n", (float) errors / (frame_cnt * frame_length), errors);
(float) errors[0] / (frame_cnt * frame_length), errors[0]); #ifdef TEST_SSE
for (n=0;n<ncods;n++) { printf("BER SSE: %g\t%u errors\n", (float) errors2 / (frame_cnt * frame_length), errors2);
printf("BER K=%d: %g\t%u errors\n",cod[n].K, #endif
(float) errors[1+n] / (frame_cnt * frame_length), errors[1+n]);
}
}
} }
for (n=0;n<ncods;n++) {
srslte_viterbi_free(&dec[n]);
} }
srslte_viterbi_free(&dec);
#ifdef TEST_SSE
srslte_viterbi_free(&dec_sse);
#endif
free(data_tx); free(data_tx);
free(symbols); free(symbols);
free(llr); free(llr);
free(llr_c); free(llr_c);
for (i = 0; i < NTYPES; i++) { free(data_rx);
free(data_rx[i]);
}
if (snr_points == 1) { if (snr_points == 1) {
int expected_errors = get_expected_errors(nof_frames, int expected_errors = get_expected_errors(nof_frames,
seed, frame_length, K, tail_biting, ebno_db); seed, frame_length, tail_biting, ebno_db);
if (expected_errors == -1) { if (expected_errors == -1) {
fprintf(stderr, "Test parameters not defined in test_results.h\n"); fprintf(stderr, "Test parameters not defined in test_results.h\n");
exit(-1); exit(-1);
} else { } else {
printf("errors =%d, expected =%d\n", errors[1], expected_errors); printf("errors =%d, expected =%d\n", errors, expected_errors);
exit(errors[1] > expected_errors); exit(errors > expected_errors);
} }
} else { } else {
printf("\n"); printf("\n");
output_matlab(ber, snr_points, cod, ncods);
printf("Done\n"); printf("Done\n");
exit(0); exit(0);
} }

@ -30,35 +30,52 @@ typedef struct {
int n; int n;
uint32_t s; uint32_t s;
int len; int len;
int k;
bool tail; bool tail;
float ebno; float ebno;
int errors; int errors;
}expected_errors_t; }expected_errors_t;
/* The SSE implementation uses 5-bit metrics and loses approximately 0.75 dB, so its expected error counts are higher */
#ifdef LV_HAVE_SSE
static expected_errors_t expected_errors[] = { static expected_errors_t expected_errors[] = {
{1000, 1, 40, 7, true, 0.0, 5363}, {1000, 1, 40, true, 0.0, 7282},
{1000, 1, 40, 7, true, 2.0, 356}, {1000, 1, 40, true, 2.0, 725},
{1000, 1, 40, 7, true, 3.0, 48}, {1000, 1, 40, true, 3.0, 176},
{1000, 1, 40, 7, true, 4.5, 0}, {1000, 1, 40, true, 4.5, 24},
{100, 1, 1000, 7, true, 0.0, 8753}, {100, 1, 1000, true, 0.0, 13208},
{100, 1, 1000, 7, true, 2.0, 350}, {100, 1, 1000, true, 2.0, 939},
{100, 1, 1000, 7, true, 3.0, 33}, {100, 1, 1000, true, 3.0, 110},
{100, 1, 1000, 7, true, 4.5, 0}, {100, 1, 1000, true, 4.5, 5},
{-1, -1, -1, -1, true, -1.0, -1} {-1, -1, -1, true, -1.0, -1}
}; };
int get_expected_errors(int n, uint32_t s, int len, int k, bool tail, float ebno) { #else
/* Reference results used when the SSE decoder is not compiled in.
 * Columns follow expected_errors_t: n (number of frames), s (seed),
 * len (frame length), tail (tail biting), ebno (Eb/No in dB), errors.
 * The row with n == -1 terminates the table. */
static expected_errors_t expected_errors[] = {
{1000, 1, 40, true, 0.0, 5363},
{1000, 1, 40, true, 2.0, 356},
{1000, 1, 40, true, 3.0, 48},
{1000, 1, 40, true, 4.5, 0},
{100, 1, 1000, true, 0.0, 8753},
{100, 1, 1000, true, 2.0, 350},
{100, 1, 1000, true, 3.0, 33},
{100, 1, 1000, true, 4.5, 0},
{-1, -1, -1, true, -1.0, -1}
};
#endif
int get_expected_errors(int n, uint32_t s, int len, bool tail, float ebno) {
int i; int i;
i=0; i=0;
while(expected_errors[i].n != -1) { while(expected_errors[i].n != -1) {
if (expected_errors[i].n == n if (expected_errors[i].n == n
&& expected_errors[i].s == s && expected_errors[i].s == s
&& expected_errors[i].len == len && expected_errors[i].len == len
&& expected_errors[i].k == k
&& expected_errors[i].tail == tail && expected_errors[i].tail == tail
&& expected_errors[i].ebno == ebno) { && expected_errors[i].ebno == ebno) {
break; break;

@ -105,17 +105,27 @@ void parse_args(int argc, char **argv) {
} }
} }
int main(int argc, char **argv) {
srslte_pdsch_t pdsch;
uint32_t i, j;
uint8_t *data = NULL; uint8_t *data = NULL;
cf_t *ce[SRSLTE_MAX_PORTS]; cf_t *ce[SRSLTE_MAX_PORTS];
srslte_softbuffer_rx_t softbuffer_rx;
srslte_ra_dl_grant_t grant;
srslte_pdsch_cfg_t pdsch_cfg;
cf_t *sf_symbols;
cf_t *slot_symbols[SRSLTE_MAX_PORTS]; cf_t *slot_symbols[SRSLTE_MAX_PORTS];
srslte_pdsch_t pdsch;
srslte_ofdm_t ofdm_tx, ofdm_rx;
/* Unit of work timed by main(): OFDM-demodulate sf_symbols into
 * slot_symbols[1], reset the RX soft buffer for the grant's transport block
 * size, then decode the PDSCH from slot_symbols[0].
 * Returns the result of srslte_pdsch_decode().
 * NOTE(review): the demodulator writes slot_symbols[1] while the decoder
 * reads slot_symbols[0] -- presumably the OFDM step is here only to be
 * included in the timing; confirm. */
int dummy_function() {
  int decode_result;

  srslte_ofdm_rx_sf(&ofdm_rx, sf_symbols, slot_symbols[1]);
  srslte_softbuffer_rx_reset_tbs(&softbuffer_rx, grant.mcs.tbs);
  decode_result = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data);
  return decode_result;
}
int main(int argc, char **argv) {
uint32_t i, j;
int ret = -1; int ret = -1;
struct timeval t[3]; struct timeval t[3];
srslte_pdsch_cfg_t pdsch_cfg;
srslte_softbuffer_tx_t softbuffer_tx; srslte_softbuffer_tx_t softbuffer_tx;
srslte_softbuffer_rx_t softbuffer_rx;
uint32_t rv; uint32_t rv;
parse_args(argc,argv); parse_args(argc,argv);
@ -132,12 +142,16 @@ int main(int argc, char **argv) {
dci.mcs_idx = mcs; dci.mcs_idx = mcs;
dci.rv_idx = rv_idx; dci.rv_idx = rv_idx;
dci.type0_alloc.rbg_bitmask = 0xffffffff; dci.type0_alloc.rbg_bitmask = 0xffffffff;
srslte_ra_dl_grant_t grant;
if (srslte_ra_dl_dci_to_grant(&dci, cell.nof_prb, true, &grant)) { if (srslte_ra_dl_dci_to_grant(&dci, cell.nof_prb, true, &grant)) {
fprintf(stderr, "Error computing resource allocation\n"); fprintf(stderr, "Error computing resource allocation\n");
return ret; return ret;
} }
srslte_ofdm_tx_init(&ofdm_tx, cell.cp, cell.nof_prb);
srslte_ofdm_rx_init(&ofdm_rx, cell.cp, cell.nof_prb);
sf_symbols=srslte_vec_malloc(sizeof(cf_t)*SRSLTE_SF_LEN_PRB(cell.nof_prb));
/* Configure PDSCH */ /* Configure PDSCH */
if (srslte_pdsch_cfg(&pdsch_cfg, cell, &grant, cfi, subframe, 0)) { if (srslte_pdsch_cfg(&pdsch_cfg, cell, &grant, cfi, subframe, 0)) {
fprintf(stderr, "Error configuring PDSCH\n"); fprintf(stderr, "Error configuring PDSCH\n");
@ -145,7 +159,7 @@ int main(int argc, char **argv) {
} }
/* init memory */ /* init memory */
for (i=0;i<cell.nof_ports;i++) { for (i=0;i<SRSLTE_MAX_PORTS;i++) {
ce[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp)); ce[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp));
if (!ce[i]) { if (!ce[i]) {
perror("srslte_vec_malloc"); perror("srslte_vec_malloc");
@ -223,12 +237,19 @@ int main(int argc, char **argv) {
} }
} }
srslte_ofdm_tx_sf(&ofdm_tx, slot_symbols[0], sf_symbols);
int M=1;
int r=0;
srslte_sch_set_max_noi(&pdsch.dl_sch, 1);
gettimeofday(&t[1], NULL); gettimeofday(&t[1], NULL);
int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data); for (i=0;i<M;i++) {
r = dummy_function();
}
gettimeofday(&t[2], NULL); gettimeofday(&t[2], NULL);
get_time_interval(t); get_time_interval(t);
printf("DECODED %s in %d:%d (%.2f Mbps)\n", r?"Error":"OK", printf("DECODED %s in %.2f (PHY bitrate=%.2f Mbps. Processing bitrate=%.2f Mbps)\n", r?"Error":"OK",
(int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec); (float) t[0].tv_usec/M, (float) grant.mcs.tbs/1000, (float) grant.mcs.tbs*M/t[0].tv_usec);
if (r) { if (r) {
ret = -1; ret = -1;
goto quit; goto quit;

Loading…
Cancel
Save