Improved BER performance in Viterbi

master
ismagom 9 years ago
parent c3268a93e2
commit 9b8709f744

@ -1,10 +1,10 @@
clear clear
blen=40; blen=40;
SNR_values_db=linspace(-6,4,8); SNR_values_db=linspace(-3,4,8);
Nrealizations=5000; Nrealizations=6000;
addpath('../../debug/lte/phy/lib/fec/test') addpath('../../build/srslte/lib/fec/test')
errors1=zeros(1,length(SNR_values_db)); errors1=zeros(1,length(SNR_values_db));
errors2=zeros(1,length(SNR_values_db)); errors2=zeros(1,length(SNR_values_db));
@ -28,7 +28,7 @@ for snr_idx=1:length(SNR_values_db)
decodedData = lteConvolutionalDecode(noisysymbols); decodedData = lteConvolutionalDecode(noisysymbols);
interleavedSymbols = reshape(reshape(noisysymbols,[],3)',1,[]); interleavedSymbols = reshape(reshape(noisysymbols,[],3)',1,[]);
[decodedData2, quant] = srslte_viterbi(interleavedSymbols); [decodedData2, quant] = srslte_viterbi(interleavedSymbols, 32);
errors1(snr_idx) = errors1(snr_idx) + any(decodedData ~= Data); errors1(snr_idx) = errors1(snr_idx) + any(decodedData ~= Data);
errors2(snr_idx) = errors2(snr_idx) + any(decodedData2 ~= Data); errors2(snr_idx) = errors2(snr_idx) + any(decodedData2 ~= Data);

@ -80,7 +80,6 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
srslte_viterbi_t *q = o; srslte_viterbi_t *q = o;
uint32_t i;
uint32_t best_state; uint32_t best_state;
@ -91,26 +90,22 @@ int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length
} }
/* Initialize Viterbi decoder */ /* Initialize Viterbi decoder */
init_viterbi37_sse(q->ptr, q->tail_biting ? -1 : 0); init_viterbi37_sse(q->ptr, -1);
/* Decode block */ /* Decode block */
uint8_t *tmp = q->tmp;
if (q->tail_biting) { if (q->tail_biting) {
memcpy(tmp, symbols, 3 * frame_length * sizeof(uint8_t)); memcpy(q->tmp, symbols, 3*frame_length*sizeof(uint8_t));
for (i = 0; i < 3 * (q->K - 1); i++) { memcpy(&q->tmp[3*frame_length], symbols, 3*frame_length*sizeof(uint8_t));
q->tmp[i + 3 * frame_length] = q->tmp[i]; memcpy(&q->tmp[6*frame_length], symbols, 3*frame_length*sizeof(uint8_t));
} update_viterbi37_blk_sse(q->ptr, q->tmp, 3*frame_length, &best_state);
chainback_viterbi37_sse(q->ptr, q->tmp, 3*frame_length, best_state);
memcpy(data, &q->tmp[frame_length], frame_length*sizeof(uint8_t));
} else { } else {
tmp = symbols; update_viterbi37_blk_sse(q->ptr, symbols, frame_length+q->K-1, &best_state);
chainback_viterbi37_sse(q->ptr, data, frame_length, best_state);
} }
update_viterbi37_blk_sse(q->ptr, tmp, frame_length + q->K - 1,
q->tail_biting ? &best_state : NULL);
/* Do Viterbi chainback */
chainback_viterbi37_sse(q->ptr, data, frame_length,
q->tail_biting ? best_state : 0);
return q->framebits; return q->framebits;
} }
@ -154,6 +149,7 @@ int init37(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_
} }
if (q->tail_biting) { if (q->tail_biting) {
q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
bzero(q->tmp, 3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
if (!q->tmp) { if (!q->tmp) {
perror("malloc"); perror("malloc");
free37(q); free37(q);
@ -188,7 +184,7 @@ int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool t
return -1; return -1;
} }
if (q->tail_biting) { if (q->tail_biting) {
q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); q->tmp = srslte_vec_malloc(10 * (q->framebits + q->K - 1) * sizeof(uint8_t));
if (!q->tmp) { if (!q->tmp) {
perror("malloc"); perror("malloc");
free37(q); free37(q);
@ -198,7 +194,7 @@ int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool t
q->tmp = NULL; q->tmp = NULL;
} }
if ((q->ptr = create_viterbi37_sse(poly, framebits)) == NULL) { if ((q->ptr = create_viterbi37_sse(poly, 3*framebits)) == NULL) {
fprintf(stderr, "create_viterbi37 failed\n"); fprintf(stderr, "create_viterbi37 failed\n");
free37(q); free37(q);
return -1; return -1;

@ -111,21 +111,19 @@ int chainback_viterbi37_sse(
*/ */
endstate %= 64; endstate %= 64;
endstate <<= 2; endstate <<= 2;
/* The store into data[] only needs to be done every 8 bits. /* The store into data[] only needs to be done every 8 bits.
* But this avoids a conditional branch, and the writes will * But this avoids a conditional branch, and the writes will
* combine in the cache anyway * combine in the cache anyway
*/ */
d += 6; /* Look past tail */ d += 6; /* Look past tail */
while(nbits-- != 0){ while(nbits--) {
int k; int k;
k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1;
endstate = (endstate >> 1) | (k << 7); endstate = (endstate >> 1) | (k << 7);
data[nbits] = k; data[nbits] = k;
#ifdef DEBUG //printf("nbits=%d, endstate=%3d, k=%d, w[0]=%d, w[1]=%d, c=%d\n", nbits, endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1, d[nbits].c[(endstate>>2)/8]&1);
// printf("endstate=%3d, k=%d, w[0]=%d, w[1]=%d\n", endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1);
#endif
} }
return 0; return 0;
} }
@ -163,17 +161,24 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
#endif #endif
d = (decision_t *) vp->dp; d = (decision_t *) vp->dp;
for (int s=0;s<nbits;s++) {
memset(d+s,0,sizeof(decision_t));
}
while(nbits--) { while(nbits--) {
__m128i sym0v,sym1v,sym2v; __m128i sym0v,sym1v,sym2v;
void *tmp; void *tmp;
int i; int i;
// printf("nbits=%d, syms=%d,%d,%d\n", nbits, syms[0], syms[1], syms[2]);fflush(stdout);
/* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
sym0v = _mm_set1_epi8(syms[0]); sym0v = _mm_set1_epi8(syms[0]);
sym1v = _mm_set1_epi8(syms[1]); sym1v = _mm_set1_epi8(syms[1]);
sym2v = _mm_set1_epi8(syms[2]); sym2v = _mm_set1_epi8(syms[2]);
syms += 3; syms += 3;
for(i=0;i<2;i++){ for(i=0;i<2;i++){
__m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
@ -185,7 +190,6 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
print_128i("metric_initial", metric); print_128i("metric_initial", metric);
#endif #endif
/* There's no packed bytes right shift in SSE2, so we use the word version and mask /* There's no packed bytes right shift in SSE2, so we use the word version and mask
* (I'm *really* starting to like Altivec...)
*/ */
metric = _mm_srli_epi16(metric,3); metric = _mm_srli_epi16(metric,3);
metric = _mm_and_si128(metric,_mm_set1_epi8(31)); metric = _mm_and_si128(metric,_mm_set1_epi8(31));
@ -209,7 +213,7 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
survivor1 = _mm_or_si128(_mm_and_si128(decision1,m3),_mm_andnot_si128(decision1,m2)); survivor1 = _mm_or_si128(_mm_and_si128(decision1,m3),_mm_andnot_si128(decision1,m2));
/* Pack each set of decisions into 16 bits */ /* Pack each set of decisions into 16 bits */
d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1)); d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1));
d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1)); d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1));
/* Store surviving metrics */ /* Store surviving metrics */
@ -218,22 +222,6 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
} }
#ifdef DEBUG
uint8_t wmin=UINT8_MAX;
int minstate = 0;
printf("[%d]: ", nbits);
for (int j=0;j<64;j++) {
printf("%d, ", vp->new_metrics->c[j]);
if (vp->new_metrics->c[j] <= wmin) {
wmin = vp->new_metrics->c[j];
minstate = j;
}
}
printf("\n");
printf("%3d, ",minstate);
#endif
// See if we need to normalize // See if we need to normalize
if (vp->new_metrics->c[0] > 100) { if (vp->new_metrics->c[0] > 100) {
int i; int i;

@ -212,10 +212,13 @@ int main(int argc, char **argv) {
struct timeval t[3]; struct timeval t[3];
gettimeofday(&t[1], NULL); gettimeofday(&t[1], NULL);
int M = 1; int M = 1;
//srslte_vec_fprint_b(stdout, data_tx, frame_length);
for (int i=0;i<M;i++) { for (int i=0;i<M;i++) {
srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length); srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length);
} }
#ifdef TEST_SSE #ifdef TEST_SSE
gettimeofday(&t[2], NULL); gettimeofday(&t[2], NULL);
get_time_interval(t); get_time_interval(t);

@ -276,9 +276,6 @@ static int dci_decode(srslte_pdcch_t *q, float *e, uint8_t *data, uint32_t E, ui
/* unrate matching */ /* unrate matching */
srslte_rm_conv_rx(e, E, q->rm_f, 3 * (nof_bits + 16)); srslte_rm_conv_rx(e, E, q->rm_f, 3 * (nof_bits + 16));
/* Normalize LLR */
srslte_vec_sc_prod_fff(q->rm_f, (float) 3 * nof_bits/E, q->rm_f, 3*(nof_bits+16));
/* viterbi decoder */ /* viterbi decoder */
srslte_viterbi_decode_f(&q->decoder, q->rm_f, data, nof_bits + 16); srslte_viterbi_decode_f(&q->decoder, q->rm_f, data, nof_bits + 16);

@ -406,8 +406,7 @@ int srslte_ue_dl_decode_rnti_rv(srslte_ue_dl_t *q, cf_t *input, uint8_t *data, u
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
int found_dci = srslte_ue_dl_find_dl_dci(q, &dci_msg, q->cfi, sf_idx, rnti); int found_dci = srslte_ue_dl_find_dl_dci(q, &dci_msg, q->cfi, sf_idx, rnti);
if (found_dci == 1) { if (found_dci == 1) {
if (srslte_dci_msg_to_dl_grant(&dci_msg, rnti, q->cell.nof_prb, &dci_unpacked, &grant)) { if (srslte_dci_msg_to_dl_grant(&dci_msg, rnti, q->cell.nof_prb, &dci_unpacked, &grant)) {

Loading…
Cancel
Save