diff --git a/matlab/tests/viterbi_bler.m b/matlab/tests/viterbi_bler.m index 2c27d9ee5..c42877bc6 100644 --- a/matlab/tests/viterbi_bler.m +++ b/matlab/tests/viterbi_bler.m @@ -1,10 +1,10 @@ clear blen=40; -SNR_values_db=linspace(-6,4,8); -Nrealizations=5000; +SNR_values_db=linspace(-3,4,8); +Nrealizations=6000; -addpath('../../debug/lte/phy/lib/fec/test') +addpath('../../build/srslte/lib/fec/test') errors1=zeros(1,length(SNR_values_db)); errors2=zeros(1,length(SNR_values_db)); @@ -28,7 +28,7 @@ for snr_idx=1:length(SNR_values_db) decodedData = lteConvolutionalDecode(noisysymbols); interleavedSymbols = reshape(reshape(noisysymbols,[],3)',1,[]); - [decodedData2, quant] = srslte_viterbi(interleavedSymbols); + [decodedData2, quant] = srslte_viterbi(interleavedSymbols, 32); errors1(snr_idx) = errors1(snr_idx) + any(decodedData ~= Data); errors2(snr_idx) = errors2(snr_idx) + any(decodedData2 ~= Data); diff --git a/srslte/lib/fec/src/viterbi.c b/srslte/lib/fec/src/viterbi.c index fef653df2..b42400547 100644 --- a/srslte/lib/fec/src/viterbi.c +++ b/srslte/lib/fec/src/viterbi.c @@ -80,7 +80,6 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { #ifdef LV_HAVE_SSE int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { srslte_viterbi_t *q = o; - uint32_t i; uint32_t best_state; @@ -91,26 +90,22 @@ int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length } /* Initialize Viterbi decoder */ - init_viterbi37_sse(q->ptr, q->tail_biting ? -1 : 0); + init_viterbi37_sse(q->ptr, -1); /* Decode block */ - uint8_t *tmp = q->tmp; if (q->tail_biting) { - memcpy(tmp, symbols, 3 * frame_length * sizeof(uint8_t)); - for (i = 0; i < 3 * (q->K - 1); i++) { - q->tmp[i + 3 * frame_length] = q->tmp[i]; - } + memcpy(q->tmp, symbols, 3*frame_length*sizeof(uint8_t)); + memcpy(&q->tmp[3*frame_length], symbols, 3*frame_length*sizeof(uint8_t)); + memcpy(&q->tmp[6*frame_length], symbols, 3*frame_length*sizeof(uint8_t)); + update_viterbi37_blk_sse(q->ptr, q->tmp, 3*frame_length, &best_state); + chainback_viterbi37_sse(q->ptr, q->tmp, 3*frame_length, best_state); + memcpy(data, &q->tmp[frame_length], frame_length*sizeof(uint8_t)); } else { - tmp = symbols; + update_viterbi37_blk_sse(q->ptr, symbols, frame_length+q->K-1, &best_state); + chainback_viterbi37_sse(q->ptr, data, frame_length, best_state); } - update_viterbi37_blk_sse(q->ptr, tmp, frame_length + q->K - 1, - q->tail_biting ? &best_state : NULL); - - /* Do Viterbi chainback */ - chainback_viterbi37_sse(q->ptr, data, frame_length, - q->tail_biting ? best_state : 0); - + return q->framebits; } @@ -154,6 +149,7 @@ int init37(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_ } if (q->tail_biting) { q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); + bzero(q->tmp, 3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); if (!q->tmp) { perror("malloc"); free37(q); @@ -188,7 +184,7 @@ int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool t return -1; } if (q->tail_biting) { - q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t)); + q->tmp = srslte_vec_malloc(10 * (q->framebits + q->K - 1) * sizeof(uint8_t)); if (!q->tmp) { perror("malloc"); free37(q); @@ -198,7 +194,7 @@ int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool t q->tmp = NULL; } - if ((q->ptr = create_viterbi37_sse(poly, framebits)) == NULL) { + if ((q->ptr = create_viterbi37_sse(poly, 3*framebits)) == NULL) { fprintf(stderr, "create_viterbi37 failed\n"); free37(q); return -1; diff --git a/srslte/lib/fec/src/viterbi37_sse.c b/srslte/lib/fec/src/viterbi37_sse.c index 39c4bf5e2..ffafd693d 100644 --- a/srslte/lib/fec/src/viterbi37_sse.c +++ b/srslte/lib/fec/src/viterbi37_sse.c @@ -111,21 +111,19 @@ int chainback_viterbi37_sse( */ endstate %= 64; endstate <<= 2; - + /* The store into data[] only needs to be done every 8 bits. * But this avoids a conditional branch, and the writes will * combine in the cache anyway */ d += 6; /* Look past tail */ - while(nbits-- != 0){ + while(nbits--) { int k; k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; endstate = (endstate >> 1) | (k << 7); data[nbits] = k; -#ifdef DEBUG -// printf("endstate=%3d, k=%d, w[0]=%d, w[1]=%d\n", endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1); -#endif + //printf("nbits=%d, endstate=%3d, k=%d, w[0]=%d, w[1]=%d, c=%d\n", nbits, endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1, d[nbits].c[(endstate>>2)/8]&1); } return 0; } @@ -163,17 +161,24 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b #endif d = (decision_t *) vp->dp; + + for (int s=0;ss[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1)); + d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1)); d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1)); /* Store surviving metrics */ @@ -218,22 +222,6 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b } - -#ifdef DEBUG - uint8_t wmin=UINT8_MAX; - int minstate = 0; - printf("[%d]: ", nbits); - for (int j=0;j<64;j++) { - printf("%d, ", vp->new_metrics->c[j]); - if (vp->new_metrics->c[j] <= wmin) { - wmin = vp->new_metrics->c[j]; - minstate = j; - } - } - printf("\n"); - printf("%3d, ",minstate); -#endif - // See if we need to normalize if (vp->new_metrics->c[0] > 100) { int i; diff --git a/srslte/lib/fec/test/viterbi_test.c b/srslte/lib/fec/test/viterbi_test.c index 463de43bc..f619b50b9 100644 --- a/srslte/lib/fec/test/viterbi_test.c +++ b/srslte/lib/fec/test/viterbi_test.c @@ -212,10 +212,13 @@ int main(int argc, char **argv) { struct timeval t[3]; gettimeofday(&t[1], NULL); int M = 1; + + //srslte_vec_fprint_b(stdout, data_tx, frame_length); + for (int i=0;irm_f, 3 * (nof_bits + 16)); - /* Normalize LLR */ - srslte_vec_sc_prod_fff(q->rm_f, (float) 3 * nof_bits/E, q->rm_f, 3*(nof_bits+16)); - /* viterbi decoder */ srslte_viterbi_decode_f(&q->decoder, q->rm_f, data, nof_bits + 16); diff --git a/srslte/lib/ue/src/ue_dl.c b/srslte/lib/ue/src/ue_dl.c index 8ed19799e..265344d61 100644 --- a/srslte/lib/ue/src/ue_dl.c +++ b/srslte/lib/ue/src/ue_dl.c @@ -406,8 +406,7 @@ int srslte_ue_dl_decode_rnti_rv(srslte_ue_dl_t *q, cf_t *input, uint8_t *data, u return SRSLTE_ERROR; } - int found_dci = srslte_ue_dl_find_dl_dci(q, &dci_msg, q->cfi, sf_idx, rnti); - + int found_dci = srslte_ue_dl_find_dl_dci(q, &dci_msg, q->cfi, sf_idx, rnti); if (found_dci == 1) { if (srslte_dci_msg_to_dl_grant(&dci_msg, rnti, q->cell.nof_prb, &dci_unpacked, &grant)) {