Improved BER performance in the Viterbi decoder

master
ismagom 9 years ago
parent c3268a93e2
commit 9b8709f744

@ -1,10 +1,10 @@
clear
blen=40;
SNR_values_db=linspace(-6,4,8);
Nrealizations=5000;
SNR_values_db=linspace(-3,4,8);
Nrealizations=6000;
addpath('../../debug/lte/phy/lib/fec/test')
addpath('../../build/srslte/lib/fec/test')
errors1=zeros(1,length(SNR_values_db));
errors2=zeros(1,length(SNR_values_db));
@ -28,7 +28,7 @@ for snr_idx=1:length(SNR_values_db)
decodedData = lteConvolutionalDecode(noisysymbols);
interleavedSymbols = reshape(reshape(noisysymbols,[],3)',1,[]);
[decodedData2, quant] = srslte_viterbi(interleavedSymbols);
[decodedData2, quant] = srslte_viterbi(interleavedSymbols, 32);
errors1(snr_idx) = errors1(snr_idx) + any(decodedData ~= Data);
errors2(snr_idx) = errors2(snr_idx) + any(decodedData2 ~= Data);

@ -80,7 +80,6 @@ int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
#ifdef LV_HAVE_SSE
int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
srslte_viterbi_t *q = o;
uint32_t i;
uint32_t best_state;
@ -91,25 +90,21 @@ int decode37_sse(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length
}
/* Initialize Viterbi decoder */
init_viterbi37_sse(q->ptr, q->tail_biting ? -1 : 0);
init_viterbi37_sse(q->ptr, -1);
/* Decode block */
uint8_t *tmp = q->tmp;
if (q->tail_biting) {
memcpy(tmp, symbols, 3 * frame_length * sizeof(uint8_t));
for (i = 0; i < 3 * (q->K - 1); i++) {
q->tmp[i + 3 * frame_length] = q->tmp[i];
}
memcpy(q->tmp, symbols, 3*frame_length*sizeof(uint8_t));
memcpy(&q->tmp[3*frame_length], symbols, 3*frame_length*sizeof(uint8_t));
memcpy(&q->tmp[6*frame_length], symbols, 3*frame_length*sizeof(uint8_t));
update_viterbi37_blk_sse(q->ptr, q->tmp, 3*frame_length, &best_state);
chainback_viterbi37_sse(q->ptr, q->tmp, 3*frame_length, best_state);
memcpy(data, &q->tmp[frame_length], frame_length*sizeof(uint8_t));
} else {
tmp = symbols;
update_viterbi37_blk_sse(q->ptr, symbols, frame_length+q->K-1, &best_state);
chainback_viterbi37_sse(q->ptr, data, frame_length, best_state);
}
update_viterbi37_blk_sse(q->ptr, tmp, frame_length + q->K - 1,
q->tail_biting ? &best_state : NULL);
/* Do Viterbi chainback */
chainback_viterbi37_sse(q->ptr, data, frame_length,
q->tail_biting ? best_state : 0);
return q->framebits;
}
@ -154,6 +149,7 @@ int init37(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool tail_
}
if (q->tail_biting) {
q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
bzero(q->tmp, 3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
if (!q->tmp) {
perror("malloc");
free37(q);
@ -188,7 +184,7 @@ int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool t
return -1;
}
if (q->tail_biting) {
q->tmp = srslte_vec_malloc(3 * (q->framebits + q->K - 1) * sizeof(uint8_t));
q->tmp = srslte_vec_malloc(10 * (q->framebits + q->K - 1) * sizeof(uint8_t));
if (!q->tmp) {
perror("malloc");
free37(q);
@ -198,7 +194,7 @@ int init37_sse(srslte_viterbi_t *q, uint32_t poly[3], uint32_t framebits, bool t
q->tmp = NULL;
}
if ((q->ptr = create_viterbi37_sse(poly, framebits)) == NULL) {
if ((q->ptr = create_viterbi37_sse(poly, 3*framebits)) == NULL) {
fprintf(stderr, "create_viterbi37 failed\n");
free37(q);
return -1;

@ -117,15 +117,13 @@ int chainback_viterbi37_sse(
* combine in the cache anyway
*/
d += 6; /* Look past tail */
while(nbits-- != 0){
while(nbits--) {
int k;
k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1;
endstate = (endstate >> 1) | (k << 7);
data[nbits] = k;
#ifdef DEBUG
// printf("endstate=%3d, k=%d, w[0]=%d, w[1]=%d\n", endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1);
#endif
//printf("nbits=%d, endstate=%3d, k=%d, w[0]=%d, w[1]=%d, c=%d\n", nbits, endstate, k, d[nbits].s[1]&1, d[nbits].s[2]&1, d[nbits].c[(endstate>>2)/8]&1);
}
return 0;
}
@ -163,11 +161,18 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
#endif
d = (decision_t *) vp->dp;
for (int s=0;s<nbits;s++) {
memset(d+s,0,sizeof(decision_t));
}
while(nbits--) {
__m128i sym0v,sym1v,sym2v;
void *tmp;
int i;
// printf("nbits=%d, syms=%d,%d,%d\n", nbits, syms[0], syms[1], syms[2]);fflush(stdout);
/* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
sym0v = _mm_set1_epi8(syms[0]);
sym1v = _mm_set1_epi8(syms[1]);
@ -185,7 +190,6 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
print_128i("metric_initial", metric);
#endif
/* There's no packed bytes right shift in SSE2, so we use the word version and mask
* (I'm *really* starting to like Altivec...)
*/
metric = _mm_srli_epi16(metric,3);
metric = _mm_and_si128(metric,_mm_set1_epi8(31));
@ -218,22 +222,6 @@ void update_viterbi37_blk_sse(void *p,unsigned char *syms,int nbits, uint32_t *b
}
#ifdef DEBUG
uint8_t wmin=UINT8_MAX;
int minstate = 0;
printf("[%d]: ", nbits);
for (int j=0;j<64;j++) {
printf("%d, ", vp->new_metrics->c[j]);
if (vp->new_metrics->c[j] <= wmin) {
wmin = vp->new_metrics->c[j];
minstate = j;
}
}
printf("\n");
printf("%3d, ",minstate);
#endif
// See if we need to normalize
if (vp->new_metrics->c[0] > 100) {
int i;

@ -212,6 +212,9 @@ int main(int argc, char **argv) {
struct timeval t[3];
gettimeofday(&t[1], NULL);
int M = 1;
//srslte_vec_fprint_b(stdout, data_tx, frame_length);
for (int i=0;i<M;i++) {
srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length);
}

@ -276,9 +276,6 @@ static int dci_decode(srslte_pdcch_t *q, float *e, uint8_t *data, uint32_t E, ui
/* unrate matching */
srslte_rm_conv_rx(e, E, q->rm_f, 3 * (nof_bits + 16));
/* Normalize LLR */
srslte_vec_sc_prod_fff(q->rm_f, (float) 3 * nof_bits/E, q->rm_f, 3*(nof_bits+16));
/* viterbi decoder */
srslte_viterbi_decode_f(&q->decoder, q->rm_f, data, nof_bits + 16);

@ -407,7 +407,6 @@ int srslte_ue_dl_decode_rnti_rv(srslte_ue_dl_t *q, cf_t *input, uint8_t *data, u
}
int found_dci = srslte_ue_dl_find_dl_dci(q, &dci_msg, q->cfi, sf_idx, rnti);
if (found_dci == 1) {
if (srslte_dci_msg_to_dl_grant(&dci_msg, rnti, q->cell.nof_prb, &dci_unpacked, &grant)) {

Loading…
Cancel
Save