diff --git a/srslte/lib/fec/rm_turbo.c b/srslte/lib/fec/rm_turbo.c index 751c9fe0c..31cbd82ca 100644 --- a/srslte/lib/fec/rm_turbo.c +++ b/srslte/lib/fec/rm_turbo.c @@ -327,11 +327,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - // For -O0 builds: shuffle j-th element to pos 0 and extract from there - _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, j); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); output[l] += x; } xPtr ++; @@ -349,10 +346,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, j); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); output[l] += x; } xPtr++; @@ -718,4 +713,3 @@ int srslte_rm_turbo_rx(float *w_buff, uint32_t w_buff_len, float *input, uint32_ return 0; } -