diff --git a/lib/src/phy/utils/bit.c b/lib/src/phy/utils/bit.c index 6ef061252..674c1da81 100644 --- a/lib/src/phy/utils/bit.c +++ b/lib/src/phy/utils/bit.c @@ -225,6 +225,102 @@ void srslte_bit_interleave_i_w_offset(uint8_t *input, uint8_t *output, uint32_t } w_offset_p=8-w_offset; } + +#ifdef LV_HAVE_SSE + __m64 m64mask = _mm_setr_pi8((uint8_t) 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1); + + union { + uint8_t v[8]; + __m64 m64; + } a, b, c; + + union { + __m128i m128; + uint16_t u32[4]; + uint16_t u16[8]; + uint8_t u8[16]; + struct { + __m64 reg_a; + __m64 reg_b; + } m64; + struct { + uint16_t i0, i1, i2, i3, i4, i5, i6, i7; + } v16; + struct { + uint32_t i0, i1, i2, i3; + } v32; + } ipx1, ipx2, epx1, epx2; + for (uint32_t i = st; i < nof_bits / 8; i++) { + ipx1.m128 = _mm_loadu_si128((__m128i *) (interleaver + (i * 8 + 0) - w_offset_p)); + epx1.m128 = _mm_shuffle_epi8(ipx1.m128, _mm_set_epi8(0x00, 0x04, 0x08, 0x0C, + 0x00, 0x04, 0x08, 0x0C, + 0x00, 0x04, 0x08, 0x0C, + 0x00, 0x04, 0x08, 0x0C)); + ipx2.m128 = _mm_loadu_si128((__m128i *) (interleaver + (i * 8 + 4) - w_offset_p)); + epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x04, 0x08, 0x0C, + 0x00, 0x04, 0x08, 0x0C, + 0x00, 0x04, 0x08, 0x0C, + 0x00, 0x04, 0x08, 0x0C)); + + epx1.m128 = _mm_blendv_epi8(epx2.m128, epx1.m128, _mm_setr_epi8(+1, +1, +1, +1, + -1, -1, -1, -1, + +1, +1, +1, +1, + -1, -1, -1, -1)); + + b.m64 = _mm_and_si64(epx1.m64.reg_a, _mm_set1_pi8(0x7)); + b.m64 = _mm_shuffle_pi8(m64mask, b.m64); + + ipx1.m128 = _mm_srli_epi32(ipx1.m128, 3); + ipx2.m128 = _mm_srli_epi32(ipx2.m128, 3); + + a.m64 = _mm_set_pi8(input[ipx1.v32.i0], + input[ipx1.v32.i1], + input[ipx1.v32.i2], + input[ipx1.v32.i3], + input[ipx2.v32.i0], + input[ipx2.v32.i1], + input[ipx2.v32.i2], + input[ipx2.v32.i3]); + + c.m64 = _mm_cmpeq_pi8(_mm_and_si64(a.m64, b.m64), b.m64); + output[i] = (uint8_t) _mm_movemask_pi8(c.m64); + } + +#if 0 /* Disabled */ + /* THIS PIECE OF CODE IS FOR CHECKING SIMD BEHAVIOUR. DO NOT ENABLE. */ + uint8_t *output2 = malloc(nof_bits/8); + for (uint32_t i=st;i