diff --git a/lib/include/srslte/phy/phch/ra.h b/lib/include/srslte/phy/phch/ra.h index 680d80dd6..aef8f4751 100644 --- a/lib/include/srslte/phy/phch/ra.h +++ b/lib/include/srslte/phy/phch/ra.h @@ -103,9 +103,7 @@ typedef struct SRSLTE_API { bool prb_idx[2][SRSLTE_MAX_PRB]; uint32_t nof_prb; uint32_t Qm[SRSLTE_MAX_CODEWORDS]; - uint32_t Qm2[SRSLTE_MAX_CODEWORDS]; srslte_ra_mcs_t mcs[SRSLTE_MAX_CODEWORDS]; - srslte_ra_mcs_t mcs2[SRSLTE_MAX_CODEWORDS]; uint32_t nof_tb; srslte_sf_t sf_type; bool tb_en[SRSLTE_MAX_CODEWORDS]; diff --git a/lib/src/phy/phch/ra.c b/lib/src/phy/phch/ra.c index 8cae9fe65..913bd9548 100644 --- a/lib/src/phy/phch/ra.c +++ b/lib/src/phy/phch/ra.c @@ -549,7 +549,7 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } grant->pinfo = dci->pinfo; - if (tbs < 0) { + if (grant->mcs[0].tbs < 0 || grant->mcs[1].tbs < 0) { return SRSLTE_ERROR; } else { return SRSLTE_SUCCESS; @@ -584,10 +584,12 @@ int srslte_ra_dl_dci_to_grant(srslte_ra_dl_dci_t *dci, if (msg_rnti >= SRSLTE_CRNTI_START && msg_rnti <= SRSLTE_CRNTI_END) { crc_is_crnti = true; } - // Compute PRB allocation - if (!srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb)) { - // Compute MCS - if (!dl_dci_to_grant_mcs(dci, grant, crc_is_crnti)) { + // Compute PRB allocation + int ret =srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb); + if (!ret) { + // Compute MCS + ret = dl_dci_to_grant_mcs(dci, grant, crc_is_crnti); + if (ret == SRSLTE_SUCCESS) { // Apply Section 7.1.7.3. If RA-RNTI and Format1C rv_idx=0 if (msg_rnti >= SRSLTE_RARNTI_START && msg_rnti <= SRSLTE_RARNTI_END && dci->dci_is_1c) @@ -869,4 +871,4 @@ void srslte_ra_prb_fprint(FILE *f, srslte_ra_dl_grant_t *grant) { } } -} \ No newline at end of file +} diff --git a/lib/src/phy/utils/bit.c b/lib/src/phy/utils/bit.c index 9ef53c35a..b1ae383a6 100644 --- a/lib/src/phy/utils/bit.c +++ b/lib/src/phy/utils/bit.c @@ -31,6 +31,12 @@ #include #include +#ifdef LV_HAVE_SSE + +#include + +#endif /* LV_HAVE_SSE */ + #include "srslte/phy/utils/bit.h" void srslte_bit_interleave(uint8_t *input, uint8_t *output, uint16_t *interleaver, uint32_t nof_bits) { @@ -53,6 +59,125 @@ void srslte_bit_interleave_w_offset(uint8_t *input, uint8_t *output, uint16_t *i } w_offset_p=8-w_offset; } +#ifdef LV_HAVE_SSE + __m64 m64mask = _mm_setr_pi8((uint8_t) 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1); + __m128i m128mask = _mm_set1_epi64(m64mask); + + union { + uint8_t v[8]; + __m64 m64; + } a, b, c; + + union { + __m128i m128; + uint16_t u16[8]; + uint8_t u8[16]; + struct { + __m64 reg_a; + __m64 reg_b; + } m64; + struct { + uint16_t i0, i1, i2, i3, i4, i5, i6, i7; + } v; + } ipx, epx, ipx2, epx2, b128, a128, c128; + + uint32_t i = st; + for (; i < (nof_bits / 8 - 1); i += 2) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + (i * 8) - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + ipx2.m128 = _mm_loadu_si128((__m128i *) (interleaver + ((i + 1) * 8) - w_offset_p)); + epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + + epx.m64.reg_b = epx2.m64.reg_a; + + b128.m128 = _mm_and_si128(epx.m128, _mm_set1_epi8(0x7)); + b128.m128 = _mm_shuffle_epi8(m128mask, b128.m128); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + ipx2.m128 = _mm_srli_epi16(ipx2.m128, 3); + + a128.m128 = _mm_set_epi8(input[ipx2.v.i0], + input[ipx2.v.i1], + input[ipx2.v.i2], + input[ipx2.v.i3], + input[ipx2.v.i4], + input[ipx2.v.i5], + input[ipx2.v.i6], + input[ipx2.v.i7], + input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c128.m128 = _mm_cmpeq_epi8(_mm_and_si128(a128.m128, b128.m128), b128.m128); + uint16_t o = (uint16_t) _mm_movemask_epi8(c128.m128); + *((uint16_t *) (output + i)) = o; + } + + for (; i < nof_bits / 8; i++) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + i * 8 - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + b.m64 = _mm_and_si64(epx.m64.reg_a, _mm_set1_pi8(0x7)); + b.m64 = _mm_shuffle_pi8(m64mask, b.m64); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + + a.m64 = _mm_set_pi8(input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c.m64 = _mm_cmpeq_pi8(_mm_and_si64(a.m64, b.m64), b.m64); + output[i] = (uint8_t) _mm_movemask_pi8(c.m64); + } + +#if 0 + /* THIS PIECE OF CODE IS FOR CHECKING SIMD BEHAVIOUR. DO NOT ENABLE. */ + uint8_t *output2 = malloc(nof_bits/8); + for (i=st;i