diff --git a/lib/include/srsran/phy/common/sequence.h b/lib/include/srsran/phy/common/sequence.h index ee01f446d..6d8bfeaf9 100644 --- a/lib/include/srsran/phy/common/sequence.h +++ b/lib/include/srsran/phy/common/sequence.h @@ -36,6 +36,8 @@ SRSRAN_API void srsran_sequence_state_init(srsran_sequence_state_t* s, uint32_t SRSRAN_API void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* out, uint32_t length); +SRSRAN_API void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in, float* out, uint32_t length); + SRSRAN_API void srsran_sequence_state_advance(srsran_sequence_state_t* s, uint32_t length); typedef struct SRSRAN_API { diff --git a/lib/src/phy/common/sequence.c b/lib/src/phy/common/sequence.c index 1216564e0..2ec871398 100644 --- a/lib/src/phy/common/sequence.c +++ b/lib/src/phy/common/sequence.c @@ -264,6 +264,59 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* } } +void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in, float* out, uint32_t length) +{ + uint32_t i = 0; + const float xor [2] = {+0.0F, -0.0F}; + + if (length >= SEQUENCE_PAR_BITS) { + for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) { + uint32_t c = (uint32_t)(s->x1 ^ s->x2); + + uint32_t j = 0; +#ifdef LV_HAVE_SSE + for (; j < SEQUENCE_PAR_BITS - 3; j += 4) { + // Preloads bits of interest in the 4 LSB + __m128i mask = _mm_set1_epi32(c >> j); + + // Masks each bit + mask = _mm_and_si128(mask, _mm_setr_epi32(1, 2, 4, 8)); + + // Get non zero mask + mask = _mm_cmpgt_epi32(mask, _mm_set1_epi32(0)); + + // And with MSB + mask = _mm_and_si128(mask, (__m128i)_mm_set1_ps(-0.0F)); + + // Load input + __m128 v = _mm_load_ps(in + i + j); + + // Loads input and perform sign XOR + v = _mm_xor_ps((__m128)mask, v); + + _mm_storeu_ps(out + i + j, v); + } +#endif + // Finish the parallel bits with generic code + for (; j < SEQUENCE_PAR_BITS; j++) { + *((uint32_t*)&out[i + j]) = *((uint32_t*)&in[i + j]) ^ *((uint32_t*)&xor[(c >> j) & 1U]); + } + + // Step sequences + s->x1 = sequence_gen_LTE_pr_memless_step_par_x1(s->x1); + s->x2 = sequence_gen_LTE_pr_memless_step_par_x2(s->x2); + } + } + + for (; i < length; i++) { + *((uint32_t*)&out[i]) = *((uint32_t*)&in[i]) ^ *((uint32_t*)&xor[(s->x1 ^ s->x2) & 1U]); + + // Step sequences + s->x1 = sequence_gen_LTE_pr_memless_step_x1(s->x1); + s->x2 = sequence_gen_LTE_pr_memless_step_x2(s->x2); + } +} + void srsran_sequence_state_advance(srsran_sequence_state_t* s, uint32_t length) { uint32_t i = 0; @@ -430,56 +483,10 @@ void srsran_sequence_free(srsran_sequence_t* q) void srsran_sequence_apply_f(const float* in, float* out, uint32_t length, uint32_t seed) { - uint32_t x1 = sequence_x1_init; // X1 initial state is fix - uint32_t x2 = sequence_get_x2_init(seed); // loads x2 initial state - - uint32_t i = 0; - - if (length >= SEQUENCE_PAR_BITS) { - for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) { - uint32_t c = (uint32_t)(x1 ^ x2); - - uint32_t j = 0; -#ifdef LV_HAVE_SSE - for (; j < SEQUENCE_PAR_BITS - 3; j += 4) { - // Preloads bits of interest in the 4 LSB - __m128i mask = _mm_set1_epi32(c >> j); - - // Masks each bit - mask = _mm_and_si128(mask, _mm_setr_epi32(1, 2, 4, 8)); - - // Get non zero mask - mask = _mm_cmpgt_epi32(mask, _mm_set1_epi32(0)); - - // And with MSB - mask = _mm_and_si128(mask, (__m128i)_mm_set1_ps(-0.0F)); + srsran_sequence_state_t seq = {}; + srsran_sequence_state_init(&seq, seed); - // Load input - __m128 v = _mm_loadu_ps(in + i + j); - - // Loads input and perform sign XOR - v = _mm_xor_ps((__m128)mask, v); - - _mm_storeu_ps(out + i + j, v); - } -#endif - for (; j < SEQUENCE_PAR_BITS; j++) { - ((uint32_t*)out)[i + j] = ((uint32_t*)in)[i] ^ (((c >> j) & 1U) << 31U); - } - - // Step sequences - x1 = sequence_gen_LTE_pr_memless_step_par_x1(x1); - x2 = sequence_gen_LTE_pr_memless_step_par_x2(x2); - } - } - - for (; i < length; i++) { - ((uint32_t*)out)[i] = ((uint32_t*)in)[i] ^ (((x1 ^ x2) & 1U) << 31U); - - // Step sequences - x1 = sequence_gen_LTE_pr_memless_step_x1(x1); - x2 = sequence_gen_LTE_pr_memless_step_x2(x2); - } + srsran_sequence_state_apply_f(&seq, in, out, length); } void srsran_sequence_apply_s(const int16_t* in, int16_t* out, uint32_t length, uint32_t seed)