diff --git a/lib/include/srslte/phy/common/sequence.h b/lib/include/srslte/phy/common/sequence.h index 50d939759..0cf9cbaa6 100644 --- a/lib/include/srslte/phy/common/sequence.h +++ b/lib/include/srslte/phy/common/sequence.h @@ -69,6 +69,8 @@ SRSLTE_API void srslte_sequence_apply_s(const int16_t* in, int16_t* out, uint32_ SRSLTE_API void srslte_sequence_apply_c(const int8_t* in, int8_t* out, uint32_t length, uint32_t seed); +SRSLTE_API void srslte_sequence_apply_bit(const uint8_t* in, uint8_t* out, uint32_t length, uint32_t seed); + SRSLTE_API int srslte_sequence_pbch(srslte_sequence_t* seq, srslte_cp_t cp, uint32_t cell_id); SRSLTE_API int srslte_sequence_pcfich(srslte_sequence_t* seq, uint32_t nslot, uint32_t cell_id); diff --git a/lib/src/phy/common/sequence.c b/lib/src/phy/common/sequence.c index e0ff1622c..33761c24b 100644 --- a/lib/src/phy/common/sequence.c +++ b/lib/src/phy/common/sequence.c @@ -603,6 +603,65 @@ void srslte_sequence_apply_c(const int8_t* in, int8_t* out, uint32_t length, uin for (; i < length; i++) { out[i] = in[i] * (((x1 ^ x2) & 1U) ? 
-1 : +1); + // Step sequences + x1 = sequence_gen_LTE_pr_memless_step_x1(x1); + x2 = sequence_gen_LTE_pr_memless_step_x2(x2); + } +} + +void srslte_sequence_apply_bit(const uint8_t* in, uint8_t* out, uint32_t length, uint32_t seed) +{ + uint32_t x1 = sequence_x1_init; // X1 initial state is fixed + uint32_t x2 = sequence_get_x2_init(seed); // loads x2 initial state + + uint32_t i = 0; + + if (length >= SEQUENCE_PAR_BITS) { + for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) { + uint32_t c = (uint32_t)(x1 ^ x2); + + uint32_t j = 0; +#ifdef LV_HAVE_SSE + if (SEQUENCE_PAR_BITS >= 16) { + // Preloads bits of interest in the 16 LSB + __m128i mask = _mm_set1_epi32(c); + mask = _mm_shuffle_epi8(mask, _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1)); + + // Masks each bit + // mask = _mm_and_si128( mask, _mm_set_epi64x(0x0102040810204080, 0x0102040810204080)); + mask = _mm_and_si128(mask, _mm_set_epi64x(0x8040201008040201, 0x8040201008040201)); + + // Get non zero mask + mask = _mm_cmpeq_epi8(mask, _mm_set_epi64x(0x8040201008040201, 0x8040201008040201)); + + // Reduce to 1s and 0s + mask = _mm_and_si128(mask, _mm_set1_epi8(1)); + + // Load input + __m128i v = _mm_loadu_si128((__m128i*)(in + i + j)); + + // Apply XOR + v = _mm_xor_si128(mask, v); + + _mm_storeu_si128((__m128i*)(out + i + j), v); + + // Increment bit counter `j` + j += 16; + } +#endif + for (; j < SEQUENCE_PAR_BITS; j++) { + out[i + j] = in[i + j] ^ ((c >> j) & 1U); + } + + // Step sequences + x1 = sequence_gen_LTE_pr_memless_step_par_x1(x1); + x2 = sequence_gen_LTE_pr_memless_step_par_x2(x2); + } + } + + for (; i < length; i++) { + out[i] = in[i] ^ ((x1 ^ x2) & 1U); + // Step sequences x1 = sequence_gen_LTE_pr_memless_step_x1(x1); x2 = sequence_gen_LTE_pr_memless_step_x2(x2); diff --git a/lib/src/phy/common/test/sequence_test.c b/lib/src/phy/common/test/sequence_test.c index 70a5c8744..8d6a4db30 100644 --- a/lib/src/phy/common/test/sequence_test.c +++ 
b/lib/src/phy/common/test/sequence_test.c @@ -34,20 +34,23 @@ static float c_float[Nc + MAX_SEQ_LEN + 31]; static int16_t c_short[Nc + MAX_SEQ_LEN + 31]; static int8_t c_char[Nc + MAX_SEQ_LEN + 31]; static uint8_t c_packed[MAX_SEQ_LEN / 8]; +static uint8_t c_unpacked[MAX_SEQ_LEN]; static float ones_float[Nc + MAX_SEQ_LEN + 31]; static int16_t ones_short[Nc + MAX_SEQ_LEN + 31]; static int8_t ones_char[Nc + MAX_SEQ_LEN + 31]; static uint8_t ones_packed[MAX_SEQ_LEN / 8]; +static uint8_t ones_unpacked[MAX_SEQ_LEN]; static int test_sequence(srslte_sequence_t* sequence, uint32_t seed, uint32_t length, uint32_t repetitions) { - int ret = SRSLTE_SUCCESS; - struct timeval t[3] = {}; - uint64_t interval_gen_us = 0; - uint64_t interval_xor_float_us = 0; - uint64_t interval_xor_short_us = 0; - uint64_t interval_xor_char_us = 0; + int ret = SRSLTE_SUCCESS; + struct timeval t[3] = {}; + uint64_t interval_gen_us = 0; + uint64_t interval_xor_float_us = 0; + uint64_t interval_xor_short_us = 0; + uint64_t interval_xor_char_us = 0; + uint64_t interval_xor_unpacked_us = 0; gettimeofday(&t[1], NULL); @@ -135,6 +138,15 @@ static int test_sequence(srslte_sequence_t* sequence, uint32_t seed, uint32_t le get_time_interval(t); interval_xor_char_us = t->tv_sec * 1000000UL + t->tv_usec; + // Test in-place unpacked XOR + gettimeofday(&t[1], NULL); + for (uint32_t r = 0; r < repetitions; r++) { + srslte_sequence_apply_bit(ones_unpacked, c_unpacked, length, seed); + } + gettimeofday(&t[2], NULL); + get_time_interval(t); + interval_xor_unpacked_us = t->tv_sec * 1000000UL + t->tv_usec; + if (memcmp(c_char, sequence->c_char, length * sizeof(int8_t)) != 0) { ERROR("Unmatched XOR c_char"); ret = SRSLTE_ERROR; @@ -145,13 +157,19 @@ static int test_sequence(srslte_sequence_t* sequence, uint32_t seed, uint32_t le ret = SRSLTE_ERROR; } - printf("%08x; %8d; %8.1f; %8.1f; %8.1f; %8.1f; %8c\n", + if (memcmp(c, c_unpacked, length) != 0) { + ERROR("Unmatched c_unpacked"); + ret = SRSLTE_ERROR; + } + + 
printf("%08x; %8d; %8.1f; %8.1f; %8.1f; %8.1f; %8.1f; %8c\n", seed, length, (double)(length * repetitions) / (double)interval_gen_us, (double)(length * repetitions) / (double)interval_xor_float_us, (double)(length * repetitions) / (double)interval_xor_short_us, (double)(length * repetitions) / (double)interval_xor_char_us, + (double)(length * repetitions) / (double)interval_xor_unpacked_us, ret == SRSLTE_SUCCESS ? 'y' : 'n'); return SRSLTE_SUCCESS; @@ -168,9 +186,10 @@ int main(int argc, char** argv) // Initialise vectors with ones for (uint32_t i = 0; i < MAX_SEQ_LEN; i++) { - ones_float[i] = 1.0F; - ones_short[i] = 1; - ones_char[i] = 1; + ones_float[i] = 1.0F; + ones_short[i] = 1; + ones_char[i] = 1; + ones_unpacked[i] = 0; if (i < MAX_SEQ_LEN / 8) { ones_packed[i] = UINT8_MAX; } @@ -182,7 +201,15 @@ int main(int argc, char** argv) return SRSLTE_ERROR; } - printf("%8s; %8s; %8s; %8s; %8s; %8s; %8s\n", "seed", "length", "GEN", "XOR PS", "XOR 16", "XOR 8", "Passed"); + printf("%8s; %8s; %8s; %8s; %8s; %8s; %8s; %8s;\n", + "seed", + "length", + "GEN", + "XOR PS", + "XOR 16", + "XOR 8", + "XOR Unpack", + "Passed"); for (uint32_t length = min_length; length <= max_length; length = (length * 5) / 4) { test_sequence(&sequence, (uint32_t)srslte_random_uniform_int_dist(random_gen, 1, INT32_MAX), length, repetitions);