From 38903de07cc64a29300bda06fb1593deba1f5b85 Mon Sep 17 00:00:00 2001 From: yagoda Date: Mon, 27 Nov 2017 11:10:50 +0000 Subject: [PATCH] adding simd xor functionality --- lib/include/srslte/phy/utils/simd.h | 115 +++++++++++++++++++++ lib/include/srslte/phy/utils/vector.h | 4 + lib/include/srslte/phy/utils/vector_simd.h | 4 + lib/src/phy/scrambling/scrambling.c | 6 +- lib/src/phy/utils/test/vector_test.c | 34 +++++- lib/src/phy/utils/vector.c | 4 + lib/src/phy/utils/vector_simd.c | 29 ++++++ 7 files changed, 190 insertions(+), 6 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index a9a79c486..cec003886 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -1506,4 +1506,119 @@ static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { #endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */ +#if SRSLTE_SIMD_B_SIZE +/* Data types */ +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_b_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_b_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_b_t; +#else /* HAVE_NEON */ +#ifdef HAVE_NEON +typedef int8x16_t simd_b_t; +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + + + +static inline simd_b_t srslte_simd_b_load(int8_t *ptr){ +#ifdef LV_HAVE_AVX512 + return _mm512_load_si512(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256((__m256i*) ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s8(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_b_t srslte_simd_b_loadu(int8_t *ptr){ +#ifdef LV_HAVE_AVX512 + return _mm512_loadu_si512(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_loadu_si256((__m256i*) ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_loadu_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s8(ptr); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_b_store(int8_t *ptr, simd_b_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i*) ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s8( ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_b_storeu(int8_t *ptr, simd_b_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256((__m256i*) ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s8(ptr, simdreg); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + + +static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) { + +#ifdef LV_HAVE_AVX512 + return _mm512_xor_epi32(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_xor_si256(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_xor_si128 (a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return veorq_s8(a, b); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /*SRSLTE_SIMD_B_SIZE */ + + #endif //SRSLTE_SIMD_H_H diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 4a5daefb3..7dad585a0 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -53,6 +53,10 @@ extern "C" { // Exponential moving average #define SRSLTE_VEC_EMA(data, average, alpha) ((alpha)*(data)+(1-alpha)*(average)) + +/*logical operations */ +SRSLTE_API void srslte_vec_xor_bbb(int8_t *x,int8_t *y,int8_t *z, uint32_t len); + /** Return the sum of all the elements */ SRSLTE_API float srslte_vec_acc_ff(float *x, uint32_t len); SRSLTE_API cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len); diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 294cff50f..468c5e11a 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -53,6 +53,10 @@ extern "C" { #endif /* LV_HAVE_AVX */ #endif /* LV_HAVE_AVX512 */ + +/*SIMD Logical operations*/ +SRSLTE_API void srslte_vec_xor_bbb_simd(int8_t *x, int8_t *y, int8_t *z, int len); + /* SIMD Basic vector math */ SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); diff --git a/lib/src/phy/scrambling/scrambling.c b/lib/src/phy/scrambling/scrambling.c index 42f16d1e8..ca0342905 100644 --- a/lib/src/phy/scrambling/scrambling.c +++ b/lib/src/phy/scrambling/scrambling.c @@ -60,10 +60,8 @@ void srslte_scrambling_c_offset(srslte_sequence_t *s, cf_t *data, int offset, in } void scrambling_b(uint8_t *c, uint8_t *data, int len) { - int i; - for (i = 0; i < len; i++) { - data[i] = (data[i] ^ c[i]); - } + + srslte_vec_xor_bbb((int8_t*)c,(int8_t*)data,(int8_t*)data,len); } void scrambling_b_word(uint8_t *c, uint8_t *data, int len) { diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index 9058e8813..a717a7537 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -47,8 +47,10 @@ bool verbose = false; #define MAX_FUNCTIONS (64) #define MAX_BLOCKS (16) + #define RANDOM_F() ((float)rand())/((float)RAND_MAX) #define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_B() ((int8_t)(rand() && 0x8008)) #define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) #define TEST_CALL(TEST_CODE) gettimeofday(&start, NULL);\ @@ -87,6 +89,29 @@ float squared_error (cf_t a, cf_t b) { return diff_re*diff_re + diff_im*diff_im; } + TEST(srslte_vec_xor_bbb, + MALLOC(int8_t, x); + MALLOC(int8_t, y); + MALLOC(int8_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_B(); + y[i] = RANDOM_B(); + } + + TEST_CALL(srslte_vec_xor_bbb(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] ^ y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + TEST(srslte_vec_acc_ff, MALLOC(float, x); float z; @@ -613,8 +638,8 @@ TEST(srslte_vec_div_fff, cf_t gold; for (int i = 0; i < block_size; i++) { - x[i] = RANDOM_F(); - y[i] = RANDOM_F(); + x[i] = RANDOM_F() + 0.0001; + y[i] = RANDOM_F()+ 0.0001; } TEST_CALL(srslte_vec_div_fff(x, y, z, block_size)) @@ -690,6 +715,11 @@ int main(int argc, char **argv) { for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { func_count = 0; + + passed[func_count][size_count] = test_srslte_vec_xor_bbb(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_acc_ff(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 3bb7fb08f..5d5b6747a 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -37,6 +37,10 @@ +void srslte_vec_xor_bbb(int8_t *x,int8_t *y,int8_t *z, uint32_t len) { + srslte_vec_xor_bbb_simd(x, y, z, len); +} + // Used in PRACH detector, AGC and chest_dl for noise averaging float srslte_vec_acc_ff(float *x, uint32_t len) { return srslte_vec_acc_ff_simd(x, len); diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 23c58cfce..c7bb7b1fc 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -37,6 +37,35 @@ #include "srslte/phy/utils/simd.h" +void srslte_vec_xor_bbb_simd(int8_t *x, int8_t *y, int8_t *z, int len) { + int i = 0; +#if SRSLTE_SIMD_B_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) { + simd_b_t a = srslte_simd_b_load(&x[i]); + simd_b_t b = srslte_simd_b_load(&y[i]); + + simd_b_t r = srslte_simd_b_xor(a, b); + + srslte_simd_b_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) { + simd_b_t a = srslte_simd_b_loadu(&x[i]); + simd_b_t b = srslte_simd_b_loadu(&y[i]); + + simd_s_t r = srslte_simd_b_xor(a, b); + + srslte_simd_b_storeu(&z[i], r); + } + } +#endif /* SRSLTE_SIMD_B_SIZE */ + + for(; i < len; i++){ + z[i] = x[i] ^ y[i]; + } +} + int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) { int i = 0; int result = 0;