Merge pull request #113 from softwareradiosystems/next_new_features

adding simd xor functionality
7 years ago · 854a77455d
parent aacd9e1e5b 38903de07c
commit 854a77455d
7 changed files with 190 additions and 6 deletions
--- a/lib/include/srslte/phy/utils/simd.h
+++ b/lib/include/srslte/phy/utils/simd.h
@ -1506,4 +1506,119 @@ static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) {

 #endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */

+#if SRSLTE_SIMD_B_SIZE
+/* Data types */
+#ifdef LV_HAVE_AVX512
+typedef __m512i simd_b_t;
+#else /* LV_HAVE_AVX512 */
+#ifdef LV_HAVE_AVX2
+typedef __m256i simd_b_t;
+#else /* LV_HAVE_AVX2 */
+#ifdef LV_HAVE_SSE
+typedef __m128i simd_b_t;
+#else /* HAVE_NEON */
+#ifdef HAVE_NEON
+typedef int8x16_t simd_b_t;
+#endif /* HAVE_NEON */
+#endif /* LV_HAVE_SSE */
+#endif /* LV_HAVE_AVX2 */
+#endif /* LV_HAVE_AVX512 */
+
+
+
+static inline simd_b_t srslte_simd_b_load(int8_t *ptr){
+#ifdef LV_HAVE_AVX512
+  return _mm512_load_si512(ptr);
+#else /* LV_HAVE_AVX512 */
+#ifdef LV_HAVE_AVX2
+  return _mm256_load_si256((__m256i*) ptr);
+#else /* LV_HAVE_AVX2 */
+#ifdef LV_HAVE_SSE
+  return _mm_load_si128((__m128i*) ptr);
+#else /* LV_HAVE_SSE */
+#ifdef HAVE_NEON
+  return vld1q_s8(ptr);
+#endif /* HAVE_NEON */
+#endif /* LV_HAVE_SSE */
+#endif /* LV_HAVE_AVX2 */
+#endif /* LV_HAVE_AVX512 */
+}  
+  
+static inline simd_b_t srslte_simd_b_loadu(int8_t *ptr){
+#ifdef LV_HAVE_AVX512
+  return _mm512_loadu_si512(ptr);
+#else /* LV_HAVE_AVX512 */
+#ifdef LV_HAVE_AVX2
+  return _mm256_loadu_si256((__m256i*) ptr);
+#else /* LV_HAVE_AVX2 */
+#ifdef LV_HAVE_SSE
+  return _mm_loadu_si128((__m128i*) ptr);
+#else /* LV_HAVE_SSE */
+#ifdef HAVE_NEON
+  return vld1q_s8(ptr);
+#endif /* HAVE_NEON */
+#endif /* LV_HAVE_SSE */
+#endif /* LV_HAVE_AVX2 */
+#endif /* LV_HAVE_AVX512 */
+}
+
+static inline void srslte_simd_b_store(int8_t *ptr, simd_b_t simdreg) {
+#ifdef LV_HAVE_AVX512
+  _mm512_store_si512(ptr, simdreg);
+#else /* LV_HAVE_AVX512 */
+#ifdef LV_HAVE_AVX2
+  _mm256_store_si256((__m256i*) ptr, simdreg);
+#else /* LV_HAVE_AVX2 */
+#ifdef LV_HAVE_SSE
+  _mm_store_si128((__m128i*) ptr, simdreg);
+#else /* LV_HAVE_SSE */
+#ifdef HAVE_NEON
+  vst1q_s8( ptr, simdreg);
+#endif /* HAVE_NEON */
+#endif /* LV_HAVE_SSE */
+#endif /* LV_HAVE_AVX2 */
+#endif /* LV_HAVE_AVX512 */
+}
+
+static inline void srslte_simd_b_storeu(int8_t *ptr, simd_b_t simdreg) {
+#ifdef LV_HAVE_AVX512
+  _mm512_storeu_si512(ptr, simdreg);
+#else /* LV_HAVE_AVX512 */
+#ifdef LV_HAVE_AVX2
+  _mm256_storeu_si256((__m256i*) ptr, simdreg);
+#else /* LV_HAVE_AVX2 */
+#ifdef LV_HAVE_SSE
+  _mm_storeu_si128((__m128i*) ptr, simdreg);
+#else /* LV_HAVE_SSE */
+#ifdef HAVE_NEON
+  vst1q_s8(ptr, simdreg);
+#endif /* HAVE_NEON */
+#endif /* LV_HAVE_SSE */
+#endif /* LV_HAVE_AVX2 */
+#endif /* LV_HAVE_AVX512 */
+}
+
+
+static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
+    
+#ifdef LV_HAVE_AVX512
+ return _mm512_xor_epi32(a, b);
+#else /* LV_HAVE_AVX512 */
+#ifdef LV_HAVE_AVX2
+ return _mm256_xor_si256(a, b);
+#else /* LV_HAVE_AVX2 */
+#ifdef LV_HAVE_SSE
+ return _mm_xor_si128 (a, b);
+#else /* LV_HAVE_SSE */
+#ifdef HAVE_NEON
+ return veorq_s8(a, b); 
+#endif /* HAVE_NEON */
+#endif /* LV_HAVE_SSE */
+#endif /* LV_HAVE_AVX2 */
+#endif /* LV_HAVE_AVX512 */
+}
+
+#endif /*SRSLTE_SIMD_B_SIZE */
+
+
 #endif //SRSLTE_SIMD_H_H
--- a/lib/include/srslte/phy/utils/vector.h
+++ b/lib/include/srslte/phy/utils/vector.h
@ -53,6 +53,10 @@ extern "C" {
 // Exponential moving average
 #define SRSLTE_VEC_EMA(data, average, alpha) ((alpha)*(data)+(1-alpha)*(average))

+
+/*logical operations */
+SRSLTE_API void srslte_vec_xor_bbb(int8_t *x,int8_t *y,int8_t *z, uint32_t len);
+
 /** Return the sum of all the elements */
 SRSLTE_API float srslte_vec_acc_ff(float *x, uint32_t len);
 SRSLTE_API cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len);
--- a/lib/include/srslte/phy/utils/vector_simd.h
+++ b/lib/include/srslte/phy/utils/vector_simd.h
@ -53,6 +53,10 @@ extern "C" {
 #endif /* LV_HAVE_AVX */
 #endif /* LV_HAVE_AVX512 */

+
+/*SIMD Logical operations*/
+SRSLTE_API void srslte_vec_xor_bbb_simd(int8_t *x, int8_t *y, int8_t *z, int len);
+
 /* SIMD Basic vector math */
 SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len);

--- a/lib/src/phy/scrambling/scrambling.c
+++ b/lib/src/phy/scrambling/scrambling.c
@ -60,10 +60,8 @@ void srslte_scrambling_c_offset(srslte_sequence_t *s, cf_t *data, int offset, in
 }

 void scrambling_b(uint8_t *c, uint8_t *data, int len) {
-  int i;  
-  for (i = 0; i < len; i++) {
-    data[i] = (data[i] ^ c[i]);
-  }  
+
+  srslte_vec_xor_bbb((int8_t*)c,(int8_t*)data,(int8_t*)data,len);  
 }

 void scrambling_b_word(uint8_t *c, uint8_t *data, int len) {
--- a/lib/src/phy/utils/test/vector_test.c
+++ b/lib/src/phy/utils/test/vector_test.c
@ -47,8 +47,10 @@ bool verbose = false;
 #define MAX_FUNCTIONS (64)
 #define MAX_BLOCKS (16)

+
 #define RANDOM_F() ((float)rand())/((float)RAND_MAX)
 #define RANDOM_S() ((int16_t)(rand() && 0x800F))
+#define RANDOM_B() ((int8_t)(rand() && 0x8008))
 #define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F())

 #define TEST_CALL(TEST_CODE)   gettimeofday(&start, NULL);\
@ -87,6 +89,29 @@ float squared_error (cf_t a, cf_t b) {
  return diff_re*diff_re + diff_im*diff_im;
 }

+ TEST(srslte_vec_xor_bbb,
+     MALLOC(int8_t, x);
+         MALLOC(int8_t, y);
+         MALLOC(int8_t, z);
+
+         cf_t gold = 0.0f;
+         for (int i = 0; i < block_size; i++) {
+           x[i] = RANDOM_B();
+           y[i] = RANDOM_B();
+         }
+
+         TEST_CALL(srslte_vec_xor_bbb(x, y, z, block_size))
+
+         for (int i = 0; i < block_size; i++) {
+           gold = x[i] ^ y[i];
+           mse += cabsf(gold - z[i]);
+         }
+
+         free(x);
+         free(y);
+         free(z);
+)
+
 TEST(srslte_vec_acc_ff,
     MALLOC(float, x);
         float z;
@ -613,8 +638,8 @@ TEST(srslte_vec_div_fff,

         cf_t gold;
         for (int i = 0; i < block_size; i++) {
-           x[i] = RANDOM_F();
-           y[i] = RANDOM_F();
+           x[i] = RANDOM_F() + 0.0001;
+           y[i] = RANDOM_F()+ 0.0001;
         }

         TEST_CALL(srslte_vec_div_fff(x, y, z, block_size))
@ -690,6 +715,11 @@ int main(int argc, char **argv) {
  for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) {
    func_count = 0;

+
+    passed[func_count][size_count] = test_srslte_vec_xor_bbb(func_names[func_count], &timmings[func_count][size_count], block_size);
+    func_count++;
+
+
    passed[func_count][size_count] = test_srslte_vec_acc_ff(func_names[func_count], &timmings[func_count][size_count], block_size);
    func_count++;

--- a/lib/src/phy/utils/vector.c
+++ b/lib/src/phy/utils/vector.c
@ -37,6 +37,10 @@



+void srslte_vec_xor_bbb(int8_t *x,int8_t *y,int8_t *z, uint32_t len) {
+  srslte_vec_xor_bbb_simd(x, y, z, len);
+}
+
 // Used in PRACH detector, AGC and chest_dl for noise averaging
 float srslte_vec_acc_ff(float *x, uint32_t len) {
  return srslte_vec_acc_ff_simd(x, len);
--- a/lib/src/phy/utils/vector_simd.c
+++ b/lib/src/phy/utils/vector_simd.c
@ -37,6 +37,35 @@
 #include "srslte/phy/utils/simd.h"


+void srslte_vec_xor_bbb_simd(int8_t *x, int8_t *y, int8_t *z, int len) {
+  int i = 0;
+#if SRSLTE_SIMD_B_SIZE
+  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
+    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
+      simd_b_t a = srslte_simd_b_load(&x[i]);
+      simd_b_t b = srslte_simd_b_load(&y[i]);
+
+      simd_b_t r = srslte_simd_b_xor(a, b);
+
+      srslte_simd_b_store(&z[i], r);
+    }
+  } else {
+    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
+      simd_b_t a = srslte_simd_b_loadu(&x[i]);
+      simd_b_t b = srslte_simd_b_loadu(&y[i]);
+
+      simd_s_t r = srslte_simd_b_xor(a, b);
+
+      srslte_simd_b_storeu(&z[i], r);
+    }
+  }
+#endif /* SRSLTE_SIMD_B_SIZE */
+  
+    for(; i < len; i++){
+    z[i] = x[i] ^ y[i];
+  }
+}
+
 int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) {
  int i = 0;
  int result = 0;