@@ -228,6 +228,36 @@ void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y
   }
 }
 
+void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len) {
+  int i = 0;
+  const float gain = 1.0f / scale;
+
+#ifdef LV_HAVE_SSE
+  __m128 s = _mm_set1_ps(gain);
+  if (SRSLTE_IS_ALIGNED(z)) {
+    for (; i < len - 3; i += 4) {
+      __m64 *ptr = (__m64 *) &x[i];
+      __m128 fl = _mm_cvtpi16_ps(*ptr);
+      __m128 v = _mm_mul_ps(fl, s);
+
+      _mm_store_ps(&z[i], v);
+    }
+  } else {
+    for (; i < len - 3; i += 4) {
+      __m64 *ptr = (__m64 *) &x[i];
+      __m128 fl = _mm_cvtpi16_ps(*ptr);
+      __m128 v = _mm_mul_ps(fl, s);
+
+      _mm_storeu_ps(&z[i], v);
+    }
+  }
+#endif /* LV_HAVE_SSE */
+
+  for (; i < len; i++) {
+    z[i] = ((float) x[i]) * gain;
+  }
+}
+
 void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, const int len) {
   int i = 0;
 
@@ -1220,4 +1250,4 @@ void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const
     z[k++] += x[i];
     z[k++] += y[i];
   }
-}
\ No newline at end of file
+}
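
Usage note: below is a minimal, hypothetical test driver for the new srslte_vec_convert_if_simd routine; the sample values and the scale are illustrative assumptions, not part of the patch. It assumes only the prototype matching the definition added above (in the tree this would come from the vector_simd.h header rather than a local declaration).

/* Hypothetical driver: convert int16_t samples to float with a 1/scale gain. */
#include <stdint.h>
#include <stdio.h>

/* Prototype matching the function added in this patch; redeclared here to
 * keep the sketch self-contained. */
void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len);

int main(void) {
  /* Illustrative fixed-point samples; with scale = 1024, z[i] = x[i] / 1024,
   * so 1024 maps to 1.0f. The routine handles both aligned and unaligned z. */
  int16_t in[8] = {0, 1024, -1024, 2048, -2048, 8192, -8192, 16384};
  float out[8];

  srslte_vec_convert_if_simd(in, out, 1024.0f, 8);

  for (int i = 0; i < 8; i++) {
    printf("%6d -> %8.3f\n", in[i], out[i]);
  }
  return 0;
}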