diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h
index c3a94a115..8d661fd0e 100644
--- a/lib/include/srslte/phy/utils/vector.h
+++ b/lib/include/srslte/phy/utils/vector.h
@@ -135,6 +135,7 @@ SRSLTE_API void srslte_vec_sc_prod_ccc(const cf_t* x, const cf_t h, cf_t* z, con
 SRSLTE_API void srslte_vec_sc_prod_fff(const float* x, const float h, float* z, const uint32_t len);
 
 SRSLTE_API void srslte_vec_convert_fi(const float* x, const float scale, int16_t* z, const uint32_t len);
+SRSLTE_API void srslte_vec_convert_conj_cs(const cf_t* x, const float scale, int16_t* z, const uint32_t len);
 SRSLTE_API void srslte_vec_convert_if(const int16_t* x, const float scale, float* z, const uint32_t len);
 SRSLTE_API void srslte_vec_convert_fb(const float* x, const float scale, int8_t* z, const uint32_t len);
 
diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h
index 7f758c970..450105a56 100644
--- a/lib/include/srslte/phy/utils/vector_simd.h
+++ b/lib/include/srslte/phy/utils/vector_simd.h
@@ -120,6 +120,8 @@ SRSLTE_API void srslte_vec_convert_if_simd(const int16_t* x, float* z, const flo
 
 SRSLTE_API void srslte_vec_convert_fi_simd(const float* x, int16_t* z, const float scale, const int len);
 
+SRSLTE_API void srslte_vec_convert_conj_cs_simd(const cf_t* x, int16_t* z, const float scale, const int len);
+
 SRSLTE_API void srslte_vec_convert_fb_simd(const float* x, int8_t* z, const float scale, const int len);
 
 SRSLTE_API void srslte_vec_interleave_simd(const cf_t* x, const cf_t* y, cf_t* z, const int len);
diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c
index 3ffcdbdb5..0f4fa9176 100644
--- a/lib/src/phy/utils/test/vector_test.c
+++ b/lib/src/phy/utils/test/vector_test.c
@@ -408,6 +408,30 @@ TEST(srslte_vec_convert_fi, MALLOC(float, x); MALLOC(short, z); float scale = 10
      free(x);
      free(z);)
 
+TEST(
+    srslte_vec_convert_conj_cs, MALLOC(cf_t, x); int16_t* z = srslte_vec_i16_malloc(block_size * 2);
+    float scale = 1000.0f;
+
+    short gold_re;
+    short gold_im;
+    for (int i = 0; i < block_size; i++) { x[i] = RANDOM_CF(); } // keep im(x) so the conjugation is checked
+
+    TEST_CALL(srslte_vec_convert_conj_cs(x, scale, z, block_size))
+
+    for (int i = 0; i < block_size; i++) {
+      gold_re = (short)(crealf(x[i]) * scale);
+      gold_im = (short)(cimagf(-x[i]) * scale);
+      cf_t t1 = (float)gold_re + I * (float)gold_im;
+      cf_t t2 = (float)z[2 * i] + I * (float)z[2 * i + 1];
+      double err = cabsf(t1 - t2);
+      if (err > mse) {
+        mse = err;
+      }
+    }
+
+    free(x);
+    free(z);)
+
 TEST(srslte_vec_convert_if, MALLOC(int16_t, x); MALLOC(float, z); float scale = 1000.0f;
 
      float gold;
@@ -591,6 +615,22 @@ TEST(
     free(y);
     free(z);)
 
+TEST(
+    srslte_vec_conj_cc, MALLOC(cf_t, x); MALLOC(cf_t, z);
+
+    cf_t gold;
+    for (int i = 0; i < block_size; i++) { x[i] = RANDOM_CF(); }
+
+    TEST_CALL(srslte_vec_conj_cc(x, z, block_size))
+
+    for (int i = 0; i < block_size; i++) {
+      gold = conjf(x[i]);
+      mse += cabsf(gold - z[i]);
+    }
+
+    free(x);
+    free(z);)
+
 TEST(
     srslte_vec_max_fi, MALLOC(float, x);
 
@@ -802,6 +842,10 @@ int main(int argc, char** argv)
         test_srslte_vec_convert_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
     func_count++;
 
+    passed[func_count][size_count] =
+        test_srslte_vec_convert_conj_cs(func_names[func_count], &timmings[func_count][size_count], block_size);
+    func_count++;
+
     passed[func_count][size_count] =
         test_srslte_vec_convert_if(func_names[func_count], &timmings[func_count][size_count], block_size);
     func_count++;
@@ -858,6 +902,10 @@ int main(int argc, char** argv)
         test_srslte_vec_div_fff(func_names[func_count], &timmings[func_count][size_count], block_size);
     func_count++;
 
+    passed[func_count][size_count] =
+        test_srslte_vec_conj_cc(func_names[func_count], &timmings[func_count][size_count], block_size);
+    func_count++;
+
     passed[func_count][size_count] =
         test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
     func_count++;
diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c
index ec34126b3..16bcc904e 100644
--- a/lib/src/phy/utils/vector.c
+++ b/lib/src/phy/utils/vector.c
@@ -113,6 +113,11 @@ void srslte_vec_convert_fi(const float* x, const float scale, int16_t* z, const
   srslte_vec_convert_fi_simd(x, z, scale, len);
 }
 
+void srslte_vec_convert_conj_cs(const cf_t* x, const float scale, int16_t* z, const uint32_t len)
+{
+  srslte_vec_convert_conj_cs_simd(x, z, scale, len);
+}
+
 void srslte_vec_convert_fb(const float* x, const float scale, int8_t* z, const uint32_t len)
 {
   srslte_vec_convert_fb_simd(x, z, scale, len);
diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c
index 935fab606..4e15cae39 100644
--- a/lib/src/phy/utils/vector_simd.c
+++ b/lib/src/phy/utils/vector_simd.c
@@ -471,6 +471,59 @@ void srslte_vec_convert_fi_simd(const float* x, int16_t* z, const float scale, c
   }
 }
 
+/**
+ * Scales a complex float vector, conjugates it and converts it to interleaved 16-bit integers.
+ *
+ * Output layout: z[2 * k] = re(x_[k]) * scale and z[2 * k + 1] = -im(x_[k]) * scale.
+ *
+ * @param x_    Input vector of len_ complex samples
+ * @param z     Output vector, written with 2 * len_ int16_t values
+ * @param scale Factor applied to every component before the integer conversion
+ * @param len_  Number of complex samples in x_
+ */
+void srslte_vec_convert_conj_cs_simd(const cf_t* x_, int16_t* z, const float scale, const int len_)
+{
+  int i = 0;
+
+  // View the complex input as interleaved floats (re0, im0, re1, im1, ...), hence twice as many elements
+  const float* x = (const float*)x_;
+  const int len = len_ * 2;
+
+#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE
+  // Alternating +scale/-scale so that only the imaginary components get negated by the multiplication
+  srslte_simd_aligned float scale_v[SRSLTE_SIMD_F_SIZE];
+  for (uint32_t j = 0; j < SRSLTE_SIMD_F_SIZE; j++) {
+    scale_v[j] = (j % 2 == 0) ? +scale : -scale;
+  }
+
+  simd_f_t s = srslte_simd_f_load(scale_v);
+  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(z)) {
+    for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
+      simd_f_t a = srslte_simd_f_load(&x[i]);
+      simd_f_t b = srslte_simd_f_load(&x[i + SRSLTE_SIMD_F_SIZE]);
+
+      simd_f_t sa = srslte_simd_f_mul(a, s);
+      simd_f_t sb = srslte_simd_f_mul(b, s);
+
+      simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb);
+
+      srslte_simd_s_store(&z[i], i16);
+    }
+  } else {
+    for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
+      simd_f_t a = srslte_simd_f_loadu(&x[i]);
+      simd_f_t b = srslte_simd_f_loadu(&x[i + SRSLTE_SIMD_F_SIZE]);
+
+      simd_f_t sa = srslte_simd_f_mul(a, s);
+      simd_f_t sb = srslte_simd_f_mul(b, s);
+
+      simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb);
+
+      srslte_simd_s_storeu(&z[i], i16);
+    }
+  }
+#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE */
+
+  // Scalar tail: even indices are real parts, odd indices are imaginary parts and must be negated
+  for (; i < len; i++) {
+    z[i] = (int16_t)(x[i] * ((i % 2 == 0) ? scale : -scale));
+  }
+}
+
 #define SRSLTE_IS_ALIGNED_SSE(PTR) (((size_t)(PTR)&0x0F) == 0)
 
 void srslte_vec_convert_fb_simd(const float* x, int8_t* z, const float scale, const int len)
@@ -1691,8 +1750,8 @@ float srslte_vec_estimate_frequency_simd(const cf_t* x, int len)
   }
 
   // Accumulate using horizontal addition
-  simd_f_t _sum_re = srslte_simd_cf_re(_sum);
-  simd_f_t _sum_im = srslte_simd_cf_im(_sum);
+  simd_f_t _sum_re = srslte_simd_cf_re(_sum);
+  simd_f_t _sum_im = srslte_simd_cf_im(_sum);
   simd_f_t _sum_re_im = srslte_simd_f_hadd(_sum_re, _sum_im);
   for (int j = 2; j < SRSLTE_SIMD_F_SIZE; j *= 2) {
     _sum_re_im = srslte_simd_f_hadd(_sum_re_im, _sum_re_im);