From c41ad5453c0ecb099829d771e5f549c471f55834 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Mon, 25 Sep 2017 17:08:11 +0200 Subject: [PATCH] Solved bugs and compilation error in simd and vector_simd --- lib/include/srslte/phy/utils/simd.h | 20 ++++++++++---------- lib/include/srslte/phy/utils/vector_simd.h | 2 +- lib/src/phy/utils/test/vector_test.c | 8 ++++---- lib/src/phy/utils/vector_simd.c | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 774dd54bd..22d8db79d 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -226,7 +226,7 @@ static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 __m512 r = _mm512_add_ps(a, b); - return _mm512_mask_sub_ps(r, 0b1010101010101010, a, b); + return _mm512_mask_sub_ps(r, 0b0101010101010101, a, b); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 return _mm256_addsub_ps(a, b); @@ -642,10 +642,10 @@ static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { return _mm512_load_si512(ptr); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - return _mm256_load_si256(ptr); + return _mm256_load_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_load_si128(ptr); + return _mm_load_si128((__m128i*) ptr); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -653,13 +653,13 @@ static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { #ifdef LV_HAVE_AVX512 - return _mm512_load_si512(ptr); + return _mm512_loadu_si512(ptr); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - return _mm256_load_si256(ptr); + return _mm256_loadu_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_load_si128(ptr); + return _mm_loadu_si128((__m128i*) ptr); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -670,10 +670,10 @@ static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { _mm512_store_si512(ptr, simdreg); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - _mm256_store_si256(ptr, simdreg); + _mm256_store_si256((__m256i*) ptr, simdreg); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - _mm_store_si128(ptr, simdreg); + _mm_store_si128((__m128i*) ptr, simdreg); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -684,10 +684,10 @@ static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { _mm512_storeu_si512(ptr, simdreg); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - _mm256_storeu_si256(ptr, simdreg); + _mm256_storeu_si256((__m256i*) ptr, simdreg); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - _mm_storeu_si128(ptr, simdreg); + _mm_storeu_si128((__m128i*) ptr, simdreg); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 8ea2ce9bc..4ee839fab 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -44,7 +44,7 @@ extern "C" { #ifdef LV_HAVE_SSE #define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x0F) == 0) #else /* LV_HAVE_SSE */ -#define SRSLTE_IS_ALIGNED(PTR) (true) +#define SRSLTE_IS_ALIGNED(PTR) (1) #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX */ #endif /* LV_HAVE_AVX512 */ diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index e781d05b9..05dce1d35 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -45,7 +45,7 @@ bool mmse_solver = false; bool verbose = false; #define MAX_MSE (1e-3) -#define NOF_REPETITIONS (1024*128) +#define NOF_REPETITIONS (1024) #define MAX_FUNCTIONS (64) #define MAX_BLOCKS (16) @@ -70,7 +70,7 @@ bool verbose = false; return passed;\ } -#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) +#define MALLOC(TYPE, NAME) TYPE *NAME = malloc(sizeof(TYPE)*block_size) static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { @@ -339,7 +339,7 @@ TEST(srslte_vec_prod_conj_ccc, TEST(srslte_vec_sc_prod_ccc, MALLOC(cf_t, x); MALLOC(cf_t, z); - cf_t y = RANDOM_F(); + cf_t y = RANDOM_CF(); cf_t gold; for (int i = 0; i < block_size; i++) { @@ -469,7 +469,7 @@ int main(int argc, char **argv) { uint32_t func_count = 0; bool passed = true; - for (uint32_t block_size = 1; block_size <= 1024*16; block_size *= 2) { + for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { func_count = 0; passed &= test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 21132390f..2eb0428b7 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -77,7 +77,7 @@ int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) { void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { int i = 0; -#ifdef SRSLTE_SIMD_S_SIZE +#if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { simd_s_t a = srslte_simd_s_load(&x[i]); @@ -106,7 +106,7 @@ void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { int i = 0; -#ifdef SRSLTE_SIMD_S_SIZE +#if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { simd_s_t a = srslte_simd_s_load(&x[i]); @@ -135,7 +135,7 @@ void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { int i = 0; -#ifdef SRSLTE_SIMD_S_SIZE +#if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { simd_s_t a = srslte_simd_s_load(&x[i]); @@ -721,14 +721,14 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len) { } } else { for (; i < len - SRSLTE_SIMD_F_SIZE / 2 + 1; i += SRSLTE_SIMD_F_SIZE / 2) { - simd_f_t temp = srslte_simd_f_load((float *) &x[i]); + simd_f_t temp = srslte_simd_f_loadu((float *) &x[i]); simd_f_t m1 = srslte_simd_f_mul(hre, temp); simd_f_t sw = srslte_simd_f_swap(temp); simd_f_t m2 = srslte_simd_f_mul(him, sw); simd_f_t r = srslte_simd_f_addsub(m1, m2); - srslte_simd_f_store((float *) &z[i], r); + srslte_simd_f_storeu((float *) &z[i], r); } } #endif