From 67c8bf13681c408e8545477c787ea9b182aff862 Mon Sep 17 00:00:00 2001 From: IgnasJ Date: Wed, 19 Sep 2018 18:34:59 +0300 Subject: [PATCH] Fixes for srsLTE on ARM (#229) * Fix ARM NEON code compilation Fix LV_HAVE_NEON defintion incorrecly used instead of HAVE_NEON in some places Replace vqabsq_s32 with vabsq_f32 as vqabsq_s32 requires int type (fails to compile) Fix missing NEON code path in mat.h in srslte_mat_2x2_mmse_csi_simd() * Fix timestamp overflow issue on 32-bit systems with Soapy driver 'time_t secs' can be 32-bit on some systems. This causes calculation: 'secs * 1000000000;' to overflow. --- lib/include/srslte/phy/utils/mat.h | 5 +++++ lib/include/srslte/phy/utils/simd.h | 28 ++++++++++++++-------------- lib/src/phy/rf/rf_soapy_imp.c | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lib/include/srslte/phy/utils/mat.h b/lib/include/srslte/phy/utils/mat.h index 8db0205f9..c8465b34e 100644 --- a/lib/include/srslte/phy/utils/mat.h +++ b/lib/include/srslte/phy/utils/mat.h @@ -166,8 +166,13 @@ static inline void srslte_mat_2x2_mmse_csi_simd(simd_cf_t y0, simd_cf_t _noise_estimate; simd_f_t _norm = srslte_simd_f_set1(norm); +#if HAVE_NEON + _noise_estimate.val[0] = srslte_simd_f_set1(noise_estimate); + _noise_estimate.val[1] = srslte_simd_f_zero(); +#else /* HAVE_NEON */ _noise_estimate.re = srslte_simd_f_set1(noise_estimate); _noise_estimate.im = srslte_simd_f_zero(); +#endif /* HAVE_NEON */ /* 1. A = H' x H + No*/ simd_cf_t a00 = diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 2a7566e18..3f19b5e88 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -136,7 +136,7 @@ #define SRSLTE_SIMD_S_SIZE 8 #define SRSLTE_SIMD_C16_SIZE 8 -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ #define SRSLTE_SIMD_F_SIZE 0 #define SRSLTE_SIMD_CF_SIZE 0 @@ -511,7 +511,7 @@ static inline simd_f_t srslte_simd_f_abs(simd_f_t a) { return _mm_andnot_ps(_mm_set1_ps(-0.0f), a); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vqabsq_s32(a); + return vabsq_f32(a); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -987,13 +987,13 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { static inline simd_cf_t srslte_simd_cf_neg (simd_cf_t a) { simd_cf_t ret; -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = srslte_simd_f_neg(a.val[0]); ret.val[1] = srslte_simd_f_neg(a.val[1]); -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = srslte_simd_f_neg(a.re); ret.im = srslte_simd_f_neg(a.im); -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } @@ -1004,37 +1004,37 @@ static inline simd_cf_t srslte_simd_cf_neg_mask (simd_cf_t a, simd_f_t mask) { mask = _mm256_permutevar8x32_ps(mask, _mm256_setr_epi32(0,4,1,5,2,6,3,7)); #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = srslte_simd_f_neg_mask(a.val[0], mask); ret.val[1] = srslte_simd_f_neg_mask(a.val[1], mask); -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = srslte_simd_f_neg_mask(a.re, mask); ret.im = srslte_simd_f_neg_mask(a.im, mask); -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } static inline simd_cf_t srslte_simd_cf_conj (simd_cf_t a) { simd_cf_t ret; -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = a.val[0]; ret.val[1] = srslte_simd_f_neg(a.val[1]); -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = a.re; ret.im = srslte_simd_f_neg(a.im); -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } static inline simd_cf_t srslte_simd_cf_mulj (simd_cf_t a) { simd_cf_t ret; -#if LV_HAVE_NEON +#if HAVE_NEON ret.val[0] = srslte_simd_f_neg(a.val[1]); ret.val[1] = a.val[0]; -#else /* LV_HAVE_NEON */ +#else /* HAVE_NEON */ ret.re = srslte_simd_f_neg(a.im); ret.im = a.re; -#endif /* LV_HAVE_NEON */ +#endif /* HAVE_NEON */ return ret; } diff --git a/lib/src/phy/rf/rf_soapy_imp.c b/lib/src/phy/rf/rf_soapy_imp.c index 96f44ad80..26ad5bac0 100644 --- a/lib/src/phy/rf/rf_soapy_imp.c +++ b/lib/src/phy/rf/rf_soapy_imp.c @@ -851,7 +851,7 @@ int rf_soapy_send_timed_multi(void *h, // Convert initial tx time if (has_time_spec) { - timeNs = secs * 1000000000; + timeNs = (long long)secs * 1000000000; timeNs = timeNs + (frac_secs * 1000000000); }