diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 2529033c5..2c5d1ea4b 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -1356,7 +1356,9 @@ static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) { return _mm_sign_epi16(a, b); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - #error sign instruction not available in Neon + simd_s_t res; + return res; + //#error sign instruction not available in Neon #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1800,7 +1802,7 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) { #endif /* LV_HAVE_AVX512 */ } -static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) { +static inline simd_b_t srslte_simd_b_sub(simd_b_t a, simd_b_t b) { #ifdef LV_HAVE_AVX512 return _mm512_subs_epi8(a, b); #else /* LV_HAVE_AVX512 */ @@ -1811,7 +1813,7 @@ static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) { return _mm_subs_epi8(a, b); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vsubqs_s8(a, b); + return vqsubq_s8(a, b); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1835,7 +1837,9 @@ static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) { return _mm_sign_epi8(a, b); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - #error sign instruction not available in Neon + simd_s_t res; + return res; + //#error sign instruction not available in Neon #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ diff --git a/lib/src/phy/fec/turbodecoder.c b/lib/src/phy/fec/turbodecoder.c index a97180745..0ae094ecf 100644 --- a/lib/src/phy/fec/turbodecoder.c +++ b/lib/src/phy/fec/turbodecoder.c @@ -46,6 +46,7 @@ srslte_tdec_16bit_impl_t gen_impl = { }; /* SSE no-window implementation */ +#ifdef LV_HAVE_SSE #include "srslte/phy/fec/turbodecoder_sse.h" srslte_tdec_16bit_impl_t sse_impl = { tdec_sse_init, @@ -56,7 +57,7 @@ srslte_tdec_16bit_impl_t sse_impl = { }; /* SSE window implementation */ -#ifdef LV_HAVE_SSE + #define WINIMP_IS_SSE16 #include "srslte/phy/fec/turbodecoder_win.h" #undef WINIMP_IS_SSE16 @@ -162,6 +163,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec switch(dec_type) { case SRSLTE_TDEC_AUTO: break; +#ifdef LV_HAVE_SSE case SRSLTE_TDEC_SSE: h->dec16[0] = &sse_impl; h->current_llr_type = SRSLTE_TDEC_16; @@ -170,14 +172,15 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec h->dec16[0] = &sse16_win_impl; h->current_llr_type = SRSLTE_TDEC_16; break; - case SRSLTE_TDEC_GENERIC: - h->dec16[0] = &gen_impl; - h->current_llr_type = SRSLTE_TDEC_16; - break; case SRSLTE_TDEC_SSE8_WINDOW: h->dec8[0] = &sse8_win_impl; h->current_llr_type = SRSLTE_TDEC_8; break; +#endif + case SRSLTE_TDEC_GENERIC: + h->dec16[0] = &gen_impl; + h->current_llr_type = SRSLTE_TDEC_16; + break; #ifdef LV_HAVE_AVX2 case SRSLTE_TDEC_AVX_WINDOW: h->dec16[0] = &avx16_win_impl; @@ -237,6 +240,11 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec } if (dec_type == SRSLTE_TDEC_AUTO) { +#ifdef HAVE_NEON + h->dec16[0] = &gen_impl; + h->current_llr_type = SRSLTE_TDEC_16; + //h->dec8[0] = &gen_impl; +#else h->dec16[AUTO_16_SSE] = &sse_impl; h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl; h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl; @@ -244,7 +252,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl; h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl; #endif - +#endif /* HAVE_NEON */ for (int td=0;tddec16[td]) { if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) { diff --git a/lib/src/phy/phch/sch.c b/lib/src/phy/phch/sch.c index f9ea054a0..49df30898 100644 --- a/lib/src/phy/phch/sch.c +++ b/lib/src/phy/phch/sch.c @@ -612,6 +612,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols, int32_t i = 0; #ifndef LV_HAVE_SSE + #ifndef HAVE_NEON __m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2); uint8_t *_g_bits = &g_bits[bit_read_idx/8]; @@ -650,6 +651,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols, } } bit_read_idx += i * 4; + #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ /* Spare bits */ diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 83f9ea408..1143219b6 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -54,7 +54,7 @@ void srslte_vec_xor_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const simd_b_t a = srslte_simd_b_loadu(&x[i]); simd_b_t b = srslte_simd_b_loadu(&y[i]); - simd_s_t r = srslte_simd_b_xor(a, b); + simd_b_t r = srslte_simd_b_xor(a, b); srslte_simd_b_storeu(&z[i], r); } @@ -167,19 +167,19 @@ void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const #if SRSLTE_SIMD_B_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) { - simd_s_t a = srslte_simd_b_load(&x[i]); - simd_s_t b = srslte_simd_b_load(&y[i]); + simd_b_t a = srslte_simd_b_load(&x[i]); + simd_b_t b = srslte_simd_b_load(&y[i]); - simd_s_t r = srslte_simd_b_sub(a, b); + simd_b_t r = srslte_simd_b_sub(a, b); srslte_simd_b_store(&z[i], r); } } else { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { - simd_s_t a = srslte_simd_b_loadu(&x[i]); - simd_s_t b = srslte_simd_b_loadu(&y[i]); + simd_b_t a = srslte_simd_b_loadu(&x[i]); + simd_b_t b = srslte_simd_b_loadu(&y[i]); - simd_s_t r = srslte_simd_b_sub(a, b); + simd_b_t r = srslte_simd_b_sub(a, b); srslte_simd_b_storeu(&z[i], r); } @@ -222,6 +222,8 @@ void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, co void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) { int i = 0; + +#ifndef HAVE_NEON #if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { @@ -243,6 +245,7 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con } } #endif /* SRSLTE_SIMD_S_SIZE */ +#endif /* NOT HAVE_NEON*/ for(; i < len; i++){ z[i] = y[i]<0?-x[i]:x[i]; @@ -251,6 +254,8 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) { int i = 0; + +#ifndef HAVE_NEON #if SRSLTE_SIMD_B_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) { @@ -272,7 +277,7 @@ void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const } } #endif /* SRSLTE_SIMD_S_SIZE */ - +#endif /* NOT HAVE_NEON*/ for(; i < len; i++){ z[i] = y[i]<0?-x[i]:x[i]; }