changes to fix arm compilation

master
yagoda 6 years ago committed by Andre Puschmann
parent 398e271eee
commit 69dc16c4c8

@ -1356,7 +1356,9 @@ static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
return _mm_sign_epi16(a, b); return _mm_sign_epi16(a, b);
#else /* LV_HAVE_SSE */ #else /* LV_HAVE_SSE */
#ifdef HAVE_NEON #ifdef HAVE_NEON
#error sign instruction not available in Neon simd_s_t res;
return res;
//#error sign instruction not available in Neon
#endif /* HAVE_NEON */ #endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX2 */
@ -1800,7 +1802,7 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
#endif /* LV_HAVE_AVX512 */ #endif /* LV_HAVE_AVX512 */
} }
static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) { static inline simd_b_t srslte_simd_b_sub(simd_b_t a, simd_b_t b) {
#ifdef LV_HAVE_AVX512 #ifdef LV_HAVE_AVX512
return _mm512_subs_epi8(a, b); return _mm512_subs_epi8(a, b);
#else /* LV_HAVE_AVX512 */ #else /* LV_HAVE_AVX512 */
@ -1811,7 +1813,7 @@ static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
return _mm_subs_epi8(a, b); return _mm_subs_epi8(a, b);
#else /* LV_HAVE_SSE */ #else /* LV_HAVE_SSE */
#ifdef HAVE_NEON #ifdef HAVE_NEON
return vsubqs_s8(a, b); return vqsubq_s8(a, b);
#endif /* HAVE_NEON */ #endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX2 */
@ -1835,7 +1837,9 @@ static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
return _mm_sign_epi8(a, b); return _mm_sign_epi8(a, b);
#else /* LV_HAVE_SSE */ #else /* LV_HAVE_SSE */
#ifdef HAVE_NEON #ifdef HAVE_NEON
#error sign instruction not available in Neon simd_s_t res;
return res;
//#error sign instruction not available in Neon
#endif /* HAVE_NEON */ #endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX2 */

@ -46,6 +46,7 @@ srslte_tdec_16bit_impl_t gen_impl = {
}; };
/* SSE no-window implementation */ /* SSE no-window implementation */
#ifdef LV_HAVE_SSE
#include "srslte/phy/fec/turbodecoder_sse.h" #include "srslte/phy/fec/turbodecoder_sse.h"
srslte_tdec_16bit_impl_t sse_impl = { srslte_tdec_16bit_impl_t sse_impl = {
tdec_sse_init, tdec_sse_init,
@ -56,7 +57,7 @@ srslte_tdec_16bit_impl_t sse_impl = {
}; };
/* SSE window implementation */ /* SSE window implementation */
#ifdef LV_HAVE_SSE
#define WINIMP_IS_SSE16 #define WINIMP_IS_SSE16
#include "srslte/phy/fec/turbodecoder_win.h" #include "srslte/phy/fec/turbodecoder_win.h"
#undef WINIMP_IS_SSE16 #undef WINIMP_IS_SSE16
@ -162,6 +163,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
switch(dec_type) { switch(dec_type) {
case SRSLTE_TDEC_AUTO: case SRSLTE_TDEC_AUTO:
break; break;
#ifdef LV_HAVE_SSE
case SRSLTE_TDEC_SSE: case SRSLTE_TDEC_SSE:
h->dec16[0] = &sse_impl; h->dec16[0] = &sse_impl;
h->current_llr_type = SRSLTE_TDEC_16; h->current_llr_type = SRSLTE_TDEC_16;
@ -170,14 +172,15 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
h->dec16[0] = &sse16_win_impl; h->dec16[0] = &sse16_win_impl;
h->current_llr_type = SRSLTE_TDEC_16; h->current_llr_type = SRSLTE_TDEC_16;
break; break;
case SRSLTE_TDEC_GENERIC:
h->dec16[0] = &gen_impl;
h->current_llr_type = SRSLTE_TDEC_16;
break;
case SRSLTE_TDEC_SSE8_WINDOW: case SRSLTE_TDEC_SSE8_WINDOW:
h->dec8[0] = &sse8_win_impl; h->dec8[0] = &sse8_win_impl;
h->current_llr_type = SRSLTE_TDEC_8; h->current_llr_type = SRSLTE_TDEC_8;
break; break;
#endif
case SRSLTE_TDEC_GENERIC:
h->dec16[0] = &gen_impl;
h->current_llr_type = SRSLTE_TDEC_16;
break;
#ifdef LV_HAVE_AVX2 #ifdef LV_HAVE_AVX2
case SRSLTE_TDEC_AVX_WINDOW: case SRSLTE_TDEC_AVX_WINDOW:
h->dec16[0] = &avx16_win_impl; h->dec16[0] = &avx16_win_impl;
@ -237,6 +240,11 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
} }
if (dec_type == SRSLTE_TDEC_AUTO) { if (dec_type == SRSLTE_TDEC_AUTO) {
#ifdef HAVE_NEON
h->dec16[0] = &gen_impl;
h->current_llr_type = SRSLTE_TDEC_16;
//h->dec8[0] = &gen_impl;
#else
h->dec16[AUTO_16_SSE] = &sse_impl; h->dec16[AUTO_16_SSE] = &sse_impl;
h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl; h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl;
h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl; h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl;
@ -244,7 +252,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl; h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl;
h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl; h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl;
#endif #endif
#endif /* HAVE_NEON */
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) { for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
if (h->dec16[td]) { if (h->dec16[td]) {
if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) { if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) {

@ -612,6 +612,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols,
int32_t i = 0; int32_t i = 0;
#ifndef LV_HAVE_SSE #ifndef LV_HAVE_SSE
#ifndef HAVE_NEON
__m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2); __m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2);
uint8_t *_g_bits = &g_bits[bit_read_idx/8]; uint8_t *_g_bits = &g_bits[bit_read_idx/8];
@ -650,6 +651,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols,
} }
} }
bit_read_idx += i * 4; bit_read_idx += i * 4;
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_SSE */
/* Spare bits */ /* Spare bits */

@ -54,7 +54,7 @@ void srslte_vec_xor_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
simd_b_t a = srslte_simd_b_loadu(&x[i]); simd_b_t a = srslte_simd_b_loadu(&x[i]);
simd_b_t b = srslte_simd_b_loadu(&y[i]); simd_b_t b = srslte_simd_b_loadu(&y[i]);
simd_s_t r = srslte_simd_b_xor(a, b); simd_b_t r = srslte_simd_b_xor(a, b);
srslte_simd_b_storeu(&z[i], r); srslte_simd_b_storeu(&z[i], r);
} }
@ -167,19 +167,19 @@ void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
#if SRSLTE_SIMD_B_SIZE #if SRSLTE_SIMD_B_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) { for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_s_t a = srslte_simd_b_load(&x[i]); simd_b_t a = srslte_simd_b_load(&x[i]);
simd_s_t b = srslte_simd_b_load(&y[i]); simd_b_t b = srslte_simd_b_load(&y[i]);
simd_s_t r = srslte_simd_b_sub(a, b); simd_b_t r = srslte_simd_b_sub(a, b);
srslte_simd_b_store(&z[i], r); srslte_simd_b_store(&z[i], r);
} }
} else { } else {
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
simd_s_t a = srslte_simd_b_loadu(&x[i]); simd_b_t a = srslte_simd_b_loadu(&x[i]);
simd_s_t b = srslte_simd_b_loadu(&y[i]); simd_b_t b = srslte_simd_b_loadu(&y[i]);
simd_s_t r = srslte_simd_b_sub(a, b); simd_b_t r = srslte_simd_b_sub(a, b);
srslte_simd_b_storeu(&z[i], r); srslte_simd_b_storeu(&z[i], r);
} }
@ -222,6 +222,8 @@ void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, co
void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) { void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
int i = 0; int i = 0;
#ifndef HAVE_NEON
#if SRSLTE_SIMD_S_SIZE #if SRSLTE_SIMD_S_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
@ -243,6 +245,7 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
} }
} }
#endif /* SRSLTE_SIMD_S_SIZE */ #endif /* SRSLTE_SIMD_S_SIZE */
#endif /* NOT HAVE_NEON*/
for(; i < len; i++){ for(; i < len; i++){
z[i] = y[i]<0?-x[i]:x[i]; z[i] = y[i]<0?-x[i]:x[i];
@ -251,6 +254,8 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) { void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
int i = 0; int i = 0;
#ifndef HAVE_NEON
#if SRSLTE_SIMD_B_SIZE #if SRSLTE_SIMD_B_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) { for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
@ -272,7 +277,7 @@ void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
} }
} }
#endif /* SRSLTE_SIMD_S_SIZE */ #endif /* SRSLTE_SIMD_S_SIZE */
#endif /* NOT HAVE_NEON*/
for(; i < len; i++){ for(; i < len; i++){
z[i] = y[i]<0?-x[i]:x[i]; z[i] = y[i]<0?-x[i]:x[i];
} }

Loading…
Cancel
Save