changes to fix arm compilation

6 years ago · 69dc16c4c8
parent 398e271eee
commit 69dc16c4c8
4 changed files with 37 additions and 18 deletions
--- a/lib/include/srslte/phy/utils/simd.h
+++ b/lib/include/srslte/phy/utils/simd.h
@ -1356,7 +1356,9 @@ static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
  return _mm_sign_epi16(a, b);
 #else /* LV_HAVE_SSE */
 #ifdef HAVE_NEON
-  #error sign instruction not available in Neon
+  simd_s_t res;
  return res;
  //#error sign instruction not available in Neon
 #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 #endif /* LV_HAVE_AVX2 */
@ -1800,7 +1802,7 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
 #endif /* LV_HAVE_AVX512 */
 }
-static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
+static inline simd_b_t srslte_simd_b_sub(simd_b_t a, simd_b_t b) {
 #ifdef LV_HAVE_AVX512
  return _mm512_subs_epi8(a, b);
 #else /* LV_HAVE_AVX512 */
@ -1811,7 +1813,7 @@ static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
  return _mm_subs_epi8(a, b);
 #else /* LV_HAVE_SSE */
 #ifdef HAVE_NEON
-  return vsubqs_s8(a, b);
+  return vqsubq_s8(a, b);
 #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 #endif /* LV_HAVE_AVX2 */
@ -1835,7 +1837,9 @@ static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
  return _mm_sign_epi8(a, b);
 #else /* LV_HAVE_SSE */
 #ifdef HAVE_NEON
-  #error sign instruction not available in Neon
+  simd_s_t res;
  return res;
  //#error sign instruction not available in Neon
 #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 #endif /* LV_HAVE_AVX2 */
--- a/lib/src/phy/fec/turbodecoder.c
+++ b/lib/src/phy/fec/turbodecoder.c
@ -46,6 +46,7 @@ srslte_tdec_16bit_impl_t gen_impl = {
 };
 /* SSE no-window implementation */
 #ifdef LV_HAVE_SSE
 #include "srslte/phy/fec/turbodecoder_sse.h"
 srslte_tdec_16bit_impl_t sse_impl = {
    tdec_sse_init,
@ -56,7 +57,7 @@ srslte_tdec_16bit_impl_t sse_impl = {
 };
 /* SSE window implementation */
-#ifdef LV_HAVE_SSE
+
 #define WINIMP_IS_SSE16
 #include "srslte/phy/fec/turbodecoder_win.h"
 #undef WINIMP_IS_SSE16
@ -162,6 +163,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
  switch(dec_type) {
    case SRSLTE_TDEC_AUTO:
      break;
 #ifdef LV_HAVE_SSE
    case SRSLTE_TDEC_SSE:
      h->dec16[0] = &sse_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
@ -170,14 +172,15 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
      h->dec16[0] = &sse16_win_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
      break;
    case SRSLTE_TDEC_GENERIC:
      h->dec16[0] = &gen_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
      break;
    case SRSLTE_TDEC_SSE8_WINDOW:
      h->dec8[0] = &sse8_win_impl;
      h->current_llr_type = SRSLTE_TDEC_8;
      break;
 #endif
    case SRSLTE_TDEC_GENERIC:
      h->dec16[0] = &gen_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
      break;
 #ifdef LV_HAVE_AVX2
    case SRSLTE_TDEC_AVX_WINDOW:
      h->dec16[0] = &avx16_win_impl;
@ -237,6 +240,11 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
  }
  if (dec_type == SRSLTE_TDEC_AUTO) {
 #ifdef HAVE_NEON
      h->dec16[0] = &gen_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
      //h->dec8[0]  = &gen_impl;
 #else
    h->dec16[AUTO_16_SSE] = &sse_impl;
    h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl;
    h->dec8[AUTO_8_SSEWIN]  = &sse8_win_impl;
@ -244,7 +252,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
    h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl;
    h->dec8[AUTO_8_AVXWIN]  = &avx8_win_impl;
 #endif
-
+#endif /* HAVE_NEON */
    for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
      if (h->dec16[td]) {
        if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) {
--- a/lib/src/phy/phch/sch.c
+++ b/lib/src/phy/phch/sch.c
@ -612,6 +612,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols,
    int32_t i = 0;
 #ifndef LV_HAVE_SSE
  #ifndef HAVE_NEON
    __m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2);
    uint8_t *_g_bits = &g_bits[bit_read_idx/8];
@ -650,6 +651,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols,
      }
    }
    bit_read_idx += i * 4;
  #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
    /* Spare bits */
--- a/lib/src/phy/utils/vector_simd.c
+++ b/lib/src/phy/utils/vector_simd.c
@ -54,7 +54,7 @@ void srslte_vec_xor_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
      simd_b_t a = srslte_simd_b_loadu(&x[i]);
      simd_b_t b = srslte_simd_b_loadu(&y[i]);
-      simd_s_t r = srslte_simd_b_xor(a, b);
+      simd_b_t r = srslte_simd_b_xor(a, b);
      srslte_simd_b_storeu(&z[i], r);
    }
@ -167,19 +167,19 @@ void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
 #if SRSLTE_SIMD_B_SIZE
  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
-      simd_s_t a = srslte_simd_b_load(&x[i]);
+      simd_b_t a = srslte_simd_b_load(&x[i]);
-      simd_s_t b = srslte_simd_b_load(&y[i]);
+      simd_b_t b = srslte_simd_b_load(&y[i]);
-      simd_s_t r = srslte_simd_b_sub(a, b);
+      simd_b_t r = srslte_simd_b_sub(a, b);
      srslte_simd_b_store(&z[i], r);
    }
  } else {
    for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
-      simd_s_t a = srslte_simd_b_loadu(&x[i]);
+      simd_b_t a = srslte_simd_b_loadu(&x[i]);
-      simd_s_t b = srslte_simd_b_loadu(&y[i]);
+      simd_b_t b = srslte_simd_b_loadu(&y[i]);
-      simd_s_t r = srslte_simd_b_sub(a, b);
+      simd_b_t r = srslte_simd_b_sub(a, b);
      srslte_simd_b_storeu(&z[i], r);
    }
@ -222,6 +222,8 @@ void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, co
 void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
  int i = 0;
 #ifndef HAVE_NEON
 #if SRSLTE_SIMD_S_SIZE
  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
    for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
@ -243,6 +245,7 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
    }
  }
 #endif /* SRSLTE_SIMD_S_SIZE */
 #endif /* NOT HAVE_NEON*/
  for(; i < len; i++){
    z[i] = y[i]<0?-x[i]:x[i];
@ -251,6 +254,8 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
 void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
  int i = 0;
 #ifndef HAVE_NEON
 #if SRSLTE_SIMD_B_SIZE
  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
@ -272,7 +277,7 @@ void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
    }
  }
 #endif /* SRSLTE_SIMD_S_SIZE */
-
+#endif /* NOT HAVE_NEON*/
  for(; i < len; i++){
    z[i] = y[i]<0?-x[i]:x[i];
  }