From 69dc16c4c85cbbc7d2145f1a71ef0fa25330583d Mon Sep 17 00:00:00 2001
From: yagoda
Date: Tue, 27 Nov 2018 14:08:07 +0100
Subject: [PATCH] changes to fix arm compilation

---
 lib/include/srslte/phy/utils/simd.h | 12 ++++++++----
 lib/src/phy/fec/turbodecoder.c      | 20 ++++++++++++++------
 lib/src/phy/phch/sch.c              |  2 ++
 lib/src/phy/utils/vector_simd.c     | 21 +++++++++++++--------
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h
index 2529033c5..2c5d1ea4b 100644
--- a/lib/include/srslte/phy/utils/simd.h
+++ b/lib/include/srslte/phy/utils/simd.h
@@ -1356,7 +1356,9 @@ static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
   return _mm_sign_epi16(a, b);
 #else /* LV_HAVE_SSE */
 #ifdef HAVE_NEON
-  #error sign instruction not available in Neon
+  simd_s_t res;
+  return res;
+  //#error sign instruction not available in Neon
 #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 #endif /* LV_HAVE_AVX2 */
@@ -1800,7 +1802,7 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
 #endif /* LV_HAVE_AVX512 */
 }
 
-static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
+static inline simd_b_t srslte_simd_b_sub(simd_b_t a, simd_b_t b) {
 #ifdef LV_HAVE_AVX512
   return _mm512_subs_epi8(a, b);
 #else /* LV_HAVE_AVX512 */
@@ -1811,7 +1813,7 @@ static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
   return _mm_subs_epi8(a, b);
 #else /* LV_HAVE_SSE */
 #ifdef HAVE_NEON
-  return vsubqs_s8(a, b);
+  return vqsubq_s8(a, b);
 #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 #endif /* LV_HAVE_AVX2 */
@@ -1835,7 +1837,9 @@ static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
   return _mm_sign_epi8(a, b);
 #else /* LV_HAVE_SSE */
 #ifdef HAVE_NEON
-  #error sign instruction not available in Neon
+  simd_s_t res;
+  return res;
+  //#error sign instruction not available in Neon
 #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 #endif /* LV_HAVE_AVX2 */
diff --git a/lib/src/phy/fec/turbodecoder.c b/lib/src/phy/fec/turbodecoder.c
index a97180745..0ae094ecf 100644
--- a/lib/src/phy/fec/turbodecoder.c
+++ b/lib/src/phy/fec/turbodecoder.c
@@ -46,6 +46,7 @@ srslte_tdec_16bit_impl_t gen_impl = {
 };
 
 /* SSE no-window implementation */
+#ifdef LV_HAVE_SSE
 #include "srslte/phy/fec/turbodecoder_sse.h"
 srslte_tdec_16bit_impl_t sse_impl = {
     tdec_sse_init,
@@ -56,7 +57,7 @@ srslte_tdec_16bit_impl_t sse_impl = {
     tdec_sse_free,
     tdec_sse_dec,
     tdec_sse_ext
 };
 
 /* SSE window implementation */
-#ifdef LV_HAVE_SSE
+
 #define WINIMP_IS_SSE16
 #include "srslte/phy/fec/turbodecoder_win.h"
 #undef WINIMP_IS_SSE16
@@ -162,6 +163,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
   switch(dec_type) {
     case SRSLTE_TDEC_AUTO:
       break;
+#ifdef LV_HAVE_SSE
     case SRSLTE_TDEC_SSE:
      h->dec16[0] = &sse_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
@@ -170,14 +172,15 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
      h->dec16[0] = &sse16_win_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
      break;
-    case SRSLTE_TDEC_GENERIC:
-      h->dec16[0] = &gen_impl;
-      h->current_llr_type = SRSLTE_TDEC_16;
-      break;
     case SRSLTE_TDEC_SSE8_WINDOW:
      h->dec8[0] = &sse8_win_impl;
      h->current_llr_type = SRSLTE_TDEC_8;
      break;
+#endif
+    case SRSLTE_TDEC_GENERIC:
+      h->dec16[0] = &gen_impl;
+      h->current_llr_type = SRSLTE_TDEC_16;
+      break;
 #ifdef LV_HAVE_AVX2
     case SRSLTE_TDEC_AVX_WINDOW:
      h->dec16[0] = &avx16_win_impl;
      h->current_llr_type = SRSLTE_TDEC_16;
@@ -237,6 +240,11 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
   }
 
   if (dec_type == SRSLTE_TDEC_AUTO) {
+#ifdef HAVE_NEON
+    h->dec16[0] = &gen_impl;
+    h->current_llr_type = SRSLTE_TDEC_16;
+    //h->dec8[0] = &gen_impl;
+#else
     h->dec16[AUTO_16_SSE] = &sse_impl;
     h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl;
     h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl;
@@ -244,7 +252,7 @@ int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec
     h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl;
     h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl;
 #endif
-
+#endif /* HAVE_NEON */
     for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
       if (h->dec16[td]) {
         if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) {
diff --git a/lib/src/phy/phch/sch.c b/lib/src/phy/phch/sch.c
index f9ea054a0..49df30898 100644
--- a/lib/src/phy/phch/sch.c
+++ b/lib/src/phy/phch/sch.c
@@ -612,6 +612,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols,
   int32_t i = 0;
 
 #ifndef LV_HAVE_SSE
+  #ifndef HAVE_NEON
   __m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2);
 
   uint8_t *_g_bits = &g_bits[bit_read_idx/8];
@@ -650,6 +651,7 @@ static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols,
     }
   }
   bit_read_idx += i * 4;
+  #endif /* HAVE_NEON */
 #endif /* LV_HAVE_SSE */
 
   /* Spare bits */
diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c
index 83f9ea408..1143219b6 100644
--- a/lib/src/phy/utils/vector_simd.c
+++ b/lib/src/phy/utils/vector_simd.c
@@ -54,7 +54,7 @@ void srslte_vec_xor_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
     simd_b_t a = srslte_simd_b_loadu(&x[i]);
     simd_b_t b = srslte_simd_b_loadu(&y[i]);
 
-    simd_s_t r = srslte_simd_b_xor(a, b);
+    simd_b_t r = srslte_simd_b_xor(a, b);
 
     srslte_simd_b_storeu(&z[i], r);
   }
@@ -167,19 +167,19 @@ void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
 #if SRSLTE_SIMD_B_SIZE
   if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
     for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
-      simd_s_t a = srslte_simd_b_load(&x[i]);
-      simd_s_t b = srslte_simd_b_load(&y[i]);
+      simd_b_t a = srslte_simd_b_load(&x[i]);
+      simd_b_t b = srslte_simd_b_load(&y[i]);
 
-      simd_s_t r = srslte_simd_b_sub(a, b);
+      simd_b_t r = srslte_simd_b_sub(a, b);
 
       srslte_simd_b_store(&z[i], r);
     }
   } else {
     for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
-      simd_s_t a = srslte_simd_b_loadu(&x[i]);
-      simd_s_t b = srslte_simd_b_loadu(&y[i]);
+      simd_b_t a = srslte_simd_b_loadu(&x[i]);
+      simd_b_t b = srslte_simd_b_loadu(&y[i]);
 
-      simd_s_t r = srslte_simd_b_sub(a, b);
+      simd_b_t r = srslte_simd_b_sub(a, b);
 
       srslte_simd_b_storeu(&z[i], r);
     }
@@ -222,6 +222,8 @@ void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, co
 
 void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
   int i = 0;
+
+#ifndef HAVE_NEON
 #if SRSLTE_SIMD_S_SIZE
   if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
     for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
@@ -243,6 +245,7 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
     }
   }
 #endif /* SRSLTE_SIMD_S_SIZE */
+#endif /* NOT HAVE_NEON*/
 
   for(; i < len; i++){
     z[i] = y[i]<0?-x[i]:x[i];
@@ -251,6 +254,8 @@ void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
 }
 
 void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
   int i = 0;
+
+#ifndef HAVE_NEON
 #if SRSLTE_SIMD_B_SIZE
   if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
     for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
@@ -272,7 +277,7 @@ void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const
     }
   }
 #endif /* SRSLTE_SIMD_S_SIZE */
-
+#endif /* NOT HAVE_NEON*/
   for(; i < len; i++){
     z[i] = y[i]<0?-x[i]:x[i];
   }
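
Note (reviewer sketch, not part of the patch): srslte_simd_s_neg() and srslte_simd_b_neg() are stubbed on NEON above to return an uninitialized vector, and the SIMD paths of the callers touched here (srslte_vec_neg_sss_simd / srslte_vec_neg_bbb_simd) are compiled out with #ifndef HAVE_NEON, so on ARM the scalar tail loop does all the work. If a real NEON path is wanted later, a per-lane equivalent of _mm_sign_epi16(a, b) (negate a where b < 0, zero where b == 0, pass a through otherwise) could look like the sketch below; the helper name is hypothetical and only standard arm_neon.h intrinsics are used:

    #include <arm_neon.h>

    /* Hypothetical helper: per-lane sign transfer with _mm_sign_epi16 semantics. */
    static inline int16x8_t neon_sign_epi16(int16x8_t a, int16x8_t b)
    {
      uint16x8_t is_neg  = vcltq_s16(b, vdupq_n_s16(0));        /* lanes where b < 0  */
      uint16x8_t is_zero = vceqq_s16(b, vdupq_n_s16(0));        /* lanes where b == 0 */
      int16x8_t  r = vbslq_s16(is_neg, vnegq_s16(a), a);        /* b < 0 ? -a : a     */
      return vbicq_s16(r, vreinterpretq_s16_u16(is_zero));      /* zero where b == 0  */
    }

An 8-bit variant for srslte_simd_b_neg() would use the matching s8 intrinsics (vcltq_s8, vceqq_s8, vnegq_s8, vbslq_s8, vbicq_s8).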