|
|
|
@ -1506,4 +1506,119 @@ static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) {
|
|
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */
|
|
|
|
|
|
|
|
|
|
#if SRSLTE_SIMD_B_SIZE
|
|
|
|
|
/* Data types */
|
|
|
|
|
/*
 * simd_b_t: vector register type used by the srslte_simd_b_* helpers.
 * The first instruction-set macro that is defined, in the order below,
 * selects the underlying type; if none is defined no type is declared.
 */
#if defined(LV_HAVE_AVX512)
typedef __m512i   simd_b_t;
#elif defined(LV_HAVE_AVX2)
typedef __m256i   simd_b_t;
#elif defined(LV_HAVE_SSE)
typedef __m128i   simd_b_t;
#elif defined(HAVE_NEON)
typedef int8x16_t simd_b_t;
#endif /* LV_HAVE_AVX512 / LV_HAVE_AVX2 / LV_HAVE_SSE / HAVE_NEON */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Load one simd_b_t vector of signed 8-bit values starting at ptr.
 *
 * The x86 branches use the aligned-load intrinsics (_mm512_load_si512,
 * _mm256_load_si256, _mm_load_si128), so ptr is expected to meet the
 * alignment those intrinsics require for the selected vector width.
 * The NEON branch uses vld1q_s8.
 *
 * ptr is only read, hence the const qualifier; callers holding a
 * non-const int8_t pointer can pass it unchanged.
 *
 * Returns the loaded vector.
 */
static inline simd_b_t srslte_simd_b_load(const int8_t *ptr) {
#if defined(LV_HAVE_AVX512)
  return _mm512_load_si512(ptr);
#elif defined(LV_HAVE_AVX2)
  return _mm256_load_si256((const __m256i *) ptr);
#elif defined(LV_HAVE_SSE)
  return _mm_load_si128((const __m128i *) ptr);
#elif defined(HAVE_NEON)
  return vld1q_s8(ptr);
#endif /* LV_HAVE_AVX512 / LV_HAVE_AVX2 / LV_HAVE_SSE / HAVE_NEON */
}
|
|
|
|
|
|
|
|
|
|
/*
 * Load one simd_b_t vector of signed 8-bit values starting at ptr,
 * using the unaligned-load intrinsics on x86 (_mm512_loadu_si512,
 * _mm256_loadu_si256, _mm_loadu_si128). The NEON branch uses vld1q_s8.
 *
 * ptr is only read, hence the const qualifier; callers holding a
 * non-const int8_t pointer can pass it unchanged.
 *
 * Returns the loaded vector.
 */
static inline simd_b_t srslte_simd_b_loadu(const int8_t *ptr) {
#if defined(LV_HAVE_AVX512)
  return _mm512_loadu_si512(ptr);
#elif defined(LV_HAVE_AVX2)
  return _mm256_loadu_si256((const __m256i *) ptr);
#elif defined(LV_HAVE_SSE)
  return _mm_loadu_si128((const __m128i *) ptr);
#elif defined(HAVE_NEON)
  return vld1q_s8(ptr);
#endif /* LV_HAVE_AVX512 / LV_HAVE_AVX2 / LV_HAVE_SSE / HAVE_NEON */
}
|
|
|
|
|
|
|
|
|
|
/*
 * Write the vector simdreg to memory starting at ptr.
 *
 * The x86 branches use the aligned-store intrinsics (_mm512_store_si512,
 * _mm256_store_si256, _mm_store_si128), so ptr is expected to meet the
 * alignment those intrinsics require for the selected vector width.
 * The NEON branch uses vst1q_s8.
 */
static inline void srslte_simd_b_store(int8_t *ptr, simd_b_t simdreg) {
#if defined(LV_HAVE_AVX512)
  _mm512_store_si512(ptr, simdreg);
#elif defined(LV_HAVE_AVX2)
  _mm256_store_si256((__m256i *) ptr, simdreg);
#elif defined(LV_HAVE_SSE)
  _mm_store_si128((__m128i *) ptr, simdreg);
#elif defined(HAVE_NEON)
  vst1q_s8(ptr, simdreg);
#endif /* LV_HAVE_AVX512 / LV_HAVE_AVX2 / LV_HAVE_SSE / HAVE_NEON */
}
|
|
|
|
|
|
|
|
|
|
/*
 * Write the vector simdreg to memory starting at ptr, using the
 * unaligned-store intrinsics on x86 (_mm512_storeu_si512,
 * _mm256_storeu_si256, _mm_storeu_si128). The NEON branch uses vst1q_s8.
 */
static inline void srslte_simd_b_storeu(int8_t *ptr, simd_b_t simdreg) {
#if defined(LV_HAVE_AVX512)
  _mm512_storeu_si512(ptr, simdreg);
#elif defined(LV_HAVE_AVX2)
  _mm256_storeu_si256((__m256i *) ptr, simdreg);
#elif defined(LV_HAVE_SSE)
  _mm_storeu_si128((__m128i *) ptr, simdreg);
#elif defined(HAVE_NEON)
  vst1q_s8(ptr, simdreg);
#endif /* LV_HAVE_AVX512 / LV_HAVE_AVX2 / LV_HAVE_SSE / HAVE_NEON */
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Return the bitwise exclusive-OR of vectors a and b.
 *
 * The AVX512 branch uses the 32-bit-lane intrinsic _mm512_xor_epi32;
 * XOR acts on each bit independently, so the lane width does not
 * affect the result.
 */
static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
#if defined(LV_HAVE_AVX512)
  return _mm512_xor_epi32(a, b);
#elif defined(LV_HAVE_AVX2)
  return _mm256_xor_si256(a, b);
#elif defined(LV_HAVE_SSE)
  return _mm_xor_si128(a, b);
#elif defined(HAVE_NEON)
  return veorq_s8(a, b);
#endif /* LV_HAVE_AVX512 / LV_HAVE_AVX2 / LV_HAVE_SSE / HAVE_NEON */
}
|
|
|
|
|
|
|
|
|
|
#endif /*SRSLTE_SIMD_B_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif //SRSLTE_SIMD_H_H
|
|
|
|
|