Added missing AVX512 intrinsics and flags. Fixes #291.

master
Xavier Arteaga 6 years ago
parent 05abdc9516
commit 8c3a0153b9

@ -306,8 +306,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
endif (HAVE_FMA) endif (HAVE_FMA)
if (HAVE_AVX512) if (HAVE_AVX512)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -mavx512bw -mavx512dq -DLV_HAVE_AVX512")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -mavx512bw -mavx512dq -DLV_HAVE_AVX512")
endif(HAVE_AVX512) endif(HAVE_AVX512)
if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")

@ -142,7 +142,7 @@ if (ENABLE_SSE)
# Check compiler for AVX intrinsics # Check compiler for AVX intrinsics
# #
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
set(CMAKE_REQUIRED_FLAGS "-mavx512f") set(CMAKE_REQUIRED_FLAGS "-mavx512f -mavx512cd -mavx512bw -mavx512dq -DLV_HAVE_AVX512")
check_c_source_runs(" check_c_source_runs("
#include <immintrin.h> #include <immintrin.h>
int main() int main()

@ -1341,7 +1341,13 @@ static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) {
static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) { static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
#ifdef LV_HAVE_AVX512 #ifdef LV_HAVE_AVX512
#error sign instruction not available in avx512 __m256i a0 = _mm512_extracti64x4_epi64(a, 0);
__m256i a1 = _mm512_extracti64x4_epi64(a, 1);
__m256i b0 = _mm512_extracti64x4_epi64(b, 0);
__m256i b1 = _mm512_extracti64x4_epi64(b, 1);
__m256i r0 = _mm256_sign_epi16(a0, b0);
__m256i r1 = _mm256_sign_epi16(a1, b1);
return _mm512_inserti64x4(_mm512_broadcast_i64x4(r0), r1, 1);
#else /* LV_HAVE_AVX512 */ #else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2 #ifdef LV_HAVE_AVX2
return _mm256_sign_epi16(a, b); return _mm256_sign_epi16(a, b);
@ -1814,7 +1820,13 @@ static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) { static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
#ifdef LV_HAVE_AVX512 #ifdef LV_HAVE_AVX512
#error sign instruction not available in avx512 __m256i a0 = _mm512_extracti64x4_epi64(a, 0);
__m256i a1 = _mm512_extracti64x4_epi64(a, 1);
__m256i b0 = _mm512_extracti64x4_epi64(b, 0);
__m256i b1 = _mm512_extracti64x4_epi64(b, 1);
__m256i r0 = _mm256_sign_epi8(a0, b0);
__m256i r1 = _mm256_sign_epi8(a1, b1);
return _mm512_inserti64x4(_mm512_broadcast_i64x4(r0), r1, 1);
#else /* LV_HAVE_AVX512 */ #else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2 #ifdef LV_HAVE_AVX2
return _mm256_sign_epi8(a, b); return _mm256_sign_epi8(a, b);

@ -226,6 +226,29 @@ TEST(srslte_vec_prod_sss,
free(z); free(z);
) )
TEST(srslte_vec_neg_sss,
  /* Compares srslte_vec_neg_sss(x, y, z, block_size) against a scalar
   * reference in which z[i] is x[i] negated when y[i] is negative and
   * x[i] unchanged otherwise. */
  MALLOC(int16_t, x);
  MALLOC(int16_t, y);
  MALLOC(int16_t, z);

  /* Integer literal: gold is an int16_t and needs no float initialiser. */
  int16_t gold = 0;
  for (int i = 0; i < block_size; i++) {
    x[i] = RANDOM_S();
    /* Redraw until y[i] is non-zero; the reference below only distinguishes
     * negative from non-negative y and zero is deliberately kept out. */
    do { y[i] = RANDOM_S(); } while (!y[i]);
  }

  TEST_CALL(srslte_vec_neg_sss(x, y, z, block_size))

  for (int i = 0; i < block_size; i++) {
    gold = y[i] < 0 ? -x[i] : x[i];
    /* Accumulate the absolute error; mse is not declared in this block
     * (presumably provided by the TEST macro - confirm). */
    mse += abs(gold - z[i]);
  }

  free(x);
  free(y);
  free(z);
)
TEST(srslte_vec_acc_cc, TEST(srslte_vec_acc_cc,
MALLOC(cf_t, x); MALLOC(cf_t, x);
cf_t z; cf_t z;
@ -868,6 +891,9 @@ int main(int argc, char **argv) {
passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++; func_count++;
passed[func_count][size_count] = test_srslte_vec_neg_sss(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size); passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++; func_count++;

Loading…
Cancel
Save