|
|
@ -1,6 +1,3 @@
|
|
|
|
# Skip the x86 SIMD checks below on non-x86 targets.
# The bracket expression must be the range [3-9]: the previous "[3,9]" was a
# set of the three characters '3', ',' and '9', so i486/i586/i686 hosts were
# not recognised as x86 and the script returned early on them.
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|^i[3-9]86$")
  return()
endif()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
include(CheckCSourceRuns)
|
|
|
|
include(CheckCSourceRuns)
|
|
|
|
|
|
|
|
|
|
|
@ -40,11 +37,21 @@ if (ENABLE_SSE)
|
|
|
|
set(CMAKE_REQUIRED_FLAGS "-mavx")
|
|
|
|
set(CMAKE_REQUIRED_FLAGS "-mavx")
|
|
|
|
check_c_source_runs("
|
|
|
|
check_c_source_runs("
|
|
|
|
#include <immintrin.h>
|
|
|
|
#include <immintrin.h>
|
|
|
|
|
|
|
|
|
|
|
|
int main()
|
|
|
|
int main()
|
|
|
|
{
|
|
|
|
{
|
|
|
|
__m256i a = _mm256_setzero_si256();
|
|
|
|
__m256 a, b, c;
|
|
|
|
return 0;
|
|
|
|
const float src[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
|
|
|
|
|
|
|
|
float dst[8];
|
|
|
|
|
|
|
|
a = _mm256_loadu_ps( src );
|
|
|
|
|
|
|
|
b = _mm256_loadu_ps( src );
|
|
|
|
|
|
|
|
c = _mm256_add_ps( a, b );
|
|
|
|
|
|
|
|
_mm256_storeu_ps( dst, c );
|
|
|
|
|
|
|
|
for( int i = 0; i < 8; i++ ){
|
|
|
|
|
|
|
|
if( ( src[i] + src[i] ) != dst[i] ){
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}"
|
|
|
|
}"
|
|
|
|
HAVE_AVX)
|
|
|
|
HAVE_AVX)
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
@ -54,29 +61,38 @@ if (ENABLE_SSE)
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
|
|
|
|
|
|
|
|
if (ENABLE_AVX2)
|
|
|
|
if (ENABLE_AVX2)
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# Check compiler for AVX2 intrinsics
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
|
|
|
|
|
|
|
|
set(CMAKE_REQUIRED_FLAGS "-mavx2")
|
|
|
|
|
|
|
|
check_c_source_runs("
|
|
|
|
|
|
|
|
#include <immintrin.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int main()
|
|
|
|
#
|
|
|
|
{
|
|
|
|
# Check compiler for AVX2 intrinsics
|
|
|
|
__m256i a = _mm256_setzero_si256();
|
|
|
|
#
|
|
|
|
__m256i b = _mm256_abs_epi16(a);
|
|
|
|
if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
|
|
|
|
|
|
|
|
set(CMAKE_REQUIRED_FLAGS "-mavx2")
|
|
|
|
|
|
|
|
check_c_source_runs("
|
|
|
|
|
|
|
|
#include <immintrin.h>
|
|
|
|
|
|
|
|
int main()
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
__m256i a, b, c;
|
|
|
|
|
|
|
|
const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
|
|
|
|
|
|
|
int dst[8];
|
|
|
|
|
|
|
|
a = _mm256_loadu_si256( (__m256i*)src );
|
|
|
|
|
|
|
|
b = _mm256_loadu_si256( (__m256i*)src );
|
|
|
|
|
|
|
|
c = _mm256_add_epi32( a, b );
|
|
|
|
|
|
|
|
_mm256_storeu_si256( (__m256i*)dst, c );
|
|
|
|
|
|
|
|
for( int i = 0; i < 8; i++ ){
|
|
|
|
|
|
|
|
if( ( src[i] + src[i] ) != dst[i] ){
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
return 0;
|
|
|
|
}"
|
|
|
|
}"
|
|
|
|
HAVE_AVX2)
|
|
|
|
HAVE_AVX2)
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
|
|
|
|
|
|
|
|
if (HAVE_AVX2)
|
|
|
|
if (HAVE_AVX2)
|
|
|
|
message(STATUS "AVX2 is enabled - target CPU must support it")
|
|
|
|
message(STATUS "AVX2 is enabled - target CPU must support it")
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
|
|
|
|
|
|
|
|
endif()
|
|
|
|
endif()
|
|
|
|
|
|
|
|
|
|
|
|