diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake
index 30be8a206..3440f01c1 100644
--- a/cmake/modules/FindSSE.cmake
+++ b/cmake/modules/FindSSE.cmake
@@ -7,6 +7,7 @@ include(CheckCSourceRuns)
 option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON)
 option(ENABLE_AVX "Enable compile-time AVX support." ON)
 option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON)
+option(ENABLE_FMA "Enable compile-time FMA support." ON)
 
 if (ENABLE_SSE)
   #
@@ -97,8 +98,46 @@ if (ENABLE_SSE)
     if (HAVE_AVX2)
       message(STATUS "AVX2 is enabled - target CPU must support it")
     endif()
-  endif()
+  endif()
+
+  if (ENABLE_FMA)
+
+    #
+    # Check compiler for FMA intrinsics: build with -mfma and run a probe
+    # that compares _mm256_fmadd_ps( a, b, c ) against a * b + c per lane.
+    # HAVE_FMA is set only if the probe compiles, links and exits with 0.
+    #
+    if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG )
+      set(CMAKE_REQUIRED_FLAGS "-mfma")
+      check_c_source_runs("
+        #include <immintrin.h>
+        int main()
+        {
+          __m256 a, b, c, r;
+          const float src[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
+          float dst[8];
+          a = _mm256_loadu_ps( src );
+          b = _mm256_loadu_ps( src );
+          c = _mm256_loadu_ps( src );
+          r = _mm256_fmadd_ps( a, b, c );
+          _mm256_storeu_ps( dst, r );
+          int i = 0;
+          for( i = 0; i < 8; i++ ){
+            if( ( src[i] * src[i] + src[i] ) != dst[i] ){
+              return -1;
+            }
+          }
+          return 0;
+        }"
+        HAVE_FMA)
+    endif()
+
+    if (HAVE_FMA)
+      message(STATUS "FMA is enabled - target CPU must support it")
+    endif()
+  endif()
 
 endif()
 
-mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2)
+# Arguments are whitespace-separated; a comma would become part of the name.
+mark_as_advanced(HAVE_SSE HAVE_AVX HAVE_AVX2 HAVE_FMA)