diff --git a/CMakeModules/FindSSE.cmake b/CMakeModules/FindSSE.cmake
index f6ad475..264a94e 100644
--- a/CMakeModules/FindSSE.cmake
+++ b/CMakeModules/FindSSE.cmake
@@ -1,104 +1,78 @@
-# Check if SSE instructions are available on the machine where
-# the project is compiled.
+# Check whether the SSE instruction sets are supported by the compiler for the
+# target platform (the target, not the build host, matters when cross compiling).
+#
+# Results are stored in the cache variables SSE2_FOUND, SSE3_FOUND, SSSE3_FOUND
+# and SSE4_1_FOUND.
+include(CheckCCompilerFlag)
 
-IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
-   EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
+# First ask the compiler whether it accepts each -m<isa> flag at all.
+check_c_compiler_flag(-msse2 HAVE_SSE2)
+check_c_compiler_flag(-msse3 HAVE_SSE3)
+check_c_compiler_flag(-mssse3 HAVE_SSSE3)
+check_c_compiler_flag(-msse4.1 HAVE_SSE4_1)
 
-   STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
-   IF (SSE2_TRUE)
-      set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
-   ELSE (SSE2_TRUE)
-      set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
-   ENDIF (SSE2_TRUE)
+# Some compilers accept the SSE flags even when the target platform does not
+# support them (e.g. Clang with an ARM target), so it is necessary to try
+# compiling actual code that uses the intrinsics.
+# COMPILE_DEFINITIONS is forwarded to add_definitions() in the test project;
+# that command also passes plain compiler flags such as -msse2 through.
+if(HAVE_SSE2)
+  try_compile(SSE_OK "${PROJECT_BINARY_DIR}"
+    "${CMAKE_CURRENT_LIST_DIR}/TestSSE2.c"
+    COMPILE_DEFINITIONS "-msse2")
+  if(NOT SSE_OK)
+    message(STATUS "SSE2 test compilation fails")
+    set(HAVE_SSE2 FALSE)
+  endif()
+endif()
 
-   # /proc/cpuinfo apparently omits sse3 :(
-   STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE)
-   IF (NOT SSE3_TRUE)
-      STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE "${CPUINFO}")
-      STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE)
-   ENDIF (NOT SSE3_TRUE)
+if(HAVE_SSE3)
+  try_compile(SSE_OK "${PROJECT_BINARY_DIR}"
+    "${CMAKE_CURRENT_LIST_DIR}/TestSSE3.c"
+    COMPILE_DEFINITIONS "-msse3")
+  if(NOT SSE_OK)
+    message(STATUS "SSE3 test compilation fails")
+    set(HAVE_SSE3 FALSE)
+  endif()
+endif()
 
-   STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE)
-   IF (SSE3_TRUE OR SSSE3_TRUE)
-      set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
-   ELSE (SSE3_TRUE OR SSSE3_TRUE)
-      set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
-   ENDIF (SSE3_TRUE OR SSSE3_TRUE)
-   IF (SSSE3_TRUE)
-      set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
-   ELSE (SSSE3_TRUE)
-      set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
-   ENDIF (SSSE3_TRUE)
+if(HAVE_SSSE3)
+  try_compile(SSE_OK "${PROJECT_BINARY_DIR}"
+    "${CMAKE_CURRENT_LIST_DIR}/TestSSSE3.c"
+    COMPILE_DEFINITIONS "-mssse3")
+  if(NOT SSE_OK)
+    message(STATUS "SSSE3 test compilation fails")
+    set(HAVE_SSSE3 FALSE)
+  endif()
+endif()
 
-   STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
-   IF (SSE41_TRUE)
-      set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
-   ELSE (SSE41_TRUE)
-      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
-   ENDIF (SSE41_TRUE)
-ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
-   EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
-      CPUINFO)
+if(HAVE_SSE4_1)
+  try_compile(SSE_OK "${PROJECT_BINARY_DIR}"
+    "${CMAKE_CURRENT_LIST_DIR}/TestSSE41.c"
+    COMPILE_DEFINITIONS "-msse4.1")
+  if(NOT SSE_OK)
+    message(STATUS "SSE4.1 test compilation fails")
+    set(HAVE_SSE4_1 FALSE)
+  endif()
+endif()
 
-   STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
-   IF (SSE2_TRUE)
-      set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
-   ELSE (SSE2_TRUE)
-      set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
-   ENDIF (SSE2_TRUE)
-
-   STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE)
-   IF (SSE3_TRUE)
-      set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
-   ELSE (SSE3_TRUE)
-      set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
-   ENDIF (SSE3_TRUE)
-
-   STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE)
-   IF (SSSE3_TRUE)
-      set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
-   ELSE (SSSE3_TRUE)
-      set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
-   ENDIF (SSSE3_TRUE)
-
-   STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE "${CPUINFO}")
-   STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
-   IF (SSE41_TRUE)
-      set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
-   ELSE (SSE41_TRUE)
-      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
-   ENDIF (SSE41_TRUE)
-ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
-   # TODO
-   set(SSE2_FOUND   true  CACHE BOOL "SSE2 available on host")
-   set(SSE3_FOUND   false CACHE BOOL "SSE3 available on host")
-   set(SSSE3_FOUND  false CACHE BOOL "SSSE3 available on host")
-   set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
-ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
-   set(SSE2_FOUND   true  CACHE BOOL "SSE2 available on host")
-   set(SSE3_FOUND   false CACHE BOOL "SSE3 available on host")
-   set(SSSE3_FOUND  false CACHE BOOL "SSSE3 available on host")
-   set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
-ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
+# Quote the values so an empty or unset HAVE_* still yields a well-formed set().
+set(SSE2_FOUND "${HAVE_SSE2}" CACHE BOOL "SSE2 available on target")
+set(SSE3_FOUND "${HAVE_SSE3}" CACHE BOOL "SSE3 available on target")
+set(SSSE3_FOUND "${HAVE_SSSE3}" CACHE BOOL "SSSE3 available on target")
+set(SSE4_1_FOUND "${HAVE_SSE4_1}" CACHE BOOL "SSE4.1 available on target")
 
 if(NOT SSE2_FOUND)
-      MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.")
+      MESSAGE(STATUS "SSE2 is not supported on target platform.")
 endif(NOT SSE2_FOUND)
 if(NOT SSE3_FOUND)
-      MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.")
+      MESSAGE(STATUS "SSE3 is not supported on target platform.")
 endif(NOT SSE3_FOUND)
 if(NOT SSSE3_FOUND)
-      MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.")
+      MESSAGE(STATUS "SSSE3 is not supported on target platform.")
 endif(NOT SSSE3_FOUND)
 if(NOT SSE4_1_FOUND)
-      MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.")
+      MESSAGE(STATUS "SSE4.1 is not supported on target platform.")
 endif(NOT SSE4_1_FOUND)
 
 mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND)
diff --git a/CMakeModules/TestSSE2.c b/CMakeModules/TestSSE2.c
new file mode 100644
index 0000000..2456b65
--- /dev/null
+++ b/CMakeModules/TestSSE2.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+
+//include sse and sse2 headers
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+/* __m128 is ugly to write */
+typedef __m128d v2df; // vector of 2 double (sse2)
+
+int main(int argc, char **argv)
+{
+    v2df calcx = _mm_setr_pd(2.0, 3.0);
+    v2df xx = _mm_mul_pd(calcx, calcx);
+    double d;
+    _mm_storel_pd(&d, xx);
+    printf("%f\n", d);
+    _mm_storeh_pd(&d, xx);
+    printf("%f\n", d);
+    return 0;
+}
diff --git a/CMakeModules/TestSSE3.c b/CMakeModules/TestSSE3.c
new file mode 100644
--- /dev/null
+++ b/CMakeModules/TestSSE3.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+
+//include sse, sse2 and sse3 headers
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <pmmintrin.h>
+
+/* __m128 is ugly to write */
+typedef __m128d v2df; // vector of 2 double (sse2)
+
+int main(int argc, char **argv)
+{
+    v2df calcx = _mm_setr_pd(2.0, 3.0);
+    /* _mm_hadd_pd is an SSE3 intrinsic, so this only builds with SSE3 */
+    v2df xx = _mm_hadd_pd(calcx, calcx);
+    double d;
+    _mm_storel_pd(&d, xx);
+    printf("%f\n", d);
+    _mm_storeh_pd(&d, xx);
+    printf("%f\n", d);
+    return 0;
+}
diff --git a/CMakeModules/TestSSE41.c b/CMakeModules/TestSSE41.c
new file mode 100644
index 0000000..74b2f10
--- /dev/null
+++ b/CMakeModules/TestSSE41.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+
+//include sse, sse2 and sse4.1 headers
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m128i a = _mm_set_epi32(1, 2, 3, 4);
+    __m128i b = _mm_set_epi32(1, 2, 3, 4);
+    int i = _mm_testz_si128(a, b);
+    printf("%d\n", i);
+    return 0;
+}
diff --git a/CMakeModules/TestSSSE3.c b/CMakeModules/TestSSSE3.c
new file mode 100644
--- /dev/null
+++ b/CMakeModules/TestSSSE3.c
@@ -0,0 +1,17 @@
+#include <stdio.h>
+
+//include sse, sse2 and ssse3 headers
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <tmmintrin.h>
+
+int main(int argc, char **argv)
+{
+    __m128i calcx = _mm_set_epi32(1, 2, 3, 4);
+    __m128i xx = _mm_hadd_epi32(calcx, calcx);
+    /* read the low 32-bit lane with _mm_cvtsi128_si32; some older GCC
+       releases do not provide _mm_storeu_si32 */
+    int i = _mm_cvtsi128_si32(xx);
+    printf("%d\n", i);
+    return 0;
+}