Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check availability of SSE* instructions on target platform #114

Merged
merged 2 commits into from
May 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 52 additions & 87 deletions CMakeModules/FindSSE.cmake
Original file line number Diff line number Diff line change
@@ -1,104 +1,69 @@
# Check if SSE instructions are available on the machine where
# the project is compiled.
# Check whether SSE instructions are supported by both the compiler and the target platform (be aware of cross-compilation)
include(CheckCCompilerFlag)

IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
check_c_compiler_flag(-msse2 HAVE_SSE2)
check_c_compiler_flag(-msse3 HAVE_SSE3)
check_c_compiler_flag(-mssse3 HAVE_SSSE3)
check_c_compiler_flag(-msse4.1 HAVE_SSE4_1)

STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
ENDIF (SSE2_TRUE)
# Some compilers accept SSE flags even when the target platform doesn't support them (e.g. Clang with an ARM target),
# so it is necessary to try compiling actual code.
# The -msse2 flag was accepted; now confirm that real SSE2 code builds by
# compiling TestSSE2.c with that flag. On failure HAVE_SSE2 is overridden to FALSE.
if(HAVE_SSE2)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${CMAKE_CURRENT_LIST_DIR}/TestSSE2.c"
    COMPILE_DEFINITIONS "-msse2"
  )
  if(NOT SSE_OK)
    message(STATUS "SSE2 test compilation fails")
    set(HAVE_SSE2 FALSE)
  endif()
endif()

# /proc/cpuinfo apparently omits sse3 :(
STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE)
IF (NOT SSE3_TRUE)
STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE)
ENDIF (NOT SSE3_TRUE)
# The -msse3 flag was accepted; now confirm that the test program builds by
# compiling TestSSE3.c with that flag. On failure HAVE_SSE3 is overridden to FALSE.
if(HAVE_SSE3)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${CMAKE_CURRENT_LIST_DIR}/TestSSE3.c"
    COMPILE_DEFINITIONS "-msse3"
  )
  if(NOT SSE_OK)
    message(STATUS "SSE3 test compilation fails")
    set(HAVE_SSE3 FALSE)
  endif()
endif()

STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE)
IF (SSE3_TRUE OR SSSE3_TRUE)
set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
ELSE (SSE3_TRUE OR SSSE3_TRUE)
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
ENDIF (SSE3_TRUE OR SSSE3_TRUE)
IF (SSSE3_TRUE)
set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
ELSE (SSSE3_TRUE)
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
ENDIF (SSSE3_TRUE)
# The -mssse3 flag was accepted; now confirm that real SSSE3 code builds by
# compiling TestSSSE3.c with that flag. On failure HAVE_SSSE3 is overridden to FALSE.
if(HAVE_SSSE3)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${CMAKE_CURRENT_LIST_DIR}/TestSSSE3.c"
    COMPILE_DEFINITIONS "-mssse3"
  )
  if(NOT SSE_OK)
    # Name the instruction set that actually failed (SSSE3, not SSE3) so the
    # configure log is not mistaken for a failure of the SSE3 probe.
    message(STATUS "SSSE3 test compilation fails")
    set(HAVE_SSSE3 FALSE)
  endif()
endif()

STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
CPUINFO)
# The -msse4.1 flag was accepted; now confirm that real SSE4.1 code builds by
# compiling TestSSE41.c with that flag. On failure HAVE_SSE4_1 is overridden to FALSE.
if(HAVE_SSE4_1)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${CMAKE_CURRENT_LIST_DIR}/TestSSE41.c"
    COMPILE_DEFINITIONS "-msse4.1"
  )
  if(NOT SSE_OK)
    message(STATUS "SSE4.1 test compilation fails")
    set(HAVE_SSE4_1 FALSE)
  endif()
endif()

STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
ENDIF (SSE2_TRUE)

STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE)
IF (SSE3_TRUE)
set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
ELSE (SSE3_TRUE)
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
ENDIF (SSE3_TRUE)

STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE)
IF (SSSE3_TRUE)
set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
ELSE (SSSE3_TRUE)
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
ENDIF (SSSE3_TRUE)

STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
# TODO
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
# Publish the per-extension results as cache entries, one *_FOUND entry for
# each HAVE_* probe result.
set(SSE2_FOUND
    ${HAVE_SSE2}
    CACHE BOOL "SSE2 available on target"
)
set(SSE3_FOUND
    ${HAVE_SSE3}
    CACHE BOOL "SSE3 available on target"
)
set(SSSE3_FOUND
    ${HAVE_SSSE3}
    CACHE BOOL "SSSE3 available on target"
)
set(SSE4_1_FOUND
    ${HAVE_SSE4_1}
    CACHE BOOL "SSE4.1 available on target"
)

if(NOT SSE2_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.")
MESSAGE(STATUS "SSE2 is not supported on target platform.")
endif(NOT SSE2_FOUND)
if(NOT SSE3_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.")
MESSAGE(STATUS "SSE3 is not supported on target platform.")
endif(NOT SSE3_FOUND)
if(NOT SSSE3_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.")
MESSAGE(STATUS "SSSE3 is not supported on target platform.")
endif(NOT SSSE3_FOUND)
if(NOT SSE4_1_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.")
MESSAGE(STATUS "SSE4.1 is not supported on target platform.")
endif(NOT SSE4_1_FOUND)

mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND)
20 changes: 20 additions & 0 deletions CMakeModules/TestSSE2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <stdio.h>

/* SSE and SSE2 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>

/*
 * Compile probe for SSE2: squares a packed pair of doubles and prints
 * both lanes. It only needs to compile; the build never runs it.
 */
int main(int argc, char **argv)
{
    double lane;
    __m128d input = _mm_setr_pd(2.0, 3.0);
    __m128d squared = _mm_mul_pd(input, input);

    /* low lane first, then high lane */
    _mm_storel_pd(&lane, squared);
    printf("%f\n", lane);

    _mm_storeh_pd(&lane, squared);
    printf("%f\n", lane);

    return 0;
}
20 changes: 20 additions & 0 deletions CMakeModules/TestSSE3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <stdio.h>

/* SSE, SSE2 and SSE3 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>

/* __m128d is ugly to write */
typedef __m128d v2df; /* vector of 2 doubles */

/*
 * Compile probe for SSE3. It must use at least one intrinsic that exists
 * only from SSE3 onwards; otherwise it merely repeats the SSE2 probe and
 * cannot detect a target that lacks SSE3. The program only needs to compile.
 */
int main(int argc, char **argv)
{
    v2df calcx = _mm_setr_pd(2.0, 3.0);
    /* _mm_hadd_pd (horizontal add) is declared in <pmmintrin.h> and requires SSE3 */
    v2df xx = _mm_hadd_pd(calcx, calcx);
    double d;
    _mm_storel_pd(&d, xx);
    printf("%f\n", d);
    _mm_storeh_pd(&d, xx);
    printf("%f\n", d);
    return 0;
}
15 changes: 15 additions & 0 deletions CMakeModules/TestSSE41.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <stdio.h>

/* SSE, SSE2 and SSE4.1 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

/*
 * Compile probe for SSE4.1: calls _mm_testz_si128 on two integer vectors
 * and prints the result. It only needs to compile.
 */
int main(int argc, char **argv)
{
    __m128i lhs = _mm_set_epi32(1, 2, 3, 4);
    __m128i rhs = _mm_set_epi32(1, 2, 3, 4);
    int all_zero;

    all_zero = _mm_testz_si128(lhs, rhs);
    printf("%d\n", all_zero);

    return 0;
}
16 changes: 16 additions & 0 deletions CMakeModules/TestSSSE3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include <stdio.h>

/* SSE, SSE2 and SSSE3 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>

/*
 * Compile probe for SSSE3: _mm_hadd_epi32 is the SSSE3-specific intrinsic
 * under test. Everything else uses long-established SSE2 intrinsics, so a
 * compile failure points at missing SSSE3 support and not at an
 * incomplete intrinsic header.
 */
int main(int argc, char **argv)
{
    __m128i calcx = _mm_set_epi32(1, 2, 3, 4);
    __m128i xx = _mm_hadd_epi32(calcx, calcx);
    /*
     * Extract the low 32-bit lane with _mm_cvtsi128_si32 (SSE2) rather than
     * _mm_storeu_si32: the latter is a late addition to the intrinsic headers
     * and is missing from older compiler releases, which would make this
     * probe fail even though SSSE3 itself is available.
     */
    int i = _mm_cvtsi128_si32(xx);
    printf("%d\n", i);
    return 0;
}