Skip to content

Commit bbe7c56

Browse files
authored
cmake : pass CPU architecture flags to nvcc (ggml-org#5146)
1 parent 62fead3 commit bbe7c56

File tree

1 file changed

+39
-35
lines changed

1 file changed

+39
-35
lines changed

CMakeLists.txt

+39-35
Original file line number | Diff line number | Diff line change
@@ -466,17 +466,17 @@ function(get_flags CCID CCVER)
466466
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
467467
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
468468
)
469-
set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
469+
list(APPEND C_FLAGS -Wdouble-promotion)
470470
endif()
471471
elseif (CCID STREQUAL "GNU")
472472
set(C_FLAGS -Wdouble-promotion)
473473
set(CXX_FLAGS -Wno-array-bounds)
474474

475475
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
476-
set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
476+
list(APPEND CXX_FLAGS -Wno-format-truncation)
477477
endif()
478478
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
479-
set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
479+
list(APPEND CXX_FLAGS -Wextra-semi)
480480
endif()
481481
elseif (CCID MATCHES "Intel")
482482
# enable max optimization level when using Intel compiler
@@ -510,16 +510,18 @@ if (LLAMA_ALL_WARNINGS)
510510
endif()
511511
endif()
512512

513+
set(CUDA_CXX_FLAGS "")
514+
513515
if (LLAMA_CUBLAS)
514516
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
515517
if (NOT MSVC)
516-
set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
518+
list(APPEND CUDA_FLAGS -Wno-pedantic)
517519
endif()
518520

519521
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
520522
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
521523
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
522-
set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
524+
list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
523525
endif()
524526

525527
execute_process(
@@ -547,13 +549,8 @@ if (LLAMA_CUBLAS)
547549
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
548550

549551
get_flags(${CUDA_CCID} ${CUDA_CCVER})
550-
list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
551-
if (NOT CUDA_CXX_FLAGS STREQUAL "")
552-
set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
553-
endif()
552+
list(APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
554553
endif()
555-
556-
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
557554
endif()
558555

559556
if (WIN32)
@@ -618,12 +615,7 @@ if (NOT MSVC)
618615
endif()
619616
endif()
620617

621-
function(add_compile_option_cpp ARG)
622-
# Adds a compile option to C/C++ only, but not for Cuda.
623-
# Use, e.g., for CPU-architecture flags.
624-
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
625-
add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
626-
endfunction()
618+
set(ARCH_FLAGS "")
627619

628620
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
629621
message(STATUS "ARM detected")
@@ -636,19 +628,19 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
636628
else()
637629
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
638630
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
639-
add_compile_options(-mfp16-format=ieee)
631+
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
640632
endif()
641633
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
642634
# Raspberry Pi 1, Zero
643-
add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access)
635+
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
644636
endif()
645637
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
646638
# Raspberry Pi 2
647-
add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
639+
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
648640
endif()
649641
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
650642
# Raspberry Pi 3, 4, Zero 2 (32-bit)
651-
add_compile_options(-mno-unaligned-access)
643+
list(APPEND ARCH_FLAGS -mno-unaligned-access)
652644
endif()
653645
endif()
654646
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
@@ -659,7 +651,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
659651
include(cmake/FindSIMD.cmake)
660652
endif ()
661653
if (LLAMA_AVX512)
662-
add_compile_option_cpp(/arch:AVX512)
654+
list(APPEND ARCH_FLAGS /arch:AVX512)
663655
# MSVC has no compile-time flags enabling specific
664656
# AVX512 extensions, nor does it define the
665657
# macros corresponding to the extensions.
@@ -673,49 +665,61 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
673665
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
674666
endif()
675667
elseif (LLAMA_AVX2)
676-
add_compile_option_cpp(/arch:AVX2)
668+
list(APPEND ARCH_FLAGS /arch:AVX2)
677669
elseif (LLAMA_AVX)
678-
add_compile_option_cpp(/arch:AVX)
670+
list(APPEND ARCH_FLAGS /arch:AVX)
679671
endif()
680672
else()
681673
if (LLAMA_NATIVE)
682-
add_compile_option_cpp(-march=native)
674+
list(APPEND ARCH_FLAGS -march=native)
683675
endif()
684676
if (LLAMA_F16C)
685-
add_compile_option_cpp(-mf16c)
677+
list(APPEND ARCH_FLAGS -mf16c)
686678
endif()
687679
if (LLAMA_FMA)
688-
add_compile_option_cpp(-mfma)
680+
list(APPEND ARCH_FLAGS -mfma)
689681
endif()
690682
if (LLAMA_AVX)
691-
add_compile_option_cpp(-mavx)
683+
list(APPEND ARCH_FLAGS -mavx)
692684
endif()
693685
if (LLAMA_AVX2)
694-
add_compile_option_cpp(-mavx2)
686+
list(APPEND ARCH_FLAGS -mavx2)
695687
endif()
696688
if (LLAMA_AVX512)
697-
add_compile_option_cpp(-mavx512f)
698-
add_compile_option_cpp(-mavx512bw)
689+
list(APPEND ARCH_FLAGS -mavx512f)
690+
list(APPEND ARCH_FLAGS -mavx512bw)
699691
endif()
700692
if (LLAMA_AVX512_VBMI)
701-
add_compile_option_cpp(-mavx512vbmi)
693+
list(APPEND ARCH_FLAGS -mavx512vbmi)
702694
endif()
703695
if (LLAMA_AVX512_VNNI)
704-
add_compile_option_cpp(-mavx512vnni)
696+
list(APPEND ARCH_FLAGS -mavx512vnni)
705697
endif()
706698
endif()
707699
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
708700
message(STATUS "PowerPC detected")
709701
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
710-
add_compile_options(-mcpu=powerpc64le)
702+
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
711703
else()
712-
add_compile_options(-mcpu=native -mtune=native)
704+
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
713705
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10 (MMA), and query for big-endian systems (ppc64/le/be)
714706
endif()
715707
else()
716708
message(STATUS "Unknown architecture")
717709
endif()
718710

711+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
712+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
713+
714+
if (LLAMA_CUBLAS)
715+
list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
716+
list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
717+
if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
718+
list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
719+
endif()
720+
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
721+
endif()
722+
719723
if (MINGW)
720724
# Target Windows 8 for PrefetchVirtualMemory
721725
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})

0 commit comments

Comments
 (0)