@@ -466,17 +466,17 @@ function(get_flags CCID CCVER)
466
466
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
467
467
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
468
468
)
469
- set (C_FLAGS ${ C_FLAGS} -Wdouble-promotion)
469
+ list ( APPEND C_FLAGS -Wdouble-promotion)
470
470
endif ()
471
471
elseif (CCID STREQUAL "GNU" )
472
472
set (C_FLAGS -Wdouble-promotion)
473
473
set (CXX_FLAGS -Wno-array-bounds)
474
474
475
475
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
476
- set ( CXX_FLAGS ${ CXX_FLAGS} -Wno-format-truncation)
476
+ list ( APPEND CXX_FLAGS -Wno-format-truncation)
477
477
endif ()
478
478
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
479
- set ( CXX_FLAGS ${ CXX_FLAGS} -Wextra-semi)
479
+ list ( APPEND CXX_FLAGS -Wextra-semi)
480
480
endif ()
481
481
elseif (CCID MATCHES "Intel" )
482
482
# enable max optimization level when using Intel compiler
@@ -510,16 +510,18 @@ if (LLAMA_ALL_WARNINGS)
510
510
endif ()
511
511
endif ()
512
512
513
+ set (CUDA_CXX_FLAGS "" )
514
+
513
515
if (LLAMA_CUBLAS)
514
516
set (CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
515
517
if (NOT MSVC )
516
- set (CUDA_FLAGS ${ CUDA_FLAGS} -Wno-pedantic)
518
+ list ( APPEND CUDA_FLAGS -Wno-pedantic)
517
519
endif ()
518
520
519
521
if (LLAMA_ALL_WARNINGS AND NOT MSVC )
520
522
set (NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
521
523
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "" )
522
- set (NVCC_CMD ${ NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER} )
524
+ list ( APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER} )
523
525
endif ()
524
526
525
527
execute_process (
@@ -547,13 +549,8 @@ if (LLAMA_CUBLAS)
547
549
message ("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER} " )
548
550
549
551
get_flags(${CUDA_CCID} ${CUDA_CCVER} )
550
- list (JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
551
- if (NOT CUDA_CXX_FLAGS STREQUAL "" )
552
- set (CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS} )
553
- endif ()
552
+ list (APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS} ) # This is passed to -Xcompiler later
554
553
endif ()
555
-
556
- add_compile_options ("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS} >" )
557
554
endif ()
558
555
559
556
if (WIN32 )
@@ -618,12 +615,7 @@ if (NOT MSVC)
618
615
endif ()
619
616
endif ()
620
617
621
- function (add_compile_option_cpp ARG)
622
- # Adds a compile option to C/C++ only, but not for Cuda.
623
- # Use, e.g., for CPU-architecture flags.
624
- add_compile_options ($<$<COMPILE_LANGUAGE:CXX>:${ARG} >)
625
- add_compile_options ($<$<COMPILE_LANGUAGE:C>:${ARG} >)
626
- endfunction ()
618
+ set (ARCH_FLAGS "" )
627
619
628
620
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" ) OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" ) OR ("${CMAKE_GENERATOR_PLATFORM_LWR} " MATCHES "arm64" ))
629
621
message (STATUS "ARM detected" )
@@ -636,19 +628,19 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
636
628
else ()
637
629
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
638
630
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
639
- add_compile_options ( -mfp16-format=ieee)
631
+ list ( APPEND ARCH_FLAGS -mfp16-format=ieee)
640
632
endif ()
641
633
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
642
634
# Raspberry Pi 1, Zero
643
- add_compile_options ( -mfpu=neon-fp-armv8 -mno-unaligned-access)
635
+ list ( APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
644
636
endif ()
645
637
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
646
638
# Raspberry Pi 2
647
- add_compile_options ( -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
639
+ list ( APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
648
640
endif ()
649
641
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
650
642
# Raspberry Pi 3, 4, Zero 2 (32-bit)
651
- add_compile_options ( -mno-unaligned-access)
643
+ list ( APPEND ARCH_FLAGS -mno-unaligned-access)
652
644
endif ()
653
645
endif ()
654
646
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR} " MATCHES "^(x86_64|i686|amd64|x64)$" )
@@ -659,7 +651,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
659
651
include (cmake/FindSIMD.cmake)
660
652
endif ()
661
653
if (LLAMA_AVX512)
662
- add_compile_option_cpp( /arch:AVX512)
654
+ list ( APPEND ARCH_FLAGS /arch:AVX512)
663
655
# MSVC has no compile-time flags enabling specific
664
656
# AVX512 extensions, nor does it define the
665
657
# macros corresponding to the extensions.
@@ -673,49 +665,61 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
673
665
add_compile_definitions ($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
674
666
endif ()
675
667
elseif (LLAMA_AVX2)
676
- add_compile_option_cpp( /arch:AVX2)
668
+ list ( APPEND ARCH_FLAGS /arch:AVX2)
677
669
elseif (LLAMA_AVX)
678
- add_compile_option_cpp( /arch:AVX)
670
+ list ( APPEND ARCH_FLAGS /arch:AVX)
679
671
endif ()
680
672
else ()
681
673
if (LLAMA_NATIVE)
682
- add_compile_option_cpp( -march=native)
674
+ list ( APPEND ARCH_FLAGS -march=native)
683
675
endif ()
684
676
if (LLAMA_F16C)
685
- add_compile_option_cpp( -mf16c)
677
+ list ( APPEND ARCH_FLAGS -mf16c)
686
678
endif ()
687
679
if (LLAMA_FMA)
688
- add_compile_option_cpp( -mfma)
680
+ list ( APPEND ARCH_FLAGS -mfma)
689
681
endif ()
690
682
if (LLAMA_AVX)
691
- add_compile_option_cpp( -mavx)
683
+ list ( APPEND ARCH_FLAGS -mavx)
692
684
endif ()
693
685
if (LLAMA_AVX2)
694
- add_compile_option_cpp( -mavx2)
686
+ list ( APPEND ARCH_FLAGS -mavx2)
695
687
endif ()
696
688
if (LLAMA_AVX512)
697
- add_compile_option_cpp( -mavx512f)
698
- add_compile_option_cpp( -mavx512bw)
689
+ list ( APPEND ARCH_FLAGS -mavx512f)
690
+ list ( APPEND ARCH_FLAGS -mavx512bw)
699
691
endif ()
700
692
if (LLAMA_AVX512_VBMI)
701
- add_compile_option_cpp( -mavx512vbmi)
693
+ list ( APPEND ARCH_FLAGS -mavx512vbmi)
702
694
endif ()
703
695
if (LLAMA_AVX512_VNNI)
704
- add_compile_option_cpp( -mavx512vnni)
696
+ list ( APPEND ARCH_FLAGS -mavx512vnni)
705
697
endif ()
706
698
endif ()
707
699
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64" )
708
700
message (STATUS "PowerPC detected" )
709
701
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le" )
710
- add_compile_options ( -mcpu=powerpc64le)
702
+ list ( APPEND ARCH_FLAGS -mcpu=powerpc64le)
711
703
else ()
712
- add_compile_options ( -mcpu=native -mtune=native)
704
+ list ( APPEND ARCH_FLAGS -mcpu=native -mtune=native)
713
705
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10 (MMA), and query for big-endian systems (ppc64/le/be)
714
706
endif ()
715
707
else ()
716
708
message (STATUS "Unknown architecture" )
717
709
endif ()
718
710
711
+ add_compile_options ("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS} >" )
712
+ add_compile_options ("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS} >" )
713
+
714
+ if (LLAMA_CUBLAS)
715
+ list (APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS} )
716
+ list (JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
717
+ if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "" )
718
+ list (APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED} )
719
+ endif ()
720
+ add_compile_options ("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS} >" )
721
+ endif ()
722
+
719
723
if (MINGW)
720
724
# Target Windows 8 for PrefetchVirtualMemory
721
725
add_compile_definitions (_WIN32_WINNT=${LLAMA_WIN_VER} )
0 commit comments