Commit 22482e4

[Bugfix] Flash attention arches not getting set properly (#9062)
1 parent 3d826d2

1 file changed (+11 −0)

CMakeLists.txt

Lines changed: 11 additions & 0 deletions
```diff
@@ -482,6 +482,17 @@ if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda")
     return()
 endif ()
 
+# vLLM flash attention requires VLLM_GPU_ARCHES to contain the set of target
+# arches in the CMake syntax (75-real, 89-virtual, etc), since we clear the
+# arches in the CUDA case (and instead set the gencodes on a per file basis)
+# we need to manually set VLLM_GPU_ARCHES here.
+if(VLLM_GPU_LANG STREQUAL "CUDA")
+  foreach(_ARCH ${CUDA_ARCHS})
+    string(REPLACE "." "" _ARCH "${_ARCH}")
+    list(APPEND VLLM_GPU_ARCHES "${_ARCH}-real")
+  endforeach()
+endif()
+
 #
 # Build vLLM flash attention from source
 #
```