@@ -173,31 +173,15 @@ endif ()
173
173
174
174
# FA3 requires CUDA 12.0 or later
175
175
if (FA3_ENABLED AND ${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 12.0)
176
- # BF16 source files (we only use paged, split, packgqa and sm80)
177
- # we only use paged, split and packgqa since for paged_kv or varlen_q
178
- # PackedGQA is the only one that is used
176
+ # BF16 source files
179
177
file (GLOB FA3_BF16_GEN_SRCS
180
- "hopper/instantiations/flash_fwd_hdimall_bf16_*paged*_sm90.cu" )
181
- file (GLOB FA3_BF16_GEN_SRCS_
182
- "hopper/instantiations/flash_fwd_hdimall_bf16_*packgqa*_sm90.cu" )
183
- list (APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_} )
184
- file (GLOB FA3_BF16_GEN_SRCS_
185
- "hopper/instantiations/flash_fwd_hdimall_bf16_*split*_sm90.cu" )
186
- list (APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_} )
178
+ "hopper/instantiations/flash_fwd_hdimall_bf16*_sm90.cu" )
187
179
file (GLOB FA3_BF16_GEN_SRCS_
188
180
"hopper/instantiations/flash_fwd_*_bf16_*_sm80.cu" )
189
181
list (APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_} )
190
- # FP16 source files (we only use paged, split and packgqa and sm80)
191
- # we only use paged, split and packgqa since for paged_kv or varlen_q
192
- # PackedGQA is the only one that is used
182
+ # FP16 source files
193
183
file (GLOB FA3_FP16_GEN_SRCS
194
- "hopper/instantiations/flash_fwd_hdimall_fp16_*paged*_sm90.cu" )
195
- file (GLOB FA3_FP16_GEN_SRCS_
196
- "hopper/instantiations/flash_fwd_hdimall_fp16_*packgqa*_sm90.cu" )
197
- list (APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_} )
198
- file (GLOB FA3_FP16_GEN_SRCS_
199
- "hopper/instantiations/flash_fwd_hdimall_fp16_*split*_sm90.cu" )
200
- list (APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_} )
184
+ "hopper/instantiations/flash_fwd_hdimall_fp16*_sm90.cu" )
201
185
file (GLOB FA3_FP16_GEN_SRCS_
202
186
"hopper/instantiations/flash_fwd_*_fp16_*_sm80.cu" )
203
187
list (APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_} )
0 commit comments