Skip to content

Commit 5220a16

Browse files
CUDA: fix FA logic for PTX 7.0 and CC >= 7.5 (ggml-org#12222)
1 parent 3ffbbd5 commit 5220a16

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/fattn.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
310 310
}
311 311

312 312
// The MMA implementation needs Turing or newer, use the old WMMA code for Volta:
313-
if (cc == GGML_CUDA_CC_VOLTA) {
313+
if (fp16_mma_available(cc) && !new_mma_available(cc)) {
314 314
ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst);
315 315
return;
316 316
}

0 commit comments

Comments
 (0)