We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents e4c0d67 + e1b40ac · commit b58ce7c (Copy full SHA for b58ce7c)
ggml-cuda.cu
@@ -2558,7 +2558,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
2558
}
2559
2560
// Disable CUDA graphs (from the next token) if the use-case is demanding too many consecutive graph updates.
2561
- if (cuda_graph_update_required) {
+ if (use_cuda_graph && cuda_graph_update_required) {
2562
cuda_ctx->cuda_graph->number_consecutive_updates++;
2563
} else {
2564
cuda_ctx->cuda_graph->number_consecutive_updates = 0;
0 commit comments