Nexesenex
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
+1 -1
@@ -2558,7 +2558,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
         }
 
         // Disable CUDA graphs (from the next token) if the use-case is demanding too many consecutive graph updates.
-        if (cuda_graph_update_required) {
+        if (use_cuda_graph && cuda_graph_update_required) {
             cuda_ctx->cuda_graph->number_consecutive_updates++;
         } else {
             cuda_ctx->cuda_graph->number_consecutive_updates = 0;
Original file line number	Diff line number	Diff line change
`@@ -2558,7 +2558,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t`
`2558`	`2558`	`}`
`2559`	`2559`
`2560`	`2560`	`// Disable CUDA graphs (from the next token) if the use-case is demanding too many consecutive graph updates.`
`2561`		`- if (cuda_graph_update_required) {`
	`2561`	`+ if (use_cuda_graph && cuda_graph_update_required) {`
`2562`	`2562`	`cuda_ctx->cuda_graph->number_consecutive_updates++;`
`2563`	`2563`	`} else {`
`2564`	`2564`	`cuda_ctx->cuda_graph->number_consecutive_updates = 0;`