Commit 47c2577

tlrmchlsmth authored and Erkin Sagiroglu committed
[Bugfix] Fix divide by zero when serving Mamba models (vllm-project#9617)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Signed-off-by: Erkin Sagiroglu <erkin@infra-aipipeline-1-at1-prox-prod-a.ipa.corp.telnyx.com>
1 parent 277d5f6 commit 47c2577

File tree

1 file changed: +2 −2 lines changed


vllm/engine/llm_engine.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -1612,15 +1612,15 @@ def _get_stats(self,
         # KV Cache Usage in %
         num_total_gpu = self.cache_config.num_gpu_blocks
         gpu_cache_usage_sys = 0.
-        if num_total_gpu is not None:
+        if num_total_gpu:  # Guard against both None and 0
             num_free_gpu = sum(
                 scheduler.block_manager.get_num_free_gpu_blocks()
                 for scheduler in self.scheduler)
             gpu_cache_usage_sys = 1.0 - (num_free_gpu / num_total_gpu)

         num_total_cpu = self.cache_config.num_cpu_blocks
         cpu_cache_usage_sys = 0.
-        if num_total_cpu is not None and num_total_cpu > 0:
+        if num_total_cpu:  # Guard against both None and 0
             num_free_cpu = sum(
                 scheduler.block_manager.get_num_free_cpu_blocks()
                 for scheduler in self.scheduler)
```
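The bug being fixed: models with no KV cache (e.g. Mamba) can report `num_gpu_blocks == 0`, and the old guard `if num_total_gpu is not None:` let that value through to the division, raising `ZeroDivisionError`. A truthiness check rejects both `None` and `0`. Here is a minimal standalone sketch of that guard (not vLLM's actual code; the helper name is made up for illustration):

```python
# Hypothetical helper mirroring the guard in this commit: compute
# fractional cache usage only when the total block count is truthy.

def cache_usage(num_total: "int | None", num_free: int) -> float:
    """Return fractional cache usage, or 0.0 when no cache is allocated."""
    usage = 0.0
    # `if num_total is not None` alone would let num_total == 0 through
    # and divide by zero below; truthiness guards against None AND 0.
    if num_total:
        usage = 1.0 - (num_free / num_total)
    return usage

print(cache_usage(None, 0))   # cache not yet configured -> 0.0
print(cache_usage(0, 0))      # Mamba-style model, zero KV-cache blocks -> 0.0
print(cache_usage(100, 25))   # 75 of 100 blocks in use -> 0.75
```

The same pattern is applied twice in the diff, once for GPU blocks and once for CPU blocks; the CPU branch already had an explicit `> 0` check, and the fix just makes both sides use the shorter idiom.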
