minor

comaniac · comaniac · commit c9f8cf3ae502 · 2025-02-10T09:26:21.000-08:00
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
@@ -92,10 +92,10 @@ def usage(self) -> float:
                       self.num_gpu_blocks)
 
     def make_prefix_cache_stats(self) -> PrefixCacheStats:
-        """Get (and reset) the prefix cache query and hit counts.
+        """Get (and reset) the prefix cache stats.
 
         Returns:
-            The prefix caching stats - query count, and hit count.
+            The current prefix caching stats.
         """
         stats = self.prefix_cache_stats
         self.prefix_cache_stats = PrefixCacheStats()
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
@@ -40,7 +40,7 @@ class PrefixCachingMetrics:
 
     def __init__(self, interval: int = 1000):
         self.interval = interval
-        # The current aggregated query total and hit.
+        # The current aggregated values.
         self.aggregated_requests = 0
         self.aggregated_query_total = 0
         self.aggregated_query_hit = 0
@@ -56,20 +56,21 @@ def observe(self, stats: PrefixCacheStats):
         When there are more than `interval` requests, the oldest set of
         requestsare removed from the metrics.
 
-        Stats:
-            reset: Whether reset_prefix_cache was invoked.
-            requests: The number of requests in this update.
-            queries: The number of queries in these requests.
-            hits: The number of hits in these requests.
+        Args:
+            stats: The prefix cache stats.
         """
+        # reset_prefix_cache was invoked before the current update.
+        # Reset the metrics before aggregating the current stats.
         if stats.reset:
             self.reset()
 
+        # Update the metrics.
         self.query_queue.append((stats.requests, stats.queries, stats.hits))
         self.aggregated_requests += stats.requests
         self.aggregated_query_total += stats.queries
         self.aggregated_query_hit += stats.hits
 
+        # Remove the oldest stats if the number of requests exceeds the interval.
         if self.aggregated_requests > self.interval:
             old_requests, old_queries, old_hits = self.query_queue.popleft()
             self.aggregated_requests -= old_requests
diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py
@@ -12,10 +12,14 @@
 @dataclass
 class PrefixCacheStats:
     """Stores prefix cache hit statistics."""
+    # Whether reset_prefix_cache was invoked.
     reset: bool = False
+    # The number of requests in this update.
     requests: int = 0
-    hits: int = 0
+    # The number of queries in these requests.
     queries: int = 0
+    # The number of hits in these requests.
+    hits: int = 0
 
 
 @dataclass