Skip to content

Commit c9f8cf3

Browse files
committed
minor
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
1 parent 60e1637 commit c9f8cf3

File tree

3 files changed

+14
-9
lines changed

3 files changed

+14
-9
lines changed

vllm/v1/core/kv_cache_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ def usage(self) -> float:
9292
self.num_gpu_blocks)
9393

9494
def make_prefix_cache_stats(self) -> PrefixCacheStats:
95-
"""Get (and reset) the prefix cache query and hit counts.
95+
"""Get (and reset) the prefix cache stats.
9696
9797
Returns:
98-
The prefix caching stats - query count, and hit count.
98+
The current prefix caching stats.
9999
"""
100100
stats = self.prefix_cache_stats
101101
self.prefix_cache_stats = PrefixCacheStats()

vllm/v1/core/kv_cache_utils.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class PrefixCachingMetrics:
4040

4141
def __init__(self, interval: int = 1000):
4242
self.interval = interval
43-
# The current aggregated query total and hit.
43+
# The current aggregated values.
4444
self.aggregated_requests = 0
4545
self.aggregated_query_total = 0
4646
self.aggregated_query_hit = 0
@@ -56,20 +56,21 @@ def observe(self, stats: PrefixCacheStats):
5656
When there are more than `interval` requests, the oldest set of
5757
requests are removed from the metrics.
5858
59-
Stats:
60-
reset: Whether reset_prefix_cache was invoked.
61-
requests: The number of requests in this update.
62-
queries: The number of queries in these requests.
63-
hits: The number of hits in these requests.
59+
Args:
60+
stats: The prefix cache stats.
6461
"""
62+
# reset_prefix_cache was invoked before the current update.
63+
# Reset the metrics before aggregating the current stats.
6564
if stats.reset:
6665
self.reset()
6766

67+
# Update the metrics.
6868
self.query_queue.append((stats.requests, stats.queries, stats.hits))
6969
self.aggregated_requests += stats.requests
7070
self.aggregated_query_total += stats.queries
7171
self.aggregated_query_hit += stats.hits
7272

73+
# Remove the oldest stats if the number of requests exceeds the interval.
7374
if self.aggregated_requests > self.interval:
7475
old_requests, old_queries, old_hits = self.query_queue.popleft()
7576
self.aggregated_requests -= old_requests

vllm/v1/metrics/stats.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,14 @@
1212
@dataclass
1313
class PrefixCacheStats:
1414
"""Stores prefix cache hit statistics."""
15+
# Whether reset_prefix_cache was invoked.
1516
reset: bool = False
17+
# The number of requests in this update.
1618
requests: int = 0
17-
hits: int = 0
19+
# The number of queries in these requests.
1820
queries: int = 0
21+
# The number of hits in these requests.
22+
hits: int = 0
1923

2024

2125
@dataclass

0 commit comments

Comments
 (0)