Skip to content

Commit e2aa6de

Browse files
committed
test
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
1 parent 91da711 commit e2aa6de

File tree

3 files changed

+41
-8
lines changed

3 files changed

+41
-8
lines changed

tests/v1/core/test_kv_cache_utils.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from vllm.multimodal.inputs import MultiModalKwargs
66
from vllm.sampling_params import SamplingParams
77
from vllm.v1.core.kv_cache_utils import (BlockHashType, FreeKVCacheBlockQueue,
8-
KVCacheBlock,
8+
KVCacheBlock, PrefixCachingMetrics,
99
generate_block_hash_extra_keys,
1010
hash_block_tokens,
1111
hash_request_tokens)
@@ -277,3 +277,34 @@ def test_hash_request_tokens_no_mm_inputs():
277277
assert block_hashes[0].extra_keys is None
278278
assert block_hashes[1].token_ids == (3, 4, 5)
279279
assert block_hashes[1].extra_keys is None
280+
281+
282+
def test_metrics():
    """Exercise PrefixCachingMetrics with a window of 5 requests.

    Walks through the empty state, accumulation up to the window size,
    eviction of the oldest request once a sixth one is added, and reset().
    """
    # Function-scope import: hit_rate is a computed ratio, so compare it with
    # a tolerance instead of exact float equality.
    from math import isclose

    metrics = PrefixCachingMetrics(interval=5)
    # Nothing recorded yet.
    assert metrics.hit_rate == 0.0

    metrics.add_request_query(20, 9)
    # 9 / 20 = 0.45
    assert isclose(metrics.hit_rate, 0.45)

    for _ in range(4):
        metrics.add_request_query(20, 4)

    # Five requests queued: (9 + 4 * 4) / (5 * 20) = 25 / 100 = 0.25
    assert isclose(metrics.hit_rate, 0.25)

    metrics.add_request_query(10, 2)

    # The sixth request pushes out (20, 9):
    #   total = 100 - 20 + 10 = 90, hits = 25 - 9 + 2 = 18, rate = 0.2
    assert metrics.aggregated_query_total == 90
    assert metrics.aggregated_query_hit == 18
    assert isclose(metrics.hit_rate, 0.2)

    metrics.reset()
    # reset() must clear both the aggregates and the per-request queue.
    assert metrics.hit_rate == 0.0
    assert metrics.aggregated_query_total == 0
    assert metrics.aggregated_query_hit == 0
    assert not metrics.query_queue

vllm/v1/core/kv_cache_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ def __init__(
7272
self.req_to_blocks: DefaultDict[str,
7373
List[KVCacheBlock]] = defaultdict(list)
7474

75-
# Prefix cache metrics.
76-
self.prefix_caching_metrics = PrefixCachingMetrics()
75+
# Prefix cache metrics. TODO: Make the interval configurable.
76+
self.prefix_caching_metrics = PrefixCachingMetrics(interval=1000)
7777

7878
@property
7979
def usage(self) -> float:

vllm/v1/core/kv_cache_utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@ class PrefixCachingMetrics:
3939

4040
def __init__(self, interval: int = 1000):
    """Create an empty metrics tracker.

    Args:
        interval: Maximum number of requests kept in the queue; once the
            queue grows past this, add_request_query drops the oldest
            entry.
    """
    # One (num_queries, num_hits) pair per recent request, oldest on the
    # left.
    self.query_queue: deque[Tuple[int, int]] = deque()
    # Aggregated query and hit counts; an evicted entry's counts are
    # subtracted from these in add_request_query.
    self.aggregated_query_total = self.aggregated_query_hit = 0
    self.interval = interval
4547

4648
def add_request_query(self, num_queries: int, num_hits: int):
4749
"""Add a request to the metrics. This function is called when
@@ -54,9 +56,9 @@ def add_request_query(self, num_queries: int, num_hits: int):
5456
num_hits: The number of hits in the request.
5557
"""
5658

57-
self.request_queries.append((num_queries, num_hits))
58-
if len(self.request_queries) > self.interval:
59-
old_num_queries, old_num_hits = self.request_queries.popleft()
59+
self.query_queue.append((num_queries, num_hits))
60+
if len(self.query_queue) > self.interval:
61+
old_num_queries, old_num_hits = self.query_queue.popleft()
6062
self.aggregated_query_total -= old_num_queries
6163
self.aggregated_query_hit -= old_num_hits
6264

@@ -67,7 +69,7 @@ def reset(self):
6769
"""Reset the metrics."""
6870
self.aggregated_query_total = 0
6971
self.aggregated_query_hit = 0
70-
self.request_queries.clear()
72+
self.query_queue.clear()
7173

7274
@property
7375
def hit_rate(self) -> float:

0 commit comments

Comments
 (0)