8
8
from vllm .v1 .kv_cache_interface import SlidingWindowSpec
9
9
10
10
11
def get_sliding_window_manager(sliding_window_spec, block_pool):
    """Build the SlidingWindowManager used by the tests in this file.

    The manager is created with EAGLE disabled, a single KV cache group,
    and an identity function as the caching hash function, so tests only
    need to supply the spec and the block pool.

    Args:
        sliding_window_spec: Spec passed through as the manager's first
            positional argument.
        block_pool: Block pool passed through as the manager's second
            positional argument.

    Returns:
        The newly constructed SlidingWindowManager.
    """

    def _identity(value):
        # Stand-in hash function: hands back its input unchanged.
        return value

    manager = SlidingWindowManager(
        sliding_window_spec,
        block_pool,
        use_eagle=False,
        num_kv_cache_groups=1,
        caching_hash_fn=_identity,
    )
    return manager
17
+
18
+
11
19
def test_sliding_window_possible_cached_prefix ():
12
20
sliding_window_spec = SlidingWindowSpec (
13
21
block_size = 2 ,
@@ -19,9 +27,7 @@ def test_sliding_window_possible_cached_prefix():
19
27
)
20
28
21
29
block_pool = BlockPool (num_gpu_blocks = 100 , enable_caching = True )
22
- manager = SlidingWindowManager (sliding_window_spec ,
23
- block_pool ,
24
- use_eagle = False )
30
+ manager = get_sliding_window_manager (sliding_window_spec , block_pool )
25
31
26
32
def run_one_case (block_is_cached , expect_length ):
27
33
block_hash_list = [
@@ -81,9 +87,7 @@ def test_sliding_window_remove_skipped_blocks():
81
87
82
88
block_pool = BlockPool (num_gpu_blocks = 2000 , enable_caching = True )
83
89
84
- manager = SlidingWindowManager (sliding_window_spec ,
85
- block_pool ,
86
- use_eagle = False )
90
+ manager = get_sliding_window_manager (sliding_window_spec , block_pool )
87
91
88
92
null_block_id = block_pool .null_block .block_id
89
93
@@ -104,39 +108,35 @@ def assert_block_id(block_table, ids):
104
108
1000 , 1001 , 1002 , 1003 , 1004 , 1005 , 1006 , 1007 , 1008 , 1009 , 1010
105
109
]
106
110
block_table = id_to_block_table (original_block_ids )
107
- removed = manager .remove_skipped_blocks (block_table , 0 )
108
- assert_block_id (removed , [])
111
+ manager .req_to_blocks ["test" ] = block_table
112
+
113
+ manager .remove_skipped_blocks ("test" , 0 )
109
114
assert_block_id (block_table , original_block_ids )
110
115
111
116
# 4 tokens are computed. Only token 0 is out of the sliding window. As
112
117
# block 1000 also contains token 1 that is in the sliding window, block 1000
113
118
# cannot be removed.
114
- removed = manager .remove_skipped_blocks (block_table , 4 )
115
- assert_block_id (removed , [])
119
+ manager .remove_skipped_blocks ("test" , 4 )
116
120
assert_block_id (block_table , original_block_ids )
117
121
118
122
# 5 tokens are computed. Token 0 & 1 are out of the sliding window.
119
123
# Block 1000 can be removed.
120
- removed = manager .remove_skipped_blocks (block_table , 5 )
121
- assert_block_id (removed , [original_block_ids [0 ]])
124
+ manager .remove_skipped_blocks ("test" , 5 )
122
125
assert_block_id (block_table , [null_block_id ] + original_block_ids [1 :])
123
126
124
127
# 6 tokens are computed. Token 0-2 are out of the sliding window.
125
128
# Cannot remove new block as the block 1001 is still used by token 3.
126
- removed = manager .remove_skipped_blocks (block_table , 6 )
127
- assert_block_id (removed , [])
129
+ manager .remove_skipped_blocks ("test" , 6 )
128
130
assert_block_id (block_table , [null_block_id ] + original_block_ids [1 :])
129
131
130
132
# 7 tokens are computed. Token 0-3 are out of the sliding window.
131
133
# Block 1001 can be removed and block 1000 is already removed.
132
- removed = manager .remove_skipped_blocks (block_table , 7 )
133
- assert_block_id (removed , [original_block_ids [1 ]])
134
+ manager .remove_skipped_blocks ("test" , 7 )
134
135
assert_block_id (block_table , [null_block_id ] * 2 + original_block_ids [2 :])
135
136
136
137
# 11 tokens are computed. Token 0-7 are out of the sliding window.
137
138
# Block 1002 & 1003 can be removed now. Block 1003 represents a longer
138
139
# sequence, and is expected to be evicted earlier than 1002, so the order
139
140
# of removed blocks should be [1003, 1002].
140
- removed = manager .remove_skipped_blocks (block_table , 11 )
141
- assert_block_id (removed , [original_block_ids [3 ], original_block_ids [2 ]])
141
+ manager .remove_skipped_blocks ("test" , 11 )
142
142
assert_block_id (block_table , [null_block_id ] * 4 + original_block_ids [4 :])
0 commit comments