@@ -84,7 +84,7 @@ def test_prefill(hash_algo):
84
84
blocks = manager .allocate_slots (req0 , 55 ,
85
85
len (computed_blocks .blocks ) * 16 ,
86
86
computed_blocks )
87
- assert blocks .get_block_ids () == [1 , 2 , 3 , 4 ]
87
+ assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ] ]
88
88
89
89
# Check full block metadata
90
90
parent_block_hash = None
@@ -107,13 +107,13 @@ def test_prefill(hash_algo):
107
107
req1 = make_request ("1" , common_token_ids + unique_token_ids )
108
108
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req1 )
109
109
assert len (manager .req_to_block_hashes [req1 .request_id ]) == 3
110
- assert computed_blocks .get_block_ids () == [1 , 2 , 3 ]
110
+ assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ] ]
111
111
assert num_computed_tokens == 3 * 16
112
112
num_new_tokens = 53 - 3 * 16
113
113
blocks = manager .allocate_slots (req1 , num_new_tokens ,
114
114
len (computed_blocks .blocks ) * 16 ,
115
115
computed_blocks )
116
- assert blocks .get_block_ids () == [5 ]
116
+ assert blocks .get_block_ids () == [[ 5 ] ]
117
117
for block in computed_blocks .blocks :
118
118
assert block .ref_cnt == 2
119
119
@@ -141,13 +141,13 @@ def test_prefill(hash_algo):
141
141
req2 = make_request ("2" , common_token_ids + unique_token_ids )
142
142
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req2 )
143
143
assert len (manager .req_to_block_hashes [req2 .request_id ]) == 3
144
- assert computed_blocks .get_block_ids () == [1 , 2 , 3 ]
144
+ assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ] ]
145
145
assert num_computed_tokens == 3 * 16
146
146
num_new_tokens = 53 - 3 * 16
147
147
blocks = manager .allocate_slots (req2 , num_new_tokens ,
148
148
len (computed_blocks .blocks ) * 16 ,
149
149
computed_blocks )
150
- assert blocks .get_block_ids () == [6 ]
150
+ assert blocks .get_block_ids () == [[ 6 ] ]
151
151
152
152
# Although we only have 6 free blocks, we have 8 blocks in
153
153
# the free block queue due to lazy removal.
@@ -171,7 +171,7 @@ def test_prefill(hash_algo):
171
171
len (computed_blocks .blocks ) * 16 ,
172
172
computed_blocks )
173
173
# This block ID order also checks the eviction order.
174
- assert blocks .get_block_ids () == [7 , 8 , 9 , 10 , 4 , 5 , 6 , 3 , 2 , 1 ]
174
+ assert blocks .get_block_ids () == [[ 7 , 8 , 9 , 10 , 4 , 5 , 6 , 3 , 2 , 1 ] ]
175
175
assert manager .block_pool .free_block_queue .num_free_blocks == 0
176
176
assert manager .block_pool .free_block_queue .free_list_head is None
177
177
assert manager .block_pool .free_block_queue .free_list_tail is None
@@ -208,7 +208,7 @@ def test_prefill_plp():
208
208
blocks = manager .allocate_slots (req0 , 55 ,
209
209
len (computed_blocks .blocks ) * 16 ,
210
210
computed_blocks )
211
- assert blocks .get_block_ids () == [1 , 2 , 3 , 4 ]
211
+ assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ] ]
212
212
req0_block_hashes = [b .block_hash for b in blocks .blocks ]
213
213
214
214
# Check full block metadata
@@ -233,13 +233,13 @@ def test_prefill_plp():
233
233
req1 = make_request ("1" , common_token_ids + unique_token_ids )
234
234
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req1 )
235
235
assert len (manager .req_to_block_hashes [req1 .request_id ]) == 3
236
- assert computed_blocks .get_block_ids () == [1 , 2 , 3 ]
236
+ assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ] ]
237
237
assert num_computed_tokens == 3 * 16
238
238
num_new_tokens = 53 - 3 * 16
239
239
blocks = manager .allocate_slots (req1 , num_new_tokens ,
240
240
len (computed_blocks .blocks ) * 16 ,
241
241
computed_blocks )
242
- assert blocks .get_block_ids () == [5 ]
242
+ assert blocks .get_block_ids () == [[ 5 ] ]
243
243
for block in computed_blocks .blocks :
244
244
assert block .ref_cnt == 2
245
245
@@ -277,11 +277,11 @@ def test_prefill_plp():
277
277
block_ids = blocks .get_block_ids ()
278
278
# Duplicate cached blocks have different ids but same hashes vs request #0
279
279
assert [b .block_hash for b in blocks .blocks ] == req0_block_hashes
280
- assert block_ids != [1 , 2 , 3 , 4 ]
280
+ assert block_ids != [[ 1 , 2 , 3 , 4 ] ]
281
281
282
282
# Request #2 block hashes are valid since request #0 hashes are.
283
283
# Check block reference counts.
284
- for block_id in block_ids :
284
+ for block_id in block_ids [ 0 ] :
285
285
assert manager .block_pool .blocks [block_id ].ref_cnt == 1
286
286
287
287
manager .free (req2 )
@@ -307,7 +307,7 @@ def test_decode():
307
307
blocks = manager .allocate_slots (req0 , 55 ,
308
308
len (computed_blocks .blocks ) * 16 ,
309
309
computed_blocks )
310
- assert blocks .get_block_ids () == [1 , 2 , 3 , 4 ]
310
+ assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ] ]
311
311
312
312
# Append slots without allocating a new block.
313
313
req0 .num_computed_tokens = 55
@@ -379,12 +379,12 @@ def test_evict():
379
379
# Touch the first 2 blocks.
380
380
req2 = make_request ("2" , list (range (2 * 16 + 3 )))
381
381
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req2 )
382
- assert computed_blocks .get_block_ids () == [1 , 2 ]
382
+ assert computed_blocks .get_block_ids () == [[ 1 , 2 ] ]
383
383
assert num_computed_tokens == 2 * 16
384
384
blocks = manager .allocate_slots (req2 , 3 ,
385
385
len (computed_blocks .blocks ) * 16 ,
386
386
computed_blocks )
387
- assert blocks .get_block_ids () == [10 ]
387
+ assert blocks .get_block_ids () == [[ 10 ] ]
388
388
assert manager .block_pool .free_block_queue .num_free_blocks == 7
389
389
390
390
@@ -625,7 +625,7 @@ def test_mm_prefix_caching():
625
625
blocks = manager .allocate_slots (req0 , 59 ,
626
626
len (computed_blocks .blocks ) * 16 ,
627
627
computed_blocks )
628
- assert blocks .get_block_ids () == [1 , 2 , 3 , 4 ]
628
+ assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ] ]
629
629
req0 .num_computed_tokens = 59
630
630
631
631
# Append slots without allocating a new block.
@@ -686,7 +686,7 @@ def test_cache_key_salting():
686
686
blocks = manager .allocate_slots (req0 , 59 ,
687
687
len (computed_blocks .blocks ) * 16 ,
688
688
computed_blocks )
689
- assert blocks .get_block_ids () == [1 , 2 , 3 , 4 ]
689
+ assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ] ]
690
690
req0 .num_computed_tokens = 59
691
691
692
692
# Append slots without allocating a new block.
@@ -797,7 +797,7 @@ def test_reset_prefix_cache():
797
797
all_token_ids = full_block_token_ids + unique_token_ids
798
798
req0 = make_request ("0" , all_token_ids )
799
799
blocks = manager .allocate_slots (req0 , 55 )
800
- assert blocks .get_block_ids () == [1 , 2 , 3 , 4 ]
800
+ assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ] ]
801
801
802
802
unique_token_ids = [4 ] * 7
803
803
all_token_ids = full_block_token_ids + unique_token_ids
@@ -808,7 +808,7 @@ def test_reset_prefix_cache():
808
808
blocks = manager .allocate_slots (req1 , 7 ,
809
809
len (computed_blocks .blocks ) * 16 ,
810
810
computed_blocks )
811
- assert blocks .get_block_ids () == [5 ]
811
+ assert blocks .get_block_ids () == [[ 5 ] ]
812
812
813
813
# Failed to reset prefix cache because some blocks are not freed yet.
814
814
assert not manager .reset_prefix_cache ()
0 commit comments