Skip to content

Commit b704a0b

Browse files
committed
chore: reduce diff
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
1 parent 0e699ec commit b704a0b

File tree

1 file changed

+2
-7
lines changed

1 file changed

+2
-7
lines changed

vllm/v1/structured_output/__init__.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -119,9 +119,6 @@ def grammar_bitmask(
119 119
# position in the batch. Resize the bitmask down to the size of
120 120
# the batch.
121 121
bitmask_tensor = self._grammar_bitmask
122-
# Reset the relevant part of the bitmask before filling
123-
if batch_len > 0:
124-
bitmask_tensor[:batch_len].fill_(-1)
125 122

126 123
for req_id, batch_index in structured_output_request_ids.items():
127 124
full_request = requests[req_id]
@@ -137,11 +134,9 @@ def grammar_bitmask(
137 134
so_request.grammar.fill_bitmask(bitmask_tensor, batch_index)
138 135

139 136
if batch_len < bitmask_tensor.shape[0]:
140-
final_bitmask_tensor = bitmask_tensor[:batch_len]
141-
else:
142-
final_bitmask_tensor = bitmask_tensor
137+
bitmask_tensor = self._grammar_bitmask[:batch_len]
143 138

144 139
# After finishing with the xgrammar operations, we convert to
145 140
# np.ndarray, because that is much more efficient for serialization
146 141
# and deserialization when sending this to the GPU workers.
147-
return final_bitmask_tensor.numpy()
142+
return bitmask_tensor.numpy()

0 commit comments

Comments
 (0)