
Commit feb30b9

Rollback of unnecessary formatting
1 parent 57c7b4e commit feb30b9

File tree: 1 file changed (+68, -103 lines)

vllm/engine/async_llm_engine.py

Lines changed: 68 additions & 103 deletions
@@ -98,7 +98,7 @@ def finished(self) -> bool:
         return self._finished
 
     async def generator(
-        self,
+        self
     ) -> AsyncGenerator[Union[RequestOutput, EmbeddingRequestOutput], None]:
         try:
             while True:
@@ -114,9 +114,9 @@ async def generator(
 
     @staticmethod
     def _is_raisable(value: Any):
-        return isinstance(
-            value, BaseException) or (isinstance(value, type)
-                                      and issubclass(value, BaseException))
+        return isinstance(value, BaseException) or \
+                (isinstance(value, type) and \
+                 issubclass(value, BaseException))
 
 
 class RequestTracker:
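
Aside: the hunk above only re-wraps _is_raisable; its behaviour is unchanged. A minimal standalone sketch (not part of the commit, mirroring the body shown in the diff) of what the helper accepts:

    # Truthy for exception instances and exception classes, falsy otherwise.
    def _is_raisable(value):
        return isinstance(value, BaseException) or \
                (isinstance(value, type) and \
                 issubclass(value, BaseException))

    assert _is_raisable(ValueError("boom"))   # exception instance
    assert _is_raisable(KeyboardInterrupt)    # exception class
    assert not _is_raisable("not an error")   # arbitrary objects are rejected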
@@ -126,7 +126,7 @@ def __init__(self) -> None:
         self._request_streams: Dict[str, AsyncStream] = {}
         self._aborted_requests: asyncio.Queue[str] = asyncio.Queue()
         self._new_requests: asyncio.Queue[Tuple[AsyncStream,
-                                                dict]] = (asyncio.Queue())
+                                                dict]] = asyncio.Queue()
         self.new_requests_event = asyncio.Event()
 
     def __contains__(self, item):
@@ -148,12 +148,11 @@ def propagate_exception(self,
             for rid in tuple(self._request_streams.keys()):
                 self.abort_request(rid, exception=exc)
 
-    def process_request_output(
-        self,
-        request_output: Union[RequestOutput, EmbeddingRequestOutput],
-        *,
-        verbose: bool = False,
-    ) -> None:
+    def process_request_output(self,
+                               request_output: Union[RequestOutput,
+                                                     EmbeddingRequestOutput],
+                               *,
+                               verbose: bool = False) -> None:
         """Process a request output from the engine."""
         request_id = request_output.request_id
         finished = request_output.finished
@@ -172,25 +171,21 @@ def process_request_output(
         if verbose and finished:
             logger.info("Finished request %s.", request_id)
 
-    def process_exception(
-        self,
-        request_id: str,
-        exception: BaseException,
-        *,
-        verbose: bool = False,
-    ) -> None:
+    def process_exception(self,
+                          request_id: str,
+                          exception: BaseException,
+                          *,
+                          verbose: bool = False) -> None:
         """Propagate an exception from the engine."""
         if verbose:
             logger.info("Finished request %s.", request_id)
         self.abort_request(request_id, exception=exception)
 
-    def add_request(
-        self,
-        request_id: str,
-        *,
-        verbose: bool = False,
-        **engine_add_request_kwargs,
-    ) -> AsyncStream:
+    def add_request(self,
+                    request_id: str,
+                    *,
+                    verbose: bool = False,
+                    **engine_add_request_kwargs) -> AsyncStream:
         """Add a request to be sent to the engine on the next background
         loop iteration."""
         if request_id in self._request_streams:
@@ -210,13 +205,12 @@ def add_request(
 
         return stream
 
-    def abort_request(
-        self,
-        request_id: str,
-        *,
-        exception: Optional[Union[BaseException, Type[BaseException]]] = None,
-        verbose: bool = False,
-    ) -> None:
+    def abort_request(self,
+                      request_id: str,
+                      *,
+                      exception: Optional[Union[BaseException,
+                                                Type[BaseException]]] = None,
+                      verbose: bool = False) -> None:
         """Abort a request during next background loop iteration."""
         if verbose:
             logger.info("Aborted request %s.", request_id)
@@ -293,12 +287,11 @@ async def step_async(
         # This ensures that the scheduler is only called again when the current
         # batch has completed.
         if not self._has_remaining_steps(seq_group_metadata_list):
+
             # Schedule iteration
-            (
-                seq_group_metadata_list,
-                scheduler_outputs,
-                allow_async_output_proc,
-            ) = self.scheduler[virtual_engine].schedule()
+            (seq_group_metadata_list, scheduler_outputs,
+             allow_async_output_proc
+             ) = self.scheduler[virtual_engine].schedule()
 
             ctx.seq_group_metadata_list = seq_group_metadata_list
             ctx.scheduler_outputs = scheduler_outputs
@@ -312,11 +305,8 @@ async def step_async(
                 # cache the scheduler outputs for the next iteration if we have
                 # lookahead slots
                 self._cache_scheduler_outputs_for_multi_step(
-                    virtual_engine,
-                    seq_group_metadata_list,
-                    scheduler_outputs,
-                    allow_async_output_proc,
-                )
+                    virtual_engine, seq_group_metadata_list, scheduler_outputs,
+                    allow_async_output_proc)
 
         assert seq_group_metadata_list is not None
         assert scheduler_outputs is not None
@@ -329,8 +319,8 @@ async def step_async(
             # For supporting PP this is probably the best way to pass the
             # sampled_token_ids, as a separate broadcast over all the PP stages
             # will cause one virtual engine's microbatch to block the pipeline.
-            last_sampled_token_ids = self._get_last_sampled_token_ids(
-                virtual_engine)
+            last_sampled_token_ids = \
+                self._get_last_sampled_token_ids(virtual_engine)
 
             execute_model_req = ExecuteModelRequest(
                 seq_group_metadata_list=seq_group_metadata_list,
@@ -343,8 +333,7 @@ async def step_async(
                 finished_requests_ids=finished_requests_ids,
                 # We use ExecuteModelRequest to pass the last sampled_token_ids
                 # to each of the non-last PP stages for in-place prepare_input.
-                last_sampled_token_ids=last_sampled_token_ids,
-            )
+                last_sampled_token_ids=last_sampled_token_ids)
 
             if allow_async_output_proc:
                 execute_model_req.async_callback = self.async_callbacks[
@@ -371,26 +360,22 @@ async def step_async(
         if not self._has_remaining_steps(seq_group_metadata_list):
             # Clear the cache if we have finished all the steps
             if self.scheduler_config.is_multi_step:
-                self.cached_scheduler_outputs[virtual_engine] = (
-                    SchedulerOutputState())
+                self.cached_scheduler_outputs[
+                    virtual_engine] = SchedulerOutputState()
 
-            ctx.append_output(
-                outputs=outputs,
-                seq_group_metadata_list=seq_group_metadata_list,
-                scheduler_outputs=scheduler_outputs,
-                is_async=allow_async_output_proc,
-                is_last_step=True,
-            )
+            ctx.append_output(outputs=outputs,
+                              seq_group_metadata_list=seq_group_metadata_list,
+                              scheduler_outputs=scheduler_outputs,
+                              is_async=allow_async_output_proc,
+                              is_last_step=True)
 
             if outputs and allow_async_output_proc:
-                assert (
-                    len(outputs) == 1
-                ), "Async postprocessor expects only a single output set"
+                assert len(
+                    outputs
+                ) == 1, "Async postprocessor expects only a single output set"
                 self._advance_to_next_step(
-                    outputs[0],
-                    seq_group_metadata_list,
-                    scheduler_outputs.scheduled_seq_groups,
-                )
+                    outputs[0], seq_group_metadata_list,
+                    scheduler_outputs.scheduled_seq_groups)
 
         if not allow_async_output_proc:
             self._process_model_outputs(ctx=ctx)
@@ -432,11 +417,9 @@ async def add_request_async(
         if lora_request is not None and not self.lora_config:
             raise ValueError(f"Got lora_request {lora_request} but LoRA is "
                              "not enabled!")
-
         if priority > 0 and not self.scheduler_config.policy == "priority":
             raise ValueError(f"Got priority {priority} but "
                              "Priority scheduling is not enabled.")
-
         if arrival_time is None:
             arrival_time = time.time()
 
@@ -484,13 +467,11 @@ class AsyncLLMEngine:
 
     _engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine
 
-    def __init__(
-        self,
-        *args,
-        log_requests: bool = True,
-        start_engine_loop: bool = True,
-        **kwargs,
-    ) -> None:
+    def __init__(self,
+                 *args,
+                 log_requests: bool = True,
+                 start_engine_loop: bool = True,
+                 **kwargs) -> None:
         self.log_requests = log_requests
         self.engine = self._engine_class(*args, **kwargs)
 
@@ -501,8 +482,8 @@ def __init__(
             self.engine.model_config.use_async_output_proc)
 
         if self.use_process_request_outputs_callback:
-            self.engine.process_request_outputs_callback = weak_bind(
-                self.process_request_outputs)
+            self.engine.process_request_outputs_callback = \
+                weak_bind(self.process_request_outputs)
 
         self.background_loop: Optional[asyncio.Future] = None
         # We need to keep a reference to unshielded
@@ -533,58 +514,47 @@ def _get_executor_cls(
             executor_class = distributed_executor_backend
         elif engine_config.device_config.device_type == "neuron":
             from vllm.executor.neuron_executor import NeuronExecutorAsync
-
             executor_class = NeuronExecutorAsync
         elif engine_config.device_config.device_type == "tpu":
             if distributed_executor_backend == "ray":
                 from vllm.executor.ray_tpu_executor import RayTPUExecutorAsync
-
                 executor_class = RayTPUExecutorAsync
             else:
                 assert distributed_executor_backend is None
                 from vllm.executor.tpu_executor import TPUExecutorAsync
-
                 executor_class = TPUExecutorAsync
         elif engine_config.device_config.device_type == "cpu":
             from vllm.executor.cpu_executor import CPUExecutorAsync
-
             executor_class = CPUExecutorAsync
         elif engine_config.device_config.device_type == "openvino":
             assert distributed_executor_backend is None, (
                 "Distributed execution is not supported with "
                 "the OpenVINO backend.")
             from vllm.executor.openvino_executor import OpenVINOExecutorAsync
-
             executor_class = OpenVINOExecutorAsync
         elif engine_config.device_config.device_type == "xpu":
             if distributed_executor_backend is None:
                 from vllm.executor.xpu_executor import XPUExecutorAsync
-
                 executor_class = XPUExecutorAsync
             elif distributed_executor_backend == "ray":
                 from vllm.executor.ray_xpu_executor import RayXPUExecutorAsync
-
                 executor_class = RayXPUExecutorAsync
             elif distributed_executor_backend == "mp":
                 from vllm.executor.multiproc_xpu_executor import (
                     MultiprocessingXPUExecutorAsync)
-
                 executor_class = MultiprocessingXPUExecutorAsync
             else:
                 raise RuntimeError(
                     "Not supported distributed execution model on XPU device.")
         elif distributed_executor_backend == "ray":
             from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync
-
             executor_class = RayGPUExecutorAsync
         elif distributed_executor_backend == "mp":
             from vllm.executor.multiproc_gpu_executor import (
                 MultiprocessingGPUExecutorAsync)
-
             executor_class = MultiprocessingGPUExecutorAsync
         else:
             from vllm.executor.gpu_executor import GPUExecutorAsync
-
             executor_class = GPUExecutorAsync
         return executor_class
 
@@ -654,8 +624,8 @@ async def get_tokenizer(
         self,
         lora_request: Optional[LoRARequest] = None,
     ) -> AnyTokenizer:
-        return await self.engine.get_tokenizer_group(
-        ).get_lora_tokenizer_async(lora_request)
+        return await (self.engine.get_tokenizer_group().
+                      get_lora_tokenizer_async(lora_request))
 
     def start_background_loop(self) -> None:
         """Start the background loop."""
@@ -746,8 +716,8 @@ async def run_engine_loop(engine_ref: ReferenceType):
         if not engine:
             return
 
-        pipeline_parallel_size = (
-            engine.engine.parallel_config.pipeline_parallel_size)
+        pipeline_parallel_size = \
+            engine.engine.parallel_config.pipeline_parallel_size
         has_requests_in_progress = [False] * pipeline_parallel_size
         while True:
             if not any(has_requests_in_progress):
@@ -783,8 +753,7 @@ async def run_engine_loop(engine_ref: ReferenceType):
                 async with asyncio_timeout(ENGINE_ITERATION_TIMEOUT_S):
                     done, _ = await asyncio.wait(
                         requests_in_progress,
-                        return_when=asyncio.FIRST_COMPLETED,
-                    )
+                        return_when=asyncio.FIRST_COMPLETED)
                     for _ in range(pipeline_parallel_size):
                         await asyncio.sleep(0)
                     for task in done:
@@ -818,7 +787,7 @@ async def add_request(
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
         trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
+        prompt_adapter_request: Optional[PromptAdapterRequest] = None
     ) -> AsyncGenerator[Union[RequestOutput, EmbeddingRequestOutput], None]:
         if not self.is_running:
             if self.start_engine_loop:
@@ -838,8 +807,7 @@ async def add_request(
             arrival_time=arrival_time or time.time(),
             lora_request=lora_request,
             trace_headers=trace_headers,
-            prompt_adapter_request=prompt_adapter_request,
-        )
+            prompt_adapter_request=prompt_adapter_request)
 
         return stream.generator()
 
@@ -850,7 +818,7 @@ async def generate(
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
         trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
+        prompt_adapter_request: Optional[PromptAdapterRequest] = None
     ) -> AsyncGenerator[RequestOutput, None]:
         """Generate outputs for a request.
 
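
Aside: the signature hunks above only drop a trailing comma after the prompt_adapter_request parameter of add_request and generate; how callers consume generate is unchanged. A hypothetical usage sketch, not from this commit, with a placeholder model name and sampling settings:

    import asyncio

    from vllm import SamplingParams
    from vllm.engine.arg_utils import AsyncEngineArgs
    from vllm.engine.async_llm_engine import AsyncLLMEngine

    async def main():
        # Build an async engine; the model below is only an example.
        engine = AsyncLLMEngine.from_engine_args(
            AsyncEngineArgs(model="facebook/opt-125m"))
        final = None
        # generate() returns an async generator of incremental RequestOutputs.
        async for output in engine.generate("Hello, my name is",
                                            SamplingParams(max_tokens=16),
                                            request_id="req-0"):
            final = output
        print(final.outputs[0].text)

    asyncio.run(main())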
@@ -1030,11 +998,9 @@ def _abort(self, request_id: str) -> None:
         Args:
             request_id: The unique id of the request.
         """
-        self._request_tracker.abort_request(
-            request_id,
-            exception=asyncio.CancelledError,
-            verbose=self.log_requests,
-        )
+        self._request_tracker.abort_request(request_id,
+                                            exception=asyncio.CancelledError,
+                                            verbose=self.log_requests)
 
     async def get_model_config(self) -> ModelConfig:
         """Get the model configuration of the vLLM engine."""
@@ -1057,10 +1023,9 @@ async def get_lora_config(self) -> LoRAConfig:
         return self.engine.get_lora_config()
 
     async def do_log_stats(
-        self,
-        scheduler_outputs: Optional[SchedulerOutputs] = None,
-        model_output: Optional[List[SamplerOutput]] = None,
-    ) -> None:
+            self,
+            scheduler_outputs: Optional[SchedulerOutputs] = None,
+            model_output: Optional[List[SamplerOutput]] = None) -> None:
         self.engine.do_log_stats()
 
     async def check_health(self) -> None:
