Skip to content

Commit 6821020

Browse files
authored
[Bugfix] Fix async log stats (#8417)
1 parent 8427550 commit 6821020

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

tests/basic_correctness/test_preemption.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ def test_chunked_prefill_recompute(
6464
enable_chunked_prefill=enable_chunked_prefill,
6565
max_num_seqs=max_num_seqs,
6666
worker_use_ray=worker_use_ray,
67+
disable_log_stats=False,
6768
) as vllm_model:
6869
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
6970
assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt

vllm/engine/llm_engine.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,7 +1056,8 @@ def _process_model_outputs(self,
10561056
# LLMEngine/AsyncLLMEngine directly
10571057
if is_async:
10581058
# Log stats.
1059-
self.do_log_stats(scheduler_outputs, outputs, finished_before)
1059+
self.do_log_stats(scheduler_outputs, outputs, finished_before,
1060+
skip)
10601061

10611062
# Tracing
10621063
self.do_tracing(scheduler_outputs)
@@ -1363,25 +1364,31 @@ def remove_logger(self, logger_name: str) -> None:
13631364
def do_log_stats(self,
13641365
scheduler_outputs: Optional[SchedulerOutputs] = None,
13651366
model_output: Optional[List[SamplerOutput]] = None,
1366-
finished_before: Optional[List[int]] = None) -> None:
1367+
finished_before: Optional[List[int]] = None,
1368+
skip: Optional[List[int]] = None) -> None:
13671369
"""Forced log when no requests active."""
13681370
if self.log_stats:
13691371
stats = self._get_stats(scheduler_outputs, model_output,
1370-
finished_before)
1372+
finished_before, skip)
13711373
for logger in self.stat_loggers.values():
13721374
logger.log(stats)
13731375

13741376
def _get_stats(self,
13751377
scheduler_outputs: Optional[SchedulerOutputs],
13761378
model_output: Optional[List[SamplerOutput]] = None,
1377-
finished_before: Optional[List[int]] = None) -> Stats:
1379+
finished_before: Optional[List[int]] = None,
1380+
skip: Optional[List[int]] = None) -> Stats:
13781381
"""Get Stats to be Logged to Prometheus.
13791382
13801383
Args:
13811384
scheduler_outputs: Optional, used to populate metrics related to
13821385
the scheduled batch,
13831386
model_output: Optional, used to emit speculative decoding metrics
13841387
which are created by the workers.
1388+
finished_before: Optional, indices of sequences that were finished
1389+
before. These sequences will be ignored.
1390+
skip: Optional, indices of sequences that were preempted. These
1391+
sequences will be ignored.
13851392
"""
13861393
now = time.time()
13871394

@@ -1456,6 +1463,11 @@ def _get_stats(self,
14561463
actual_num_batched_tokens -= 1
14571464
continue
14581465

1466+
# Currently, skip == preempted sequences, so we need to skip
1467+
# their log stats
1468+
if skip and idx in skip:
1469+
continue
1470+
14591471
group_was_prefill = idx < scheduler_outputs.num_prefill_groups
14601472
seq_group = scheduled_seq_group.seq_group
14611473

0 commit comments

Comments
 (0)