Skip to content

Commit 279175d

Browse files
[Bugfix] Fix async log stats (vllm-project#8417) (#17)
Co-authored-by: Alexander Matveev <59768536+alexm-neuralmagic@users.noreply.github.com>
1 parent 0beb531 commit 279175d

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

tests/basic_correctness/test_preemption.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def test_chunked_prefill_recompute(
6464
enable_chunked_prefill=enable_chunked_prefill,
6565
max_num_seqs=max_num_seqs,
6666
worker_use_ray=worker_use_ray,
67+
disable_log_stats=False,
6768
) as vllm_model:
6869
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
6970
assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt

vllm/engine/llm_engine.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,7 +1436,8 @@ def _process_model_outputs(self,
14361436
# LLMEngine/AsyncLLMEngine directly
14371437
if is_async:
14381438
# Log stats.
1439-
self.do_log_stats(scheduler_outputs, outputs, finished_before)
1439+
self.do_log_stats(scheduler_outputs, outputs, finished_before,
1440+
skip)
14401441

14411442
# Tracing
14421443
self.do_tracing(scheduler_outputs)
@@ -1743,25 +1744,31 @@ def remove_logger(self, logger_name: str) -> None:
17431744
def do_log_stats(self,
17441745
scheduler_outputs: Optional[SchedulerOutputs] = None,
17451746
model_output: Optional[List[SamplerOutput]] = None,
1746-
finished_before: Optional[List[int]] = None) -> None:
1747+
finished_before: Optional[List[int]] = None,
1748+
skip: Optional[List[int]] = None) -> None:
17471749
"""Forced log when no requests active."""
17481750
if self.log_stats:
17491751
stats = self._get_stats(scheduler_outputs, model_output,
1750-
finished_before)
1752+
finished_before, skip)
17511753
for logger in self.stat_loggers.values():
17521754
logger.log(stats)
17531755

17541756
def _get_stats(self,
17551757
scheduler_outputs: Optional[SchedulerOutputs],
17561758
model_output: Optional[List[SamplerOutput]] = None,
1757-
finished_before: Optional[List[int]] = None) -> Stats:
1759+
finished_before: Optional[List[int]] = None,
1760+
skip: Optional[List[int]] = None) -> Stats:
17581761
"""Get Stats to be Logged to Prometheus.
17591762
17601763
Args:
17611764
scheduler_outputs: Optional, used to populate metrics related to
17621765
the scheduled batch,
17631766
model_output: Optional, used to emit speculative decoding metrics
17641767
which are created by the workers.
1768+
finished_before: Optional, indices of sequences that were finished
1769+
before. These sequences will be ignored.
1770+
skip: Optional, indices of sequences that were preempted. These
1771+
sequences will be ignored.
17651772
"""
17661773
now = time.time()
17671774

@@ -1836,6 +1843,11 @@ def _get_stats(self,
18361843
actual_num_batched_tokens -= 1
18371844
continue
18381845

1846+
# Currently, skip == preempted sequences, so we need to skip
1847+
# their log stats
1848+
if skip and idx in skip:
1849+
continue
1850+
18391851
group_was_prefill = idx < scheduler_outputs.num_prefill_groups
18401852
seq_group = scheduled_seq_group.seq_group
18411853

0 commit comments

Comments (0)