@@ -1435,7 +1435,8 @@ def _process_model_outputs(self,
1435
1435
# LLMEngine/AsyncLLMEngine directly
1436
1436
if is_async :
1437
1437
# Log stats.
1438
- self .do_log_stats (scheduler_outputs , outputs , finished_before )
1438
+ self .do_log_stats (scheduler_outputs , outputs , finished_before ,
1439
+ skip )
1439
1440
1440
1441
# Tracing
1441
1442
self .do_tracing (scheduler_outputs )
@@ -1742,18 +1743,20 @@ def remove_logger(self, logger_name: str) -> None:
1742
1743
def do_log_stats (self ,
1743
1744
scheduler_outputs : Optional [SchedulerOutputs ] = None ,
1744
1745
model_output : Optional [List [SamplerOutput ]] = None ,
1745
- finished_before : Optional [List [int ]] = None ) -> None :
1746
+ finished_before : Optional [List [int ]] = None ,
1747
+ skip : Optional [List [int ]] = None ) -> None :
1746
1748
"""Forced log when no requests active."""
1747
1749
if self .log_stats :
1748
1750
stats = self ._get_stats (scheduler_outputs , model_output ,
1749
- finished_before )
1751
+ finished_before , skip )
1750
1752
for logger in self .stat_loggers .values ():
1751
1753
logger .log (stats )
1752
1754
1753
1755
def _get_stats (self ,
1754
1756
scheduler_outputs : Optional [SchedulerOutputs ],
1755
1757
model_output : Optional [List [SamplerOutput ]] = None ,
1756
- finished_before : Optional [List [int ]] = None ) -> Stats :
1758
+ finished_before : Optional [List [int ]] = None ,
1759
+ skip : Optional [List [int ]] = None ) -> Stats :
1757
1760
"""Get Stats to be Logged to Prometheus.
1758
1761
1759
1762
Args:
@@ -1835,6 +1838,11 @@ def _get_stats(self,
1835
1838
actual_num_batched_tokens -= 1
1836
1839
continue
1837
1840
1841
+ # Currently, skip == preempted sequences, so we need to skip
1842
+ # their log stats
1843
+ if skip and idx in skip :
1844
+ continue
1845
+
1838
1846
group_was_prefill = idx < scheduler_outputs .num_prefill_groups
1839
1847
seq_group = scheduled_seq_group .seq_group
1840
1848
0 commit comments