From a2d6091f2f5beac6157413726de8113cb4808258 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Sun, 29 Sep 2024 23:58:17 +0000
Subject: [PATCH] revert updates to num_computed_tokens when sampling

---
 vllm/engine/llm_engine.py                  | 1 -
 vllm/engine/output_processor/multi_step.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index e3cd822f648..98c0e9eec27 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1260,7 +1260,6 @@ def _advance_to_next_step(
                 assert len(seq_group.seqs) == 1
                 seq = seq_group.seqs[0]
                 seq.append_token_id(sample.output_token, sample.logprobs)
-                seq_group.update_num_computed_tokens(1)
 
     def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
         """Performs one decoding iteration and returns newly generated results.
diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py
index 6dac3619580..e1aae841934 100644
--- a/vllm/engine/output_processor/multi_step.py
+++ b/vllm/engine/output_processor/multi_step.py
@@ -173,7 +173,6 @@ def _process_seq_outputs(self, seq: Sequence,
                 token_id=output_token_id,
                 logprobs=output_logprob,
             )
-            seq.data.update_num_computed_tokens(1)
 
         self._process_decode_and_stop(seq, sampling_params)
 