Skip to content

Commit 4c5549e

Browse files
ShangmingCai authored and Alvant committed
[Misc][Speculative decoding] Typos and typing fixes (vllm-project#6467)
Co-authored-by: caishangming.csm <caishangming.csm@alibaba-inc.com> Signed-off-by: Alvant <alvasian@yandex.ru>
1 parent 3a3bdda commit 4c5549e

File tree

5 files changed

+7
-7
lines changed

5 files changed

+7
-7
lines changed

vllm/spec_decode/multi_step_worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def init_device(self) -> None:
4343
)
4444

4545
def set_include_gpu_probs_tensor(self) -> None:
46-
# Need include_gpu_probs_tensor for multi_step_worker
46+
# Need include_gpu_probs_tensor for MultiStepWorker
4747
self.model_runner.model.sampler.include_gpu_probs_tensor = True
4848

4949
@torch.inference_mode()

vllm/spec_decode/ngram_worker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
class NGramWorker(NonLLMProposerWorkerBase, LoraNotSupportedWorkerBase):
1414
"""NGramWorker provides a light drafter without need for model.
1515
16-
Current NGramWorker only implement prompt lookup decoding,
16+
Current NGramWorker only implements prompt lookup decoding,
1717
and in future we may also do RAG type drafter and other scenarios
1818
which don't rely on LLM model to give proposals.
1919
"""
@@ -37,7 +37,7 @@ def init_device(self):
3737
self.device = torch.device(f"cuda:{self.local_rank}")
3838
self.load_model = lambda *args, **kwargs: None
3939

40-
# Current only support Top1Proposer
40+
# Current NGramWorker only supports Top1Proposer
4141
self._proposer = Top1Proposer(
4242
weakref.proxy(self), # type: ignore[arg-type]
4343
device=self.device,

vllm/spec_decode/proposer_worker_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def sampler_output(
2424
) -> Tuple[Optional[List[SamplerOutput]], bool]:
2525
raise NotImplementedError
2626

27-
def set_include_gpu_probs_tensor(self):
27+
def set_include_gpu_probs_tensor(self) -> None:
2828
"""Implementation optional"""
2929
pass
3030

vllm/spec_decode/spec_decode_worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def __init__(
206206

207207
self.probs_dtype = self.spec_decode_sampler.probs_dtype
208208
self.token_id_dtype = self.spec_decode_sampler.token_id_dtype
209-
# Lazy initiazliation.
209+
# Lazy initialization.
210210
self.scorer: SpeculativeScorer
211211

212212
# Hidden states from target model to pass to proposer

vllm/spec_decode/top1_proposer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def _split_by_proposal_len(
138138

139139
# Currently only proposal lens of 0 or the global batch proposal len
140140
# are supported.
141-
# If max_proposal_len is defined, then we shall no exccess this
141+
# If max_proposal_len is defined, then we shall no exceed this
142142
# quota for nonzero_proposal
143143
new_k = 0
144144
if (self.max_proposal_len is None
@@ -219,7 +219,7 @@ def _merge_outputs(
219219
proposal_lens: List[int],
220220
nonzero_proposal_len_indices: List[int],
221221
sampler_transposed: bool,
222-
) -> Tuple[torch.Tensor, torch.tensor, torch.Tensor]:
222+
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
223223
"""After speculations are produced, merge the speculation results with
224224
the skipped sequences.
225225
"""

0 commit comments

Comments (0)