Skip to content

Commit cc74b97

Browse files
authored
[Bugfix][V1] Fix deepseek with v1 (#958)
### What this PR does / why we need it? Fix deepseek with v1. This error was introduced by #945, and this PR fixes the block table of MLA. ### How was this patch tested? CI passed with newly added test. Signed-off-by: Mengqing Cao <cmq0113@163.com>
1 parent e3c7f71 commit cc74b97

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

tests/multicard/test_offline_inference_distributed.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
"""
2323
import os
2424

25-
import pytest
2625
import vllm # noqa: F401
2726

2827
from tests.conftest import VllmRunner
@@ -47,8 +46,6 @@ def test_models_distributed_QwQ():
4746
vllm_model.generate_greedy(example_prompts, max_tokens)
4847

4948

50-
@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "1",
51-
reason="deepseek v2 lite is not supported on v1")
5249
def test_models_distributed_DeepSeek():
5350
example_prompts = [
5451
"vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",

vllm_ascend/attention/mla_v1.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,8 @@ def build(self,
239239
# it blocks on all previous kernels.
240240
device = self.runner.device
241241

242-
block_table = self.runner.input_batch.block_table[0].get_device_tensor(
243-
)
244-
block_table[:num_reqs, :self.runner.max_num_blocks_per_req] = (
245-
block_table[:num_reqs])
242+
block_table = (self.runner.input_batch.block_table[0].
243+
get_device_tensor()[:num_reqs])
246244
slot_mapping = self.runner.slot_mapping_cpu[:num_actual_tokens].to(
247245
device, non_blocking=True)
248246
input_positions = self.runner.positions_cpu[:num_actual_tokens].to(

0 commit comments

Comments
 (0)