From 7fd38bf90f82cbd74aca2c6d04ba37937bf50a21 Mon Sep 17 00:00:00 2001
From: Mengqing Cao
Date: Wed, 28 May 2025 22:35:14 +0800
Subject: [PATCH] [Bugfix][V1] Fix deepseek with v1

Signed-off-by: Mengqing Cao
---
 tests/multicard/test_offline_inference_distributed.py | 3 ---
 vllm_ascend/attention/mla_v1.py                       | 6 ++----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/multicard/test_offline_inference_distributed.py b/tests/multicard/test_offline_inference_distributed.py
index f399ea652..941055cf7 100644
--- a/tests/multicard/test_offline_inference_distributed.py
+++ b/tests/multicard/test_offline_inference_distributed.py
@@ -22,7 +22,6 @@
 """
 import os
 
-import pytest
 import vllm  # noqa: F401
 
 from tests.conftest import VllmRunner
@@ -47,8 +46,6 @@ def test_models_distributed_QwQ():
             vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "1",
-                    reason="deepseek v2 lite is not supported on v1")
 def test_models_distributed_DeepSeek():
     example_prompts = [
         "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index d39a1499f..054bd953c 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -239,10 +239,8 @@ def build(self,
         # it blocks on all previous kernels.
         device = self.runner.device
 
-        block_table = self.runner.input_batch.block_table[0].get_device_tensor(
-        )
-        block_table[:num_reqs, :self.runner.max_num_blocks_per_req] = (
-            block_table[:num_reqs])
+        block_table = (self.runner.input_batch.block_table[0].
+                       get_device_tensor()[:num_reqs])
         slot_mapping = self.runner.slot_mapping_cpu[:num_actual_tokens].to(
             device, non_blocking=True)
         input_positions = self.runner.positions_cpu[:num_actual_tokens].to(
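
For context on the mla_v1.py hunk: the removed lines fetched the full device
block table, copied its first num_reqs rows onto themselves (a no-op
self-assignment), and left the full-size table in use, while the new code
simply slices out the rows belonging to the active requests. Below is a
minimal sketch of the two behaviors, assuming PyTorch tensors; the shapes and
values are illustrative, and names other than num_reqs and
max_num_blocks_per_req are not taken from the diff:

    import torch

    # Hypothetical full block table: one row per request slot in the batch.
    num_total_reqs, max_num_blocks_per_req = 8, 4
    block_table_full = torch.arange(
        num_total_reqs * max_num_blocks_per_req).reshape(
            num_total_reqs, max_num_blocks_per_req)

    num_reqs = 3  # requests actually active this step

    # Old behavior: copy the first num_reqs rows onto themselves (changes
    # nothing), then keep referring to the full-size table afterwards.
    old = block_table_full.clone()
    old[:num_reqs, :max_num_blocks_per_req] = old[:num_reqs]

    # New behavior: take a view limited to the active requests, so downstream
    # consumers see a table with exactly num_reqs rows.
    new = block_table_full[:num_reqs]

    assert torch.equal(old[:num_reqs], new)  # same data, no redundant copy
    assert new.shape[0] == num_reqs          # only active rows are passed on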