Skip to content

Commit 8785c7d

Browse files
garg-amit authored and LeiWang1999 committed
[Bugfix] Fix Phi3.5 mini and MoE LoRA inference (vllm-project#8571)
Signed-off-by: LeiWang1999 <leiwang1999@outlook.com>
1 parent 4f5c32e commit 8785c7d

File tree

3 files changed

+22
-1
lines changed

3 files changed

+22
-1
lines changed

vllm/model_executor/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
"OrionForCausalLM": ("orion", "OrionForCausalLM"),
5151
"PersimmonForCausalLM": ("persimmon", "PersimmonForCausalLM"),
5252
"PhiForCausalLM": ("phi", "PhiForCausalLM"),
53-
"Phi3ForCausalLM": ("llama", "LlamaForCausalLM"),
53+
"Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"),
5454
"PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"),
5555
"Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
5656
"Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"),

vllm/model_executor/models/phi3.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# coding=utf-8
2+
# Adapted from llama.py
3+
"""Inference-only Phi3 model code inherit from Llama.py"""
4+
5+
from vllm.model_executor.models.llama import LlamaForCausalLM
6+
7+
8+
class Phi3ForCausalLM(LlamaForCausalLM):
9+
10+
packed_modules_mapping = {
11+
"qkv_proj": [
12+
"qkv_proj",
13+
],
14+
"gate_up_proj": [
15+
"gate_up_proj",
16+
],
17+
}

vllm/model_executor/models/phimoe.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,10 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA):
491491
"o_proj",
492492
"embed_tokens",
493493
"lm_head",
494+
"w1",
495+
"w2",
496+
"w3",
497+
"gate",
494498
]
495499
embedding_modules = {
496500
"embed_tokens": "input_embeddings",

0 commit comments

Comments
 (0)