
Commit 3d49776

[Model][LoRA]LoRA support added for MiniCPMV2.5 (#7199)
1 parent bc2ef1f commit 3d49776

File tree: 8 files changed (+378, −31 lines)


tests/lora/conftest.py

Lines changed: 5 additions & 0 deletions
@@ -194,6 +194,11 @@ def baichuan_zero_lora_files():
     return snapshot_download(repo_id="jeeejeee/baichuan7b-zero-init")
 
 
+@pytest.fixture(scope="session")
+def minicpmv_lora_files():
+    return snapshot_download(repo_id="jeeejeee/minicpmv25-lora-pokemon")
+
+
 @pytest.fixture(scope="session")
 def tinyllama_lora_files():
     return snapshot_download(repo_id="jashing/tinyllama-colorist-lora")

tests/lora/test_minicpmv.py

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+from typing import List
+
+import vllm
+from vllm.assets.image import ImageAsset
+from vllm.lora.request import LoRARequest
+
+MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
+
+PROMPT_TEMPLATE = (
+    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
+    "(<image>./</image>)\nWhat is in the image?<|eot_id|>"
+    "<|start_header_id|>assistant<|end_header_id|>\n\n")
+
+IMAGE_ASSETS = [
+    ImageAsset("stop_sign"),
+    ImageAsset("cherry_blossom"),
+]
+
+# After fine-tuning with LoRA, all generated content should begin with `A`.
+EXPECTED_OUTPUT = [
+    "A red and white stop sign with a Chinese archway in the background featuring red lanterns and gold accents.",  # noqa: E501
+    "A pink cherry blossom tree with a blue sky in the background.",
+]
+
+
+def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
+    sampling_params = vllm.SamplingParams(
+        temperature=0,
+        max_tokens=5,
+        stop_token_ids=[128001, 128009],  # eos_id, eot_id
+    )
+
+    inputs = [{
+        "prompt": PROMPT_TEMPLATE,
+        "multi_modal_data": {
+            "image": asset.pil_image
+        },
+    } for asset in IMAGE_ASSETS]
+
+    outputs = llm.generate(
+        inputs,
+        sampling_params,
+        lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
+        if lora_id else None,
+    )
+    # Print the outputs.
+    generated_texts: List[str] = []
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text.strip()
+        generated_texts.append(generated_text)
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    return generated_texts
+
+
+def test_minicpmv_lora(minicpmv_lora_files):
+    llm = vllm.LLM(
+        MODEL_PATH,
+        max_num_seqs=2,
+        enable_lora=True,
+        max_loras=4,
+        max_lora_rank=64,
+        trust_remote_code=True,
+    )
+
+    output1 = do_sample(llm, minicpmv_lora_files, lora_id=1)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output1[i])
+    output2 = do_sample(llm, minicpmv_lora_files, lora_id=2)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output2[i])
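For reference, the adapter can also be exercised outside the test harness. The sketch below mirrors the test above with vLLM's offline LLM API; the adapter repo id, model path, prompt template, and LoRARequest usage are taken from this diff, while the adapter label "pokemon" and the larger max_tokens are arbitrary choices for illustration.

# Minimal offline-inference sketch (not part of this commit): load the
# MiniCPM-V 2.5 LoRA adapter used by the test and caption one image.
from huggingface_hub import snapshot_download

import vllm
from vllm.assets.image import ImageAsset
from vllm.lora.request import LoRARequest

lora_path = snapshot_download(repo_id="jeeejeee/minicpmv25-lora-pokemon")

llm = vllm.LLM(
    "openbmb/MiniCPM-Llama3-V-2_5",
    enable_lora=True,
    max_lora_rank=64,
    trust_remote_code=True,
)

prompt = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
    "(<image>./</image>)\nWhat is in the image?<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n")

outputs = llm.generate(
    [{
        "prompt": prompt,
        "multi_modal_data": {"image": ImageAsset("stop_sign").pil_image},
    }],
    vllm.SamplingParams(temperature=0, max_tokens=64),
    # "pokemon" is just a label; the integer id and local adapter path follow
    # the LoRARequest pattern shown in the test above.
    lora_request=LoRARequest("pokemon", 1, lora_path),
)
print(outputs[0].outputs[0].text)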

tests/lora/test_minicpmv_tp.py

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+from typing import List
+
+import pytest
+
+import vllm
+from vllm.assets.image import ImageAsset
+from vllm.lora.request import LoRARequest
+
+from ..utils import multi_gpu_test
+
+MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
+
+PROMPT_TEMPLATE = (
+    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
+    "(<image>./</image>)\nWhat is in the image?<|eot_id|>"
+    "<|start_header_id|>assistant<|end_header_id|>\n\n")
+
+IMAGE_ASSETS = [
+    ImageAsset("stop_sign"),
+    ImageAsset("cherry_blossom"),
+]
+
+# After fine-tuning with LoRA, all generated content should begin with `A`.
+EXPECTED_OUTPUT = [
+    "A red and white stop sign with a Chinese archway in the background featuring red lanterns and gold accents.",  # noqa: E501
+    "A pink cherry blossom tree with a blue sky in the background.",
+]
+
+
+def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
+    sampling_params = vllm.SamplingParams(
+        temperature=0,
+        max_tokens=5,
+        stop_token_ids=[128001, 128009],  # eos_id, eot_id
+    )
+
+    inputs = [{
+        "prompt": PROMPT_TEMPLATE,
+        "multi_modal_data": {
+            "image": asset.pil_image
+        },
+    } for asset in IMAGE_ASSETS]
+
+    outputs = llm.generate(
+        inputs,
+        sampling_params,
+        lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
+        if lora_id else None,
+    )
+    # Print the outputs.
+    generated_texts: List[str] = []
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text.strip()
+        generated_texts.append(generated_text)
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    return generated_texts
+
+
+@multi_gpu_test(num_gpus=2)
+@pytest.mark.parametrize("fully_sharded", [True, False])
+def test_minicpmv_tp2(minicpmv_lora_files, fully_sharded):
+    llm = vllm.LLM(
+        MODEL_PATH,
+        enable_lora=True,
+        max_num_seqs=2,
+        max_loras=4,
+        max_lora_rank=64,
+        tensor_parallel_size=2,
+        trust_remote_code=True,
+        fully_sharded_loras=fully_sharded,
+    )
+
+    output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
+
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
+
+
+@multi_gpu_test(num_gpus=4)
+@pytest.mark.parametrize("fully_sharded", [True, False])
+def test_minicpmv_tp4(minicpmv_lora_files, fully_sharded):
+    llm = vllm.LLM(
+        MODEL_PATH,
+        enable_lora=True,
+        max_num_seqs=2,
+        max_loras=4,
+        max_lora_rank=64,
+        tensor_parallel_size=4,
+        trust_remote_code=True,
+        fully_sharded_loras=fully_sharded,
+    )
+    output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output_tp[i])

vllm/lora/models.py

Lines changed: 41 additions & 4 deletions
@@ -24,7 +24,9 @@
 from vllm.lora.punica import PunicaWrapper
 from vllm.lora.utils import (from_layer, from_layer_logits_processor,
                              parse_fine_tuned_lora_name, replace_submodule)
-from vllm.model_executor.models.interfaces import SupportsLoRA
+from vllm.model_executor.models.interfaces import (SupportsLoRA,
+                                                   supports_multimodal)
+from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.model_executor.models.utils import PPMissingLayer
 from vllm.utils import is_pin_memory_available
 
@@ -332,6 +334,8 @@ def __init__(
             self.supported_lora_modules.append("rotary_emb")
         self.packed_modules_mapping = copy.deepcopy(
             self.model.packed_modules_mapping)
+        # Used to indicate whether the model is a multimodal model
+        self.supports_mm: bool = supports_multimodal(self.model)
         self.packed_modules: Dict[str, List[str]] = {}
         self.modules: Dict[str, "BaseLayerWithLoRA"] = {}
         # Dict instead of a Set for compatibility with LRUCache.
@@ -437,12 +441,22 @@ def _create_lora_modules(self):
                 continue
             if not self._match_target_modules(module_name):
                 continue
+            # A temporary approach for multimodal models to support LoRA
+            # TODO: Remove this restriction
+            if self._filter_unsupported_mm_module(module_name):
+                logger.warning(
+                    "Regarding multimodal models, vLLM currently only supports "
+                    "adding LoRA to language model, %s will be ignored.",
+                    module_name,
+                )
+                continue
             parts = module_name.split(".")[-1]
             packed_moduled_lst = self.packed_modules_mapping.get(parts, [])
             new_module = replace_submodule(
                 self.model, module_name,
                 from_layer(module, self.lora_slots, self.lora_config,
                            packed_moduled_lst, self.model.config))
+
             # LinearScalingRotaryEmbeddingWithLora is used to handle
             # long context lora. Register relevant metadata.
             if isinstance(new_module, LinearScalingRotaryEmbeddingWithLora):
@@ -460,6 +474,15 @@ def _create_lora_modules(self):
                                                 module, self.lora_slots,
                                                 self.lora_config,
                                                 self.model.config))
+
+            # In some models, especially multimodal ones, layers with the same
+            # name may have different types, such as nn.Linear and
+            # ReplicatedLinear. The nn.Linear layers cannot be replaced with
+            # LoRA layers, leading to assertion error. The following check
+            # aims to prevent this error
+            if self.supports_mm and not isinstance(new_module,
+                                                   BaseLayerWithLoRA):
+                continue
             self.register_module(module_name, new_module)
             self._register_packed_modules(module_name)
             # All lora layers share the same punica_wrapper based on reference.
@@ -478,9 +501,10 @@ def create_dummy_lora(
         """Create zero-initialized LoRAModel for warmup."""
         model = LoRAModel(lora_id, rank, {}, scaling_factor)
         for module_name, module in self.model.named_modules():
-            if not self._match_target_modules(module_name) or not isinstance(
-                    module, BaseLayerWithLoRA) or isinstance(
-                        module, LinearScalingRotaryEmbeddingWithLora):
+            if (not self._match_target_modules(module_name)
+                    or not isinstance(module, BaseLayerWithLoRA)
+                    or isinstance(module, LinearScalingRotaryEmbeddingWithLora)
+                    or self._filter_unsupported_mm_module(module_name)):
                 continue
             parts = module_name.split(".")
             if module_name not in self.packed_modules:
@@ -541,6 +565,19 @@ def _match_target_modules(self, module_name: str):
                 module_name) or target_module == module_name
             for target_module in self.supported_lora_modules)
 
+    def _filter_unsupported_mm_module(self, module_name: str) -> bool:
+        """
+        Regarding multimodal models, vLLM currently only supports adding LoRA to
+        language model. LoRA for other modules, such as the vision tower, will
+        be filtered out.
+        """
+        if self.supports_mm:
+            prefix = module_name.split(".")[0]
+            module_mapping: MultiModelKeys = self.model.get_mm_mapping()
+            return (prefix in module_mapping.connector
+                    or prefix in module_mapping.tower_model)
+        return False
+
     def _register_packed_modules(self, module_full_name: str) -> None:
         parts = module_full_name.split(".")
         module_name = parts[-1]
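The new filter relies on the model exposing a get_mm_mapping() method that reports which top-level module prefixes belong to the language model, the connector, and the vision tower. A rough sketch of what such a mapping could look like for MiniCPM-V 2.5 follows; the prefix names ("llm", "resampler", "vpm") and the MultiModelKeys.from_string_field helper are assumptions based on the rest of this commit (the model and module_mapping files are among the 8 changed files but are not shown in this excerpt).

# Illustrative sketch only -- the authoritative version lives in the model
# file changed by this commit. Prefix names here are assumptions.
from vllm.model_executor.models.module_mapping import MultiModelKeys


def get_mm_mapping(self) -> MultiModelKeys:
    """Tell the LoRA manager which submodules belong to which component."""
    return MultiModelKeys.from_string_field(
        language_model="llm",    # LoRA is applied to these modules
        connector="resampler",   # skipped by _filter_unsupported_mm_module
        tower_model="vpm",       # vision tower, also skipped
    )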
