Skip to content

[Model] Support NVLM-D and fix QK Norm in InternViT #9045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits from nvlm_d into main on
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
698921d
Support NVLM-D
DarkLight1337 Oct 3, 2024
33f3a50
Fix wrong module
DarkLight1337 Oct 3, 2024
20ebb75
Avoid warning when loading config
DarkLight1337 Oct 3, 2024
de39406
Fix `mlp1` loading
DarkLight1337 Oct 3, 2024
fe3ba5b
Fix model loading
DarkLight1337 Oct 3, 2024
92454b8
Use NVLM-specific modules
DarkLight1337 Oct 3, 2024
efb8f26
Load the correct vision model
DarkLight1337 Oct 3, 2024
26f4496
Adopt the original version of RMSNorm which uses custom variance
DarkLight1337 Oct 3, 2024
d76ef50
Remove extra transpose
DarkLight1337 Oct 3, 2024
dead63e
Simplify code
DarkLight1337 Oct 4, 2024
f0d3003
Remove unused code
DarkLight1337 Oct 4, 2024
72a71d5
Update input processing
DarkLight1337 Oct 4, 2024
15eb917
Merge branch 'main' into nvlm_d
DarkLight1337 Oct 4, 2024
1a8fd37
Format
DarkLight1337 Oct 4, 2024
bfd910a
Fix and abstract input pipeline
DarkLight1337 Oct 4, 2024
fc710a1
Add support for online serving
DarkLight1337 Oct 4, 2024
6d38309
Fix wrong embeddings
DarkLight1337 Oct 4, 2024
3a89f90
Fix docs
DarkLight1337 Oct 4, 2024
83d54f2
Update examples
DarkLight1337 Oct 4, 2024
3144be2
Fix incorrect head size
DarkLight1337 Oct 4, 2024
43d546c
Add sanity checks
DarkLight1337 Oct 4, 2024
2ec7fc1
Fix parallel attention not being used correctly
DarkLight1337 Oct 4, 2024
6a6f477
Merge branch 'main' into nvlm_d
DarkLight1337 Oct 4, 2024
b5ea51b
Merge branch 'main' into nvlm_d
ywang96 Oct 6, 2024
49e3dad
fix qk norm for paralleled VIT attention
ywang96 Oct 7, 2024
759e749
clean up
ywang96 Oct 7, 2024
5c2d303
add comment
ywang96 Oct 7, 2024
6d54d59
update nvlm-d multi-image
ywang96 Oct 7, 2024
47ed7e1
Merge branch 'main' into nvlm_d
DarkLight1337 Oct 7, 2024
1b57db8
Add header to docs
DarkLight1337 Oct 7, 2024
7728064
Consolidate code
DarkLight1337 Oct 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/source/models/supported_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ Multimodal Language Models
- Image
- :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
-
* - :code:`NVLM_D`
- NVLM-D 1.0
- Image\ :sup:`E+`
- :code:`nvidia/NVLM-D-72B`, etc.
-
* - :code:`PaliGemmaForConditionalGeneration`
- PaliGemma
- Image\ :sup:`E`
Expand Down
36 changes: 14 additions & 22 deletions vllm/model_executor/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@
"PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"),
"Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
"Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"),
"Qwen2VLForConditionalGeneration":
("qwen2_vl", "Qwen2VLForConditionalGeneration"),
Copy link
Member Author

@DarkLight1337 DarkLight1337 Oct 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is redundant as it's already in multimodal models, so I'm removing this.

"RWForCausalLM": ("falcon", "FalconForCausalLM"),
"StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"),
"StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
Expand All @@ -79,34 +77,28 @@
"Qwen2ForRewardModel": ("qwen2_rm", "Qwen2ForRewardModel"),
}

# yapf: disable
_MULTIMODAL_MODELS = {
"Blip2ForConditionalGeneration":
("blip2", "Blip2ForConditionalGeneration"),
"ChameleonForConditionalGeneration":
("chameleon", "ChameleonForConditionalGeneration"),
"Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
"ChameleonForConditionalGeneration": ("chameleon", "ChameleonForConditionalGeneration"), # noqa: E501
"FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
"InternVLChatModel": ("internvl", "InternVLChatModel"),
"LlavaForConditionalGeneration": ("llava",
"LlavaForConditionalGeneration"),
"LlavaNextForConditionalGeneration": ("llava_next",
"LlavaNextForConditionalGeneration"),
"LlavaNextVideoForConditionalGeneration":
("llava_next_video", "LlavaNextVideoForConditionalGeneration"),
"LlavaOnevisionForConditionalGeneration":
("llava_onevision", "LlavaOnevisionForConditionalGeneration"),
"LlavaForConditionalGeneration": ("llava", "LlavaForConditionalGeneration"),
"LlavaNextForConditionalGeneration": ("llava_next", "LlavaNextForConditionalGeneration"), # noqa: E501
"LlavaNextVideoForConditionalGeneration": ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), # noqa: E501
"LlavaOnevisionForConditionalGeneration": ("llava_onevision", "LlavaOnevisionForConditionalGeneration"), # noqa: E501
"MiniCPMV": ("minicpmv", "MiniCPMV"),
"PaliGemmaForConditionalGeneration": ("paligemma",
"PaliGemmaForConditionalGeneration"),
"MllamaForConditionalGeneration": ("mllama", "MllamaForConditionalGeneration"), # noqa: E501
"NVLM_D": ("nvlm_d", "InternVLChatModel"),
"PaliGemmaForConditionalGeneration": ("paligemma", "PaliGemmaForConditionalGeneration"), # noqa: E501
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
"PixtralForConditionalGeneration": ("pixtral",
"PixtralForConditionalGeneration"),
"PixtralForConditionalGeneration": ("pixtral", "PixtralForConditionalGeneration"), # noqa: E501
"QWenLMHeadModel": ("qwen", "QWenLMHeadModel"),
"Qwen2VLForConditionalGeneration": ("qwen2_vl",
"Qwen2VLForConditionalGeneration"),
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501
"UltravoxModel": ("ultravox", "UltravoxModel"),
"MllamaForConditionalGeneration": ("mllama",
"MllamaForConditionalGeneration"),
}
# yapf: enable
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Re-sorted the list in alphabetical order, and enforced one line per model to make it more readable.


_CONDITIONAL_GENERATION_MODELS = {
"BartModel": ("bart", "BartForConditionalGeneration"),
"BartForConditionalGeneration": ("bart", "BartForConditionalGeneration"),
Expand Down
1 change: 1 addition & 0 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"exaone": ExaoneConfig,
"internvl_chat": InternVLChatConfig,
"nemotron": NemotronConfig,
"NVLM_D": InternVLChatConfig,
"solar": SolarConfig,
"ultravox": UltravoxConfig,
"qwen2_vl": Qwen2VLConfig,
Expand Down
Loading