Skip to content

Commit 5c6a184

Browse files
committed
update
1 parent 651463b commit 5c6a184

File tree

5 files changed

+13
-10
lines changed

5 files changed

+13
-10
lines changed

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -598,8 +598,6 @@
598598
|[Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct-AWQ)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|✘|vision, video|[Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ)|
599599
|[Qwen/Qwen2.5-VL-32B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2.5-VL-32B-Instruct-AWQ)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|✘|vision, video|[Qwen/Qwen2.5-VL-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct-AWQ)|
600600
|[Qwen/Qwen2.5-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2.5-VL-72B-Instruct-AWQ)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|✘|vision, video|[Qwen/Qwen2.5-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct-AWQ)|
601-
|[XiaomiMiMo/MiMo-VL-7B-SFT](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-SFT)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|✘|vision, video|[XiaomiMiMo/MiMo-VL-7B-SFT](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-SFT)|
602-
|[XiaomiMiMo/MiMo-VL-7B-RL](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-RL)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|✘|vision, video|[XiaomiMiMo/MiMo-VL-7B-RL](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-RL)|
603601
|[Qwen/Qwen2.5-Omni-3B](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)|qwen2_5_omni|qwen2_5_omni|transformers>=4.50, soundfile, qwen_omni_utils, decord|✘|vision, video, audio|[Qwen/Qwen2.5-Omni-3B](https://huggingface.co/Qwen/Qwen2.5-Omni-3B)|
604602
|[Qwen/Qwen2.5-Omni-7B](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)|qwen2_5_omni|qwen2_5_omni|transformers>=4.50, soundfile, qwen_omni_utils, decord|✘|vision, video, audio|[Qwen/Qwen2.5-Omni-7B](https://huggingface.co/Qwen/Qwen2.5-Omni-7B)|
605603
|[Qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B-Instruct)|qwen2_audio|qwen2_audio|transformers>=4.45,<4.49, librosa|&#x2718;|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)|
@@ -617,6 +615,8 @@
617615
|[AIDC-AI/Ovis2-8B](https://modelscope.cn/models/AIDC-AI/Ovis2-8B)|ovis2|ovis2|transformers>=4.46.2, moviepy<2|&#x2718;|vision|[AIDC-AI/Ovis2-8B](https://huggingface.co/AIDC-AI/Ovis2-8B)|
618616
|[AIDC-AI/Ovis2-16B](https://modelscope.cn/models/AIDC-AI/Ovis2-16B)|ovis2|ovis2|transformers>=4.46.2, moviepy<2|&#x2718;|vision|[AIDC-AI/Ovis2-16B](https://huggingface.co/AIDC-AI/Ovis2-16B)|
619617
|[AIDC-AI/Ovis2-34B](https://modelscope.cn/models/AIDC-AI/Ovis2-34B)|ovis2|ovis2|transformers>=4.46.2, moviepy<2|&#x2718;|vision|[AIDC-AI/Ovis2-34B](https://huggingface.co/AIDC-AI/Ovis2-34B)|
618+
|[XiaomiMiMo/MiMo-VL-7B-SFT](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-SFT)|mimo_vl|mimo_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[XiaomiMiMo/MiMo-VL-7B-SFT](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-SFT)|
619+
|[XiaomiMiMo/MiMo-VL-7B-RL](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-RL)|mimo_vl|mimo_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[XiaomiMiMo/MiMo-VL-7B-RL](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-RL)|
620620
|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42,<4.45|&#x2718;|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
621621
|[ZhipuAI/cogagent-9b-20241220](https://modelscope.cn/models/ZhipuAI/cogagent-9b-20241220)|glm4v|glm4v|transformers>=4.42|&#x2718;|-|[THUDM/cogagent-9b-20241220](https://huggingface.co/THUDM/cogagent-9b-20241220)|
622622
|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|&#x2718;|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -598,8 +598,6 @@ The table below introduces the models integrated with ms-swift:
598598
|[Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct-AWQ)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ)|
599599
|[Qwen/Qwen2.5-VL-32B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2.5-VL-32B-Instruct-AWQ)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[Qwen/Qwen2.5-VL-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct-AWQ)|
600600
|[Qwen/Qwen2.5-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2.5-VL-72B-Instruct-AWQ)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[Qwen/Qwen2.5-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct-AWQ)|
601-
|[XiaomiMiMo/MiMo-VL-7B-SFT](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-SFT)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[XiaomiMiMo/MiMo-VL-7B-SFT](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-SFT)|
602-
|[XiaomiMiMo/MiMo-VL-7B-RL](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-RL)|qwen2_5_vl|qwen2_5_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[XiaomiMiMo/MiMo-VL-7B-RL](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-RL)|
603601
|[Qwen/Qwen2.5-Omni-3B](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)|qwen2_5_omni|qwen2_5_omni|transformers>=4.50, soundfile, qwen_omni_utils, decord|&#x2718;|vision, video, audio|[Qwen/Qwen2.5-Omni-3B](https://huggingface.co/Qwen/Qwen2.5-Omni-3B)|
604602
|[Qwen/Qwen2.5-Omni-7B](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)|qwen2_5_omni|qwen2_5_omni|transformers>=4.50, soundfile, qwen_omni_utils, decord|&#x2718;|vision, video, audio|[Qwen/Qwen2.5-Omni-7B](https://huggingface.co/Qwen/Qwen2.5-Omni-7B)|
605603
|[Qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B-Instruct)|qwen2_audio|qwen2_audio|transformers>=4.45,<4.49, librosa|&#x2718;|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)|
@@ -617,6 +615,8 @@ The table below introduces the models integrated with ms-swift:
617615
|[AIDC-AI/Ovis2-8B](https://modelscope.cn/models/AIDC-AI/Ovis2-8B)|ovis2|ovis2|transformers>=4.46.2, moviepy<2|&#x2718;|vision|[AIDC-AI/Ovis2-8B](https://huggingface.co/AIDC-AI/Ovis2-8B)|
618616
|[AIDC-AI/Ovis2-16B](https://modelscope.cn/models/AIDC-AI/Ovis2-16B)|ovis2|ovis2|transformers>=4.46.2, moviepy<2|&#x2718;|vision|[AIDC-AI/Ovis2-16B](https://huggingface.co/AIDC-AI/Ovis2-16B)|
619617
|[AIDC-AI/Ovis2-34B](https://modelscope.cn/models/AIDC-AI/Ovis2-34B)|ovis2|ovis2|transformers>=4.46.2, moviepy<2|&#x2718;|vision|[AIDC-AI/Ovis2-34B](https://huggingface.co/AIDC-AI/Ovis2-34B)|
618+
|[XiaomiMiMo/MiMo-VL-7B-SFT](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-SFT)|mimo_vl|mimo_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[XiaomiMiMo/MiMo-VL-7B-SFT](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-SFT)|
619+
|[XiaomiMiMo/MiMo-VL-7B-RL](https://modelscope.cn/models/XiaomiMiMo/MiMo-VL-7B-RL)|mimo_vl|mimo_vl|transformers>=4.49, qwen_vl_utils>=0.0.6, decord|&#x2718;|vision, video|[XiaomiMiMo/MiMo-VL-7B-RL](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-RL)|
620620
|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42,<4.45|&#x2718;|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
621621
|[ZhipuAI/cogagent-9b-20241220](https://modelscope.cn/models/ZhipuAI/cogagent-9b-20241220)|glm4v|glm4v|transformers>=4.42|&#x2718;|-|[THUDM/cogagent-9b-20241220](https://huggingface.co/THUDM/cogagent-9b-20241220)|
622622
|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|&#x2718;|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|

swift/llm/model/model/qwen.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,6 @@ def get_model_tokenizer_qwen2_5_vl(*args, **kwargs):
701701
tags=['vision', 'video']))
702702

703703

704-
705704
def get_model_tokenizer_qwen2_5_omni(model_dir, *args, **kwargs):
706705
from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor, Qwen2_5OmniConfig
707706
from qwen_omni_utils import vision_process

swift/llm/template/template/qwen.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,8 +397,12 @@ class Qwen2_5VLTemplate(Qwen2VLTemplate):
397397

398398
register_template(QwenTemplateMeta(MLLMTemplateType.qwen2_5_vl, template_cls=Qwen2_5VLTemplate))
399399

400-
register_template(QwenTemplateMeta(MLLMTemplateType.mimo_vl, template_cls=Qwen2_5VLTemplate,
401-
default_system='You are MiMo, an AI assistant developed by Xiaomi.'))
400+
register_template(
401+
QwenTemplateMeta(
402+
MLLMTemplateType.mimo_vl,
403+
template_cls=Qwen2_5VLTemplate,
404+
default_system='You are MiMo, an AI assistant developed by Xiaomi.'))
405+
402406

403407
class Qwen2_5OmniTemplate(Qwen2_5VLTemplate):
404408
version = 'omni'

tests/test_align/test_template/test_llm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def test_gemma3():
390390

391391

392392
def test_mimo():
393-
pt_engine = PtEngine('XiaomiMiMo/MiMo-7B-SFT')
393+
pt_engine = PtEngine('XiaomiMiMo/MiMo-7B-RL-0530')
394394
res = _infer_model(pt_engine)
395395
pt_engine.default_template.template_backend = 'jinja'
396396
res2 = _infer_model(pt_engine)
@@ -426,7 +426,7 @@ def test_mimo():
426426
# test_phi4_mini()
427427
# test_internlm3()
428428
# test_deepseek_r1_distill()
429-
test_deepseek_prover_v2()
429+
# test_deepseek_prover_v2()
430430
# test_qwen2_5_prm()
431431
# test_mistral_small()
432432
# test_baichuan_m1()
@@ -435,4 +435,4 @@ def test_mimo():
435435
# test_gemma3()
436436
# test_glm4_0414()
437437
# test_qwen3()
438-
# test_mimo()
438+
test_mimo()

0 commit comments

Comments
 (0)