
Commit e0fdcd7

Author: yihuiwen
Commit message: rebase main
1 parent 0c116c4, commit e0fdcd7

File tree

3 files changed: +9 −16 lines changed


lightllm/models/qwen2_5_vl/qwen2_5_visual.py

+7-6
@@ -20,6 +20,7 @@
 from transformers import AutoProcessor
 from safetensors import safe_open
 from transformers.utils import TensorType
+from lightllm.server.multimodal_params import MultimodalParams, ImageItem
 from lightllm.models.qwen2_vl.qwen2_visual import PatchEmbed, VisionRotaryEmbedding

 # adapted from
@@ -509,17 +510,17 @@ def load_model(self, weight_dir):

         self.load_state_dict(weight_dict)

-    def encode(self, image_uuids: List):
+    def encode(self, images: List[ImageItem]):
         img_tensors = []
         valid_ids = []
         valid_id = 0
         img_grids = []
         uuids = []

-        for i, url in enumerate(image_uuids):
-            if isinstance(url, int):
-                uuids.append(url)
-                image_data = read_shm(get_shm_name_data(url))
+        for i, img in enumerate(images):
+            if isinstance(img, ImageItem):
+                uuids.append(img.uuid)
+                image_data = read_shm(get_shm_name_data(img.uuid))
                 image_data = Image.open(BytesIO(image_data))
                 image_data = get_image(image_data)
                 image_inputs = self.processor.preprocess(images=image_data, return_tensors="pt")
@@ -528,7 +529,7 @@ def encode(self, image_uuids: List):
                 img_tensors.append(pixel_values)
                 img_grids.append(image_grid_thw)
             else:
-                raise Exception("Unsupport input types: {} for {}".format(type(url), url))
+                raise Exception("Unsupport input types: {} for {}".format(type(img), img))

            # must devide merge_length
            cur_num = img_tensors[-1].shape[0] // (self.spatial_merge_size ** 2)
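For context, a minimal sketch of the calling-convention change in this hunk: encode() previously took bare shared-memory uuid integers and now takes ImageItem objects, reading img.uuid inside the loop. The ImageItem stand-in below is a simplified assumption for illustration only, not the real class from lightllm.server.multimodal_params.

```python
from dataclasses import dataclass
from typing import List


@dataclass
class ImageItem:
    # simplified stand-in for lightllm.server.multimodal_params.ImageItem;
    # only the uuid field read by encode() is modeled here
    uuid: int


def collect_uuids(images: List[ImageItem]) -> List[int]:
    # mirrors the new loop: accept ImageItem objects, pull out their uuids,
    # and reject anything else, as the bare-int path did before this commit
    uuids = []
    for img in images:
        if isinstance(img, ImageItem):
            uuids.append(img.uuid)
        else:
            raise Exception("Unsupport input types: {} for {}".format(type(img), img))
    return uuids


if __name__ == "__main__":
    print(collect_uuids([ImageItem(uuid=1), ImageItem(uuid=2)]))  # [1, 2]
```

Only the uuid field is actually read in this hunk; passing the whole ImageItem presumably keeps the rest of the image metadata available to the visual encoder without a second lookup.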

lightllm/server/router/model_infer/mode_backend/base_backend.py

+1-4
@@ -177,10 +177,7 @@ def init_model(self, kvargs):
                    self.model = Qwen2RewardTpPartModel(model_kvargs)
                else:
                    self.model = Qwen2TpPartModel(model_kvargs)
-            elif self.model_type == "qwen2_vl":
-                self.model = Qwen2VLTpPartModel(model_kvargs)
-                self.is_multimodal = True
-            elif self.model_type == "qwen2_5_vl":
+            elif self.model_type in ["qwen2_vl", "qwen2_5_vl"]:
                 self.model = Qwen2VLTpPartModel(model_kvargs)
                 self.is_multimodal = True
             elif self.model_type == "gemma":
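A self-contained sketch of what this hunk does: the two duplicated qwen2_vl / qwen2_5_vl branches collapse into one membership test that builds the same Qwen2VLTpPartModel. The class and kvargs below are placeholders for illustration, not the real lightllm objects.

```python
class Qwen2VLTpPartModel:
    # placeholder standing in for lightllm's tensor-parallel Qwen2-VL model
    def __init__(self, kvargs):
        self.kvargs = kvargs


def pick_model(model_type: str, model_kvargs: dict):
    # both Qwen2-VL and Qwen2.5-VL map to the same model class, so one
    # membership test replaces the two identical elif branches
    if model_type in ["qwen2_vl", "qwen2_5_vl"]:
        return Qwen2VLTpPartModel(model_kvargs), True  # (model, is_multimodal)
    raise ValueError("unsupported model_type: {}".format(model_type))


if __name__ == "__main__":
    model, is_multimodal = pick_model("qwen2_5_vl", {"tp": 1})
    print(type(model).__name__, is_multimodal)  # Qwen2VLTpPartModel True
```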

lightllm/server/tokenizer.py

+1-6
@@ -79,12 +79,7 @@ def get_tokenizer(
         tokenizer = LlavaTokenizer(tokenizer, model_cfg)
     elif model_type == "qwen" and "visual" in model_cfg:
         tokenizer = QWenVLTokenizer(tokenizer, model_cfg)
-    elif model_type == "qwen2_vl" and "vision_config" in model_cfg:
-        from transformers import AutoProcessor
-
-        image_processor = AutoProcessor.from_pretrained(tokenizer_name)
-        tokenizer = QWen2VLTokenizer(tokenizer=tokenizer, image_processor=image_processor, model_cfg=model_cfg)
-    elif model_type == "qwen2_5_vl" and "vision_config" in model_cfg:
+    elif model_type in ["qwen2_vl", "qwen2_5_vl"] and "vision_config" in model_cfg:
         from transformers import AutoProcessor

         image_processor = AutoProcessor.from_pretrained(tokenizer_name)
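By the same pattern, the tokenizer branch now matches either model type before loading the processor. The helper below is only an illustration of that merged branch with a hypothetical model_cfg; in the actual file the result is wrapped in QWen2VLTokenizer, as the removed lines show.

```python
from transformers import AutoProcessor, AutoTokenizer


def load_vl_processor(tokenizer_name: str, model_cfg: dict):
    # one branch now serves both Qwen2-VL and Qwen2.5-VL configs
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    if model_cfg.get("model_type") in ["qwen2_vl", "qwen2_5_vl"] and "vision_config" in model_cfg:
        image_processor = AutoProcessor.from_pretrained(tokenizer_name)
        return tokenizer, image_processor
    return tokenizer, None
```

Note that the surviving branch body is exactly the pre-existing qwen2_5_vl code; only the condition changed, which is why the diffstat for this file is +1 −6.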
