Skip to content

Commit 2a888db

Browse files
committed
Pixtral example
1 parent 16cd5ef commit 2a888db

File tree

4 files changed

+142
-82
lines changed

4 files changed

+142
-82
lines changed

examples/multimodal_pixtral.py

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import sys, os
2+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
3+
4+
from exllamav2 import (
5+
ExLlamaV2,
6+
ExLlamaV2Config,
7+
ExLlamaV2Cache,
8+
ExLlamaV2Tokenizer,
9+
ExLlamaV2VisionTower,
10+
)
11+
12+
from exllamav2.generator import (
13+
ExLlamaV2DynamicGenerator,
14+
ExLlamaV2DynamicJob,
15+
ExLlamaV2Sampler,
16+
)
17+
18+
from PIL import Image
19+
import requests
20+
21+
# Model used:
#
# Quantized: https://huggingface.co/turboderp/pixtral-12b-exl2
# Unquantized: https://huggingface.co/mistral-community/pixtral-12b/

model_directory = "/mnt/str/models/pixtral-12b-exl2/6.0bpw"

# Single source of truth for the context length: the config and the cache must
# agree on it, so define the value once instead of repeating the literal.
max_seq_len = 16384  # model default is 1M, far more than this example needs

config = ExLlamaV2Config(model_directory)
config.max_seq_len = max_seq_len

# Load vision model and multimodal projector and initialize preprocessor

vision_model = ExLlamaV2VisionTower(config)
vision_model.load(progress = True)

# Load EXL2 model

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = max_seq_len)
model.load_autosplit(cache, progress = True)
tokenizer = ExLlamaV2Tokenizer(config)

# Create generator

generator = ExLlamaV2DynamicGenerator(
    model = model,
    cache = cache,
    tokenizer = tokenizer
)
49+
50+
# Util function to get a PIL image from a URL or from a file in the script's directory

def get_image(file = None, url = None):
    """
    Return a PIL.Image loaded either from *file* (resolved relative to this
    script's directory) or from *url* (streamed download via requests).
    Exactly one of the two arguments must be given.

    Raises ValueError if neither or both arguments are supplied.
    """
    # Raise instead of assert: asserts are stripped under `python -O`, so they
    # must not be relied on for argument validation.
    if bool(file) == bool(url):
        raise ValueError("Specify exactly one of 'file' or 'url'")
    if file:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        return Image.open(os.path.join(script_dir, file))
    return Image.open(requests.get(url, stream = True).raw)
60+
61+
# Convert image(s) to embeddings

# Each entry pairs a unique text alias (used as a placeholder in the prompt)
# with the PIL image it stands for.
image_sources = [
    ("{{IMAGE_1}}", get_image(file = "test_image_1.jpg")),
    ("{{IMAGE_2}}", get_image(file = "test_image_2.jpg")),
    # ("{{IMAGE_3}}", get_image(url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRSERy82bn3jpYKr1cNxMLXTyEsVvSt2wZOIQ&s")),
]

image_embeddings = []
for alias, img in image_sources:
    image_embeddings.append(
        vision_model.get_image_embeddings(
            model = model,
            tokenizer = tokenizer,
            image = img,
            text_alias = alias,
        )
    )
76+
77+
# Define a prompt using the aliases above as placeholders for image tokens. The tokenizer will replace each alias
# with a range of temporary token IDs, and the model will embed those temporary IDs from their respective sources
# rather than the model's text embedding table.
#
# The temporary IDs are unique for the lifetime of the process and persist as long as a reference is held to the
# corresponding ExLlamaV2Embedding object. This way, images can be reused between generations, or used for
# multiple jobs in a batch, and the generator will be able to apply prompt caching and deduplication to image
# tokens as well as text tokens.
#
# Image token IDs are assigned sequentially, however, so two ExLlamaV2Embedding objects created from the same
# source image will not be recognized as the same image for purposes of prompt caching etc.

prompt = (
    "[INST]{{IMAGE_1}}{{IMAGE_2}}\n"
    "What are the similarities and differences between these two experiments?[/INST]"
)
91+
92+
# Generate

streaming = True
greedy = True

# Compute the sampler settings once; the ternary was previously duplicated in
# both branches below. None lets the generator fall back to default sampling.
gen_settings = ExLlamaV2Sampler.Settings.greedy() if greedy else None

if streaming:

    # Tokenize the prompt, substituting each image alias with the temporary
    # token-ID range of its corresponding embedding.
    input_ids = tokenizer.encode(
        prompt,
        add_bos = True,
        encode_special_tokens = True,
        embeddings = image_embeddings,
    )

    job = ExLlamaV2DynamicJob(
        input_ids = input_ids,
        max_new_tokens = 500,
        decode_special_tokens = True,
        stop_conditions = [tokenizer.eos_token_id],
        gen_settings = gen_settings,
        embeddings = image_embeddings,
    )

    generator.enqueue(job)

    print()
    print(prompt, end = ""); sys.stdout.flush()

    # Stream tokens to stdout as they arrive. (The original assigned an unused
    # `eos` flag here; the loop condition alone decides when generation ends.)
    while generator.num_remaining_jobs():
        results = generator.iterate()
        for result in results:
            text = result.get("text", "")
            print(text, end = ""); sys.stdout.flush()

    print()

else:

    # Non-streaming path: one call handles tokenization, generation and decoding.
    output = generator.generate(
        prompt = prompt,
        max_new_tokens = 500,
        add_bos = True,
        encode_special_tokens = True,
        decode_special_tokens = True,
        stop_conditions = [tokenizer.eos_token_id],
        gen_settings = gen_settings,
        embeddings = image_embeddings,
    )

    print(output)
File renamed without changes.
File renamed without changes.

experimental/multimodal_pixtral_hf.py

-82
This file was deleted.

0 commit comments

Comments
 (0)