Commit bec13a6

Merge pull request #650 from macrocosm-os/staging (v2.17.7)
2 parents: 6f9dbb7 + 6aec8b0

18 files changed: +2009 -1803 lines

poetry.lock (+1,652 -1,701)

Generated lockfile; diff not rendered.

prompting/api/scoring/api.py (-2)

@@ -55,9 +55,7 @@ def get_task_scorer(request: Request):
 async def score_response(
     request: Request, api_key_data: dict = Depends(verify_scoring_signature), task_scorer=Depends(get_task_scorer)
 ):
-    logger.debug("Scoring Request received!!!!!!!!!!!!!!!!")
     model = None
-    logger.debug("Setted Model to None")
     payload: dict[str, Any] = await request.json()
     logger.debug(f"Awaited body: {payload}")
     body = payload.get("body")

prompting/datasets/sn13.py (+5 -2)

@@ -38,9 +38,12 @@ def sample(self) -> ChatEntry:
             raise self.exception
         # Randomly select a sample from the dataset.
         messages = []
-        for _ in range(4):
+        for i in range(4):
            sample_idx = random.randint(0, len(self.dataset) - 1)
            if message := self.dataset[sample_idx]["text"]:
-                messages.append({"role": random.choice(["user", "assistant"]), "content": message})
+                if i % 2 == 0:
+                    messages.append({"role": "user", "content": message})
+                else:
+                    messages.append({"role": "assistant", "content": message})

         return ChatEntry(messages=messages, organic=False, source=self._url)
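
The loop now alternates roles deterministically instead of choosing them at random, so every sampled conversation follows a user/assistant/user/assistant pattern. A minimal sketch of the new behaviour in isolation, using an illustrative stand-in list instead of the real dataset:

import random

# Illustrative stand-in for self.dataset[...]["text"]; not part of the commit.
texts = ["first prompt", "first reply", "second prompt", "second reply"]

messages = []
for i in range(4):
    message = texts[random.randint(0, len(texts) - 1)]
    if message:
        # Even loop indices become user turns, odd indices assistant turns.
        role = "user" if i % 2 == 0 else "assistant"
        messages.append({"role": role, "content": message})

Because the role is keyed to the loop index rather than to the previously appended message, a skipped empty sample can still leave two consecutive turns with the same role.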

prompting/llms/hf_llm.py (+34 -47)

@@ -1,66 +1,58 @@
 import random
+from abc import abstractmethod

 import numpy as np
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, pipeline
+from loguru import logger
+
+try:
+    import torch
+except ImportError:
+    logger.warning("torch is not installed. This module will not be available.")


 class ReproducibleHF:
-    def __init__(
-        self,
-        model_id: str = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
-        device: str = "cuda:0",
-        sampling_params: dict[str, str | float | int | bool] | None = None,
-    ):
-        """Deterministic HuggingFace model."""
+    def __init__(self, model_id: str, device: str, sampling_params: dict[str, str | float | int | bool] | None = None):
+        self.model_id = model_id
         self._device = device
-        self.sampling_params = {} if sampling_params is None else sampling_params
-        self.model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-            device_map=self._device,
-        )
+        self.sampling_params = sampling_params if sampling_params else {}

-        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.valid_generation_params = set(
-            AutoModelForCausalLM.from_pretrained(model_id).generation_config.to_dict().keys()
-        )
-        self.llm = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)
+    @staticmethod
+    @abstractmethod
+    def format_messages(messages: list[str] | list[dict[str, str]]) -> list[dict[str, str | list[dict[str, str]]]]:
+        raise NotImplementedError("This method must be implemented by the subclass")

-    @torch.inference_mode()
     def generate(
         self,
         messages: list[str] | list[dict[str, str]],
         sampling_params: dict[str, str | float | int | bool] | None = None,
         seed: int | None = None,
     ) -> str:
         """Generate text with optimized performance."""
-        self.set_random_seeds(seed)
+        with torch.inference_mode():
+            self.set_random_seeds(seed)

-        inputs = self.tokenizer.apply_chat_template(
-            messages,
-            tokenize=True,
-            add_generation_prompt=True,
-            return_tensors="pt",
-            return_dict=True,
-        ).to(self._device)
+            inputs = self.tokenizer.apply_chat_template(
+                self.message_formater(messages),
+                tokenize=True,
+                add_generation_prompt=True,
+                return_tensors="pt",
+                return_dict=True,
+            ).to(self._device)

-        params = sampling_params if sampling_params else self.sampling_params
-        filtered_params = {k: v for k, v in params.items() if k in self.valid_generation_params}
+            params = sampling_params if sampling_params else self.sampling_params
+            filtered_params = {k: v for k, v in params.items() if k in self.valid_generation_params}

-        outputs = self.model.generate(
-            **inputs,
-            **filtered_params,
-            eos_token_id=self.tokenizer.eos_token_id,
-        )
+            outputs = self.model.generate(
+                **inputs,
+                **filtered_params,
+            )

-        results = self.tokenizer.batch_decode(
-            outputs[:, inputs["input_ids"].shape[1] :],
-            skip_special_tokens=True,
-        )[0]
+            results = self.tokenizer.batch_decode(
+                outputs[:, inputs["input_ids"].shape[1] :],
+                skip_special_tokens=True,
+            )[0]

-        return results if len(results) > 1 else results[0]
+            return results if len(results) > 1 else results[0]

     def set_random_seeds(self, seed: int | None = 42):
         """Set random seeds for reproducibility across all relevant libraries."""
@@ -72,8 +64,3 @@ def set_random_seeds(self, seed: int | None = 42):
             torch.cuda.manual_seed_all(seed)
             torch.backends.cudnn.deterministic = True
             torch.backends.cudnn.benchmark = False
-
-
-# if __name__ == "__main__":
-#     llm = ReproducibleHF(model="Qwen/Qwen2-0.5B", tensor_parallel_size=1, seed=42)
-#     llm.generate({"role": "user", "content": "Hello, world!"})
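
With this change, ReproducibleHF becomes a lightweight base class: model and tokenizer loading move to subclasses, and generate() routes the prompt through a message_formater callable that each subclass is expected to set, typically to its own format_messages implementation. A rough sketch of the contract a subclass has to fulfil (the MyTextModel name and its placeholder loading comments are hypothetical; the concrete implementations are the two new files below):

from prompting.llms.hf_llm import ReproducibleHF


class MyTextModel(ReproducibleHF):
    # Hypothetical subclass illustrating what ReproducibleHF.generate expects.
    def __init__(self, model_id: str, device: str = "cuda:0", sampling_params=None):
        super().__init__(model_id, device, sampling_params)
        # A real subclass must set self.model, self.tokenizer and
        # self.valid_generation_params here (see HFTextGeneration below),
        # and wire up the formatter used by generate():
        self.message_formater = self.format_messages

    @staticmethod
    def format_messages(messages):
        # Plain chat messages can be passed through unchanged; multimodal
        # models may need to rewrap "content" strings (see HFTextImageToText).
        return messages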

prompting/llms/hf_text.py (+32)

@@ -0,0 +1,32 @@
+from loguru import logger
+
+try:
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel
+except ImportError:
+    logger.warning("Transformers or torch is not installed. This module will not be available.")
+
+from .hf_llm import ReproducibleHF
+
+
+class HFTextGeneration(ReproducibleHF):
+    def __init__(
+        self,
+        model_id: str = "meta-llama/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
+        device: str = "cuda:0",
+        sampling_params: dict[str, str | float | int | bool] | None = None,
+    ):
+        super().__init__(model_id, device, sampling_params)
+        self.model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True,
+            device_map=self._device,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.valid_generation_params = set(self.model.generation_config.to_dict().keys())
+        self.message_formater = self.format_messages
+
+    @staticmethod
+    def format_messages(messages: list[str] | list[dict[str, str]]) -> list[dict[str, str | list[dict[str, str]]]]:
+        return messages
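
Assuming the model weights can be downloaded and a CUDA device is available, usage would look roughly like this (the sampling parameters are illustrative values, not taken from this commit):

from prompting.llms.hf_text import HFTextGeneration

llm = HFTextGeneration(
    model_id="meta-llama/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    device="cuda:0",
    sampling_params={"max_new_tokens": 256, "temperature": 0.7},  # illustrative values
)
# generate() seeds random/numpy/torch for reproducibility, applies the chat
# template, and returns only the newly generated text.
print(llm.generate([{"role": "user", "content": "Hello, world!"}], seed=42))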

prompting/llms/hf_text_image.py (+61)

@@ -0,0 +1,61 @@
+from loguru import logger
+
+try:
+    import torch
+    from transformers import AutoModelForImageTextToText, AutoProcessor
+except ImportError:
+    logger.warning("Transformers or torch is not installed. This module will not be available.")
+
+from prompting.llms.hf_llm import ReproducibleHF
+
+
+class HFTextImageToText(ReproducibleHF):
+    def __init__(
+        self,
+        model_id: str = "google/gemma-3-27b-it",
+        device: str = "cuda:0",
+        sampling_params: dict[str, str | float | int | bool] | None = None,
+    ):
+        super().__init__(model_id, device, sampling_params)
+        self.model: AutoModelForImageTextToText = AutoModelForImageTextToText.from_pretrained(
+            model_id,
+            torch_dtype=torch.bfloat16,
+            device_map=self._device,
+        )
+        self.tokenizer = AutoProcessor.from_pretrained(model_id)
+        self.valid_generation_params = set(self.model.generation_config.to_dict().keys())
+        self.message_formater = HFTextImageToText.format_messages
+
+    @staticmethod
+    def format_messages(messages: list[str] | list[dict[str, str]]) -> list[dict[str, str | list[dict[str, str]]]]:
+        """Format the messages for the gemma model.
+
+        Converts message content strings to dictionaries with type and text fields.
+        Example:
+            Input: [{"role": "user", "content": "Hello"}]
+            Output: [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
+        """
+        formatted_messages = []
+        # Check if the message is a list of only one element and that element is a list
+        if isinstance(messages, list) and len(messages) == 1 and isinstance(messages[0], list):
+            messages = messages[0]
+        for message in messages:
+            if isinstance(message, dict) and "content" in message:
+                # If content is a string, convert it to a list with a dictionary
+                if isinstance(message["content"], str):
+                    formatted_message = message.copy()
+                    formatted_message["content"] = [{"type": "text", "text": message["content"]}]
+                    formatted_messages.append(formatted_message)
+                else:
+                    # If content is already in the correct format, keep it as is
+                    formatted_messages.append(message)
+            else:
+                # Handle other message formats if needed
+                formatted_messages.append(message)
+
+        return formatted_messages
+
+
+if __name__ == "__main__":
+    model = HFTextImageToText(model_id="google/gemma-3-27b-it", device="cuda:0")
+    print(model.generate([{"role": "user", "content": "What's ur name?"}]))
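
format_messages only rewraps plain-string content into the nested structure the processor's chat template expects; a quick illustration of the transformation (no model loading needed, since it is a staticmethod):

from prompting.llms.hf_text_image import HFTextImageToText

messages = [{"role": "user", "content": "Describe this image."}]
print(HFTextImageToText.format_messages(messages))
# -> [{"role": "user", "content": [{"type": "text", "text": "Describe this image."}]}]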
