diff --git a/commands.yaml b/commands.yaml index d5c39d6..b197d6a 100644 --- a/commands.yaml +++ b/commands.yaml @@ -14,7 +14,7 @@ commands: prefix: '!' provider: open-ai type: quick-query - model: gpt-4-1106-preview + model: gpt-4o traits: - admin-only - openai-moderated diff --git a/modules/builder.py b/modules/builder.py index 6423a1d..4dae71c 100644 --- a/modules/builder.py +++ b/modules/builder.py @@ -60,6 +60,9 @@ class InvalidCommandException(Exception): ... "allow-long", "enable-hard-limit", "hard-limit-length", + "allow-img", + "img-detail", + "img-screen-id" } diff --git a/modules/conversation_history.py b/modules/conversation_history.py index 859437e..489b566 100644 --- a/modules/conversation_history.py +++ b/modules/conversation_history.py @@ -1,3 +1,8 @@ +import base64 +from io import BytesIO +import mss +import mss.tools +from PIL import Image from modules.lobby_manager import lobby_manager from modules.typing import Message, MessageHistory from modules.utils.prompts import PROMPTS, get_prompt_by_name @@ -24,8 +29,8 @@ def _get_system_message(self) -> Message: # Soft limiting the response if ( - self.settings.get("enable-soft-limit") is True - or self.settings.get("enable-soft-limit") is None + self.settings.get("enable-soft-limit") is True + or self.settings.get("enable-soft-limit") is None ): enable_soft_limit = self.enable_soft_limit else: @@ -70,7 +75,7 @@ def add_assistant_message(self, message: Message) -> None: self.message_history.append(message) def add_user_message_from_prompt( - self, user_prompt: str, enable_soft_limit: bool = True + self, user_prompt: str, enable_soft_limit: bool = True ) -> None: user_message = remove_args(user_prompt) args = get_args(user_prompt) @@ -87,10 +92,34 @@ def add_user_message_from_prompt( if r"\stats" in args: self.enable_stats = True + if r"\img" in args and self.settings.get("allow-img"): + sct = mss.mss() + monitor = sct.monitors[self.settings.get("img-screen-id", 1)] + scr = sct.grab(monitor) + img = Image.frombytes("RGB", scr.size, scr.bgra, "raw", "BGRX") + buffered = BytesIO() + img.save(buffered, format="JPEG") + base64_encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8") + + content = [ + { + "type": "text", + "text": f"{user_message}" + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_encoded_image}", + "detail": self.settings.get("img-detail") or "low" + } + }] + else: + content = user_message + # Don't add a message if the user prompt is empty. # Some LLM providers will complain about that, which effectively kills the chat. if user_message != "": - self.message_history.append(Message(role="user", content=user_message)) + self.message_history.append(Message(role="user", content=content)) def reset_turn(self): self.enable_soft_limit = True diff --git a/modules/typing.py b/modules/typing.py index b4dfabd..db6f523 100644 --- a/modules/typing.py +++ b/modules/typing.py @@ -1,11 +1,11 @@ -from typing import Callable, List, Literal, NamedTuple, Optional, TypedDict +from typing import Callable, List, Literal, NamedTuple, Optional, TypedDict, Union, Dict from pydantic import BaseModel class Message(TypedDict): role: Literal["assistant", "user", "system"] - content: str + content: Union[str, List[Dict[Literal["text", "image_url"], Union[str, Dict[str, str]]]]] MessageHistory = List[Message] diff --git a/requirements.txt b/requirements.txt index 13480b4..9e287c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ pytest-repeat==0.9.3 groq==0.5.0 pyyaml==6.0.1 isort==5.13.2 -black==24.4.2 \ No newline at end of file +black==24.4.2 +mss==9.0.1 \ No newline at end of file diff --git a/schemas/commands.schema.json b/schemas/commands.schema.json index 3d6af31..f4b38f5 100644 --- a/schemas/commands.schema.json +++ b/schemas/commands.schema.json @@ -74,6 +74,19 @@ }, "hard-limit-length": { "type": "integer" + }, + "allow-img": { + "type": "boolean" + }, + "img-screen-id": { + "type": "integer" + }, + "img-detail": { + "type": "string", + "enum": [ + "low", + "high" + ] } }, "dependencies": { @@ -86,6 +99,16 @@ "required": [ "hard-limit-length" ] + }, + "img-screen-id": { + "required": [ + "allow-img" + ] + }, + "img-detail": { + "required": [ + "allow-img" + ] } } },