From 426c4e0fd5dd580c98e5b37b5be1363c43fbeea7 Mon Sep 17 00:00:00 2001
From: dborodin836
Date: Wed, 19 Jun 2024 12:45:15 +0300
Subject: [PATCH 1/3] Add \img option

---
 commands.yaml                   |  5 ++++-
 modules/builder.py              |  2 ++
 modules/conversation_history.py | 37 +++++++++++++++++++++++++++++----
 modules/typing.py               |  4 ++--
 schemas/commands.schema.json    | 15 +++++++++++++++
 5 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/commands.yaml b/commands.yaml
index d5c39d6..c3e5df7 100644
--- a/commands.yaml
+++ b/commands.yaml
@@ -14,7 +14,10 @@ commands:
     prefix: '!'
     provider: open-ai
     type: quick-query
-    model: gpt-4-1106-preview
+    model: gpt-4o
+    settings:
+      allow-img: true
+      img-detail: low
     traits:
       - admin-only
       - openai-moderated
diff --git a/modules/builder.py b/modules/builder.py
index 6423a1d..f07c326 100644
--- a/modules/builder.py
+++ b/modules/builder.py
@@ -60,6 +60,8 @@ class InvalidCommandException(Exception): ...
     "allow-long",
     "enable-hard-limit",
     "hard-limit-length",
+    "allow-img",
+    "img-detail"
 }
 
 
diff --git a/modules/conversation_history.py b/modules/conversation_history.py
index 859437e..40e6ddf 100644
--- a/modules/conversation_history.py
+++ b/modules/conversation_history.py
@@ -1,3 +1,8 @@
+import base64
+from io import BytesIO
+import mss
+import mss.tools
+from PIL import Image
 from modules.lobby_manager import lobby_manager
 from modules.typing import Message, MessageHistory
 from modules.utils.prompts import PROMPTS, get_prompt_by_name
@@ -24,8 +29,8 @@ def _get_system_message(self) -> Message:
 
         # Soft limiting the response
         if (
-            self.settings.get("enable-soft-limit") is True
-            or self.settings.get("enable-soft-limit") is None
+                self.settings.get("enable-soft-limit") is True
+                or self.settings.get("enable-soft-limit") is None
         ):
             enable_soft_limit = self.enable_soft_limit
         else:
@@ -70,7 +75,7 @@ def add_assistant_message(self, message: Message) -> None:
         self.message_history.append(message)
 
     def add_user_message_from_prompt(
-        self, user_prompt: str, enable_soft_limit: bool = True
+            self, user_prompt: str, enable_soft_limit: bool = True
     ) -> None:
         user_message = remove_args(user_prompt)
         args = get_args(user_prompt)
@@ -87,10 +92,34 @@
         if r"\stats" in args:
             self.enable_stats = True
 
+        if r"\img" in args and self.settings.get("allow-img"):
+            sct = mss.mss()
+            monitor = sct.monitors[1]
+            scr = sct.grab(monitor)
+            img = Image.frombytes("RGB", scr.size, scr.bgra, "raw", "BGRX")
+            buffered = BytesIO()
+            img.save(buffered, format="JPEG")
+            base64_encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+            content = [
+                {
+                    "type": "text",
+                    "text": f"{user_message}"
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_encoded_image}",
+                        "detail": self.settings.get("img-detail") or "low"
+                    }
+                }]
+        else:
+            content = user_message
+
         # Don't add a message if the user prompt is empty.
         # Some LLM providers will complain about that, which effectively kills the chat.
         if user_message != "":
-            self.message_history.append(Message(role="user", content=user_message))
+            self.message_history.append(Message(role="user", content=content))
 
     def reset_turn(self):
         self.enable_soft_limit = True
diff --git a/modules/typing.py b/modules/typing.py
index b4dfabd..db6f523 100644
--- a/modules/typing.py
+++ b/modules/typing.py
@@ -1,11 +1,11 @@
-from typing import Callable, List, Literal, NamedTuple, Optional, TypedDict
+from typing import Callable, List, Literal, NamedTuple, Optional, TypedDict, Union, Dict
 
 from pydantic import BaseModel
 
 
 class Message(TypedDict):
     role: Literal["assistant", "user", "system"]
-    content: str
+    content: Union[str, List[Dict[Literal["text", "image_url"], Union[str, Dict[str, str]]]]]
 
 
 MessageHistory = List[Message]
diff --git a/schemas/commands.schema.json b/schemas/commands.schema.json
index 3d6af31..58624f4 100644
--- a/schemas/commands.schema.json
+++ b/schemas/commands.schema.json
@@ -74,6 +74,16 @@
         },
         "hard-limit-length": {
           "type": "integer"
+        },
+        "allow-img": {
+          "type": "boolean"
+        },
+        "img-detail": {
+          "type": "string",
+          "enum": [
+            "low",
+            "high"
+          ]
         }
       },
       "dependencies": {
@@ -86,6 +96,11 @@
           "required": [
             "hard-limit-length"
           ]
+        },
+        "img-detail": {
+          "required": [
+            "allow-img"
+          ]
         }
       }
     },

From a2311473cb2bfce487ec98a9bdc54ff2213815b6 Mon Sep 17 00:00:00 2001
From: dborodin836
Date: Wed, 19 Jun 2024 16:12:19 +0300
Subject: [PATCH 2/3] add option for multi-monitor setups

---
 commands.yaml                   | 3 ---
 modules/builder.py              | 3 ++-
 modules/conversation_history.py | 2 +-
 schemas/commands.schema.json    | 8 ++++++++
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/commands.yaml b/commands.yaml
index c3e5df7..b197d6a 100644
--- a/commands.yaml
+++ b/commands.yaml
@@ -15,9 +15,6 @@ commands:
     provider: open-ai
     type: quick-query
     model: gpt-4o
-    settings:
-      allow-img: true
-      img-detail: low
     traits:
       - admin-only
       - openai-moderated
diff --git a/modules/builder.py b/modules/builder.py
index f07c326..4dae71c 100644
--- a/modules/builder.py
+++ b/modules/builder.py
@@ -61,7 +61,8 @@ class InvalidCommandException(Exception): ...
"enable-hard-limit", "hard-limit-length", "allow-img", - "img-detail" + "img-detail", + "img-screen-id" } diff --git a/modules/conversation_history.py b/modules/conversation_history.py index 40e6ddf..489b566 100644 --- a/modules/conversation_history.py +++ b/modules/conversation_history.py @@ -94,7 +94,7 @@ def add_user_message_from_prompt( if r"\img" in args and self.settings.get("allow-img"): sct = mss.mss() - monitor = sct.monitors[1] + monitor = sct.monitors[self.settings.get("img-screen-id", 1)] scr = sct.grab(monitor) img = Image.frombytes("RGB", scr.size, scr.bgra, "raw", "BGRX") buffered = BytesIO() diff --git a/schemas/commands.schema.json b/schemas/commands.schema.json index 58624f4..f4b38f5 100644 --- a/schemas/commands.schema.json +++ b/schemas/commands.schema.json @@ -78,6 +78,9 @@ "allow-img": { "type": "boolean" }, + "img-screen-id": { + "type": "integer" + }, "img-detail": { "type": "string", "enum": [ @@ -97,6 +100,11 @@ "hard-limit-length" ] }, + "img-screen-id": { + "required": [ + "allow-img" + ] + }, "img-detail": { "required": [ "allow-img" From dee47765049a39d6818bf32818de23de6f1e7537 Mon Sep 17 00:00:00 2001 From: dborodin836 Date: Wed, 19 Jun 2024 16:35:03 +0300 Subject: [PATCH 3/3] add mss to requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 13480b4..9e287c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ pytest-repeat==0.9.3 groq==0.5.0 pyyaml==6.0.1 isort==5.13.2 -black==24.4.2 \ No newline at end of file +black==24.4.2 +mss==9.0.1 \ No newline at end of file