Implement query_multimodal for OpenAI LLMs

Summary: as titled Reviewed By: SimonWan Differential Revision: D64258836 fbshipit-source-id: 7bb8dee2e335d6e9e0c20d780521a6a5fe856ab2
meta-llama · Oct 11, 2024 · 3bbc772 · 3bbc772
1 parent a3a9712
commit 3bbc772
Showing 1 changed file with 43 additions and 0 deletions.
diff --git a/CybersecurityBenchmarks/benchmark/llm.py b/CybersecurityBenchmarks/benchmark/llm.py
@@ -592,6 +592,49 @@ def query_with_system_prompt_and_image(
         )
         return response.choices[0].message.content
 
+    @override
+    def query_multimodal(
+        self,
+        system_prompt: Optional[str] = None,
+        text_prompt: Optional[str] = None,
+        image_path: Optional[str] = None,
+        audio_path: Optional[str] = None,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
+        temperature: float = DEFAULT_TEMPERATURE,
+        top_p: float = DEFAULT_TOP_P,
+    ) -> str:
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+
+        user_message_content = []
+        if text_prompt:
+            user_message_content.append({"type": "text", "text": text_prompt})
+
+        if image_path:
+            image_data = image_to_b64(image_path)
+            user_message_content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64, {image_data}"},
+                }
+            )
+
+        if audio_path:
+            raise NotImplementedError("Audio input in OpenAI is not yet supported")
+
+        messages.append(
+            {
+                "role": "user",
+                "content": user_message_content,
+            }
+        )
+
+        response = self.client.chat.completions.create(
+            model=self.model, messages=messages, max_tokens=max_tokens
+        )
+        return response.choices[0].message.content
+
 
 class ANYSCALE(LLM):
     """Accessing ANYSCALE"""