Skip to content

Commit 2fd41f9

Browse files
committed
Add moondream support (wip)
1 parent 3cef09c commit 2fd41f9

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

llama_cpp/llama_chat_format.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2477,6 +2477,32 @@ def from_pretrained(
24772477
**kwargs,
24782478
)
24792479

2480+
class MoondreamChatHanlder(Llava15ChatHandler):
    """Chat handler for the Moondream vision-language model.

    Renders OpenAI-style chat messages into Moondream's prompt format:

        <image>\\n\\nQuestion: {prompt}\\n\\nAnswer:

    Image content parts are emitted first, then each text part as a
    ``Question:`` line; assistant turns become ``Answer:`` lines.

    NOTE(review): class name keeps its original (misspelled) "Hanlder"
    spelling for backward compatibility; a correctly spelled alias is
    defined below.
    """

    # Chat Format:
    # <image>\n\nQuestion: {prompt}\n\nAnswer:
    CHAT_FORMAT = (
        "{% for message in messages %}"
        "{% if message.role == 'user' %}"
        "{% if message.content is iterable %}"
        "{% for content in message.content %}"
        "{% if content.type == 'image_url' %}"
        # Accept both the plain-string form and the OpenAI
        # {'url': ...} mapping form of image_url content parts.
        "{% if content.image_url is string %}"
        "{{ content.image_url }}\n\n"
        "{% endif %}"
        "{% if content.image_url is mapping %}"
        "{{ content.image_url.url }}\n\n"
        "{% endif %}"
        "{% endif %}"
        "{% if content.type == 'text' %}"
        # \n\n separators match the documented prompt format above.
        "Question: {{ content.text }}\n\n"
        "{% endif %}"
        "{% endfor %}"
        "{% endif %}"
        "{% endif %}"
        "{% if message.role == 'assistant' %}"
        "Answer: {{ message.content }}\n\n"
        "{% endif %}"
        "{% endfor %}"
        "{% if add_generation_prompt %}"
        "Answer: "
        "{% endif %}"
    )


# Correctly spelled, backward-compatible alias ("Hanlder" is a typo kept
# above because existing callers reference it by that name).
MoondreamChatHandler = MoondreamChatHanlder
2505+
24802506

24812507
@register_chat_completion_handler("chatml-function-calling")
24822508
def chatml_function_calling(

llama_cpp/server/model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
8484
chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(
8585
clip_model_path=settings.clip_model_path, verbose=settings.verbose
8686
)
87+
elif settings.chat_format == "moondream":
88+
assert settings.clip_model_path is not None, "clip model not found"
89+
if settings.hf_model_repo_id is not None:
90+
chat_handler = (
91+
llama_cpp.llama_chat_format.MoondreamChatHanlder.from_pretrained(
92+
repo_id=settings.hf_model_repo_id,
93+
filename=settings.clip_model_path,
94+
verbose=settings.verbose,
95+
)
96+
)
97+
else:
98+
chat_handler = llama_cpp.llama_chat_format.MoondreamChatHanlder(
99+
clip_model_path=settings.clip_model_path, verbose=settings.verbose
100+
)
87101
elif settings.chat_format == "hf-autotokenizer":
88102
assert (
89103
settings.hf_pretrained_model_name_or_path is not None

0 commit comments

Comments
 (0)