Commit c258948

feat: llama_cpp proxy chat model
1 parent e5f528d commit c258948

8 files changed: +77, -203 lines

Diff for: langchain_llamacpp_chat_model/__init__.py

+4
@@ -0,0 +1,4 @@
+from .llama_chat_model import LlamaChatModel
+from .llama_proxy_chat_model import LlamaProxyChatModel
+
+__all__ = ["LlamaChatModel", "LlamaProxyChatModel"]
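With this package-level export, both chat models can be imported directly from the package root, as the updated tests below do:

from langchain_llamacpp_chat_model import LlamaChatModel, LlamaProxyChatModel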

Diff for: langchain_llamacpp_chat_model/llama_chat_model.py

+3, -1

@@ -35,7 +35,9 @@ def __call__(self):
     def __enter__(self):
         return self()
 
-    def __exit__(self):
+    def __exit__(self, exception_type, exception_value, exception_traceback):
+        if hasattr(self.response, "close"):
+            self.response.close()
         return False
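The previous __exit__ signature was invalid (Python passes the exception triple to __exit__); the fix accepts it and closes the held streaming response when one is open. A minimal sketch of the same pattern, using a hypothetical StreamContext wrapper in place of the helper class in llama_chat_model.py:

class StreamContext:
    """Illustrative sketch only: mirrors the close-on-exit behaviour added above."""

    def __init__(self, response):
        self.response = response

    def __enter__(self):
        return self.response

    def __exit__(self, exception_type, exception_value, exception_traceback):
        # Guard with hasattr so responses without a close() method pass through.
        if hasattr(self.response, "close"):
            self.response.close()
        return False  # never swallow exceptions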

Diff for: langchain_llamacpp_chat_model/llama_proxy_chat_model.py

+3, -53

@@ -1,67 +1,17 @@
-from pydantic import Field
-from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence
-from langchain_openai.chat_models.base import BaseChatOpenAI
-from langchain_core.pydantic_v1 import BaseModel
-
-from langchain_core.callbacks import (
-    CallbackManagerForLLMRun,
-)
-from langchain_core.language_models import BaseChatModel
-from langchain_core.tools import BaseTool
-from langchain_core.runnables import Runnable
-from langchain_core.language_models.base import LanguageModelInput
-from langchain_core.messages import AIMessageChunk, BaseMessage, AIMessage
-from langchain_core.utils.function_calling import convert_to_openai_tool
-from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
-from llama_cpp import (
-    CreateCompletionResponse,
-    CreateCompletionStreamResponse,
-    Literal,
-    LlamaGrammar,
-    LogitsProcessorList,
-    StoppingCriteriaList,
-    Type,
-    Union,
-)
+from typing import Any, Dict
 from llama_cpp.server.app import LlamaProxy
 
 from langchain_llamacpp_chat_model.llama_chat_model import LlamaChatModel
 
-# Use this class until it's implemented in LangChain Community
-
 
 class LlamaProxyChatModel(LlamaChatModel):
-    model_name: str = Field(default="", alias="model")
-
-    suffix: Optional[str] = None
-    max_tokens: Optional[int] = 2048
-    temperature: float = 0.8
-    top_p: float = 0.95
-    min_p: float = 0.05
-    typical_p: float = 1.0
-    logprobs: Optional[int] = None
-    echo: bool = False
-    stop: Optional[Union[str, List[str]]] = []
-    frequency_penalty: float = 0.0
-    presence_penalty: float = 0.0
-    repeat_penalty: float = 1.1
-    top_k: int = 40
-    seed: Optional[int] = None
-    tfs_z: float = 1.0
-    mirostat_mode: int = 0
-    mirostat_tau: float = 5.0
-    mirostat_eta: float = 0.1
-    stopping_criteria: Optional[StoppingCriteriaList] = None
-    logits_processor: Optional[LogitsProcessorList] = None
-    grammar: Optional[LlamaGrammar] = None
-    logit_bias: Optional[Dict[str, float]] = None
-
     def __init__(
         self,
         llama_proxy: LlamaProxy,
         **kwargs,
     ):
-        llama = llama_proxy(self.model_name)
+        model = kwargs.get("model_name", kwargs.get("model"))
+        llama = llama_proxy(model)
         super().__init__(**kwargs, llama=llama)
 
     @property
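The constructor now resolves the backing Llama instance through the proxy: it reads the alias from model_name (or model) in kwargs and asks llama_proxy for the matching model before delegating to LlamaChatModel. A usage sketch, assuming the GGUF files referenced by the test configuration below are already cached locally:

from langchain_llamacpp_chat_model import LlamaProxyChatModel
from tests.test_functional.models_configuration import create_llama_proxy

# create_llama_proxy() registers the "llama3" and "phi3" aliases (see the test
# configuration below); the proxy loads the model that matches the given alias.
llama_proxy = create_llama_proxy()

chat_model = LlamaProxyChatModel(llama_proxy=llama_proxy, model_name="llama3")
result = chat_model.invoke("Say hello in one short sentence.")
print(result.content)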

Diff for: tests/test_functional/models_configuration.py

+28, -4

@@ -1,26 +1,50 @@
 import os
 from llama_cpp import Llama
+from llama_cpp.server.app import LlamaProxy
+from llama_cpp.server.settings import ModelSettings
 
 models_to_test = [
     {
         "repo_id": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
         "filename": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
+        "alias": "llama3",
     },
     {
         "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
         "filename": "Phi-3-mini-4k-instruct-q4.gguf",
+        "alias": "phi3",
     },
 ]
 
 
-def create_llama(request) -> Llama:
-    local_path = os.path.join(
+def _model_local_path(model) -> str:
+    return os.path.join(
         os.path.expanduser("~/.cache/lm-studio/models"),
-        request.param["repo_id"],
-        request.param["filename"],
+        model["repo_id"],
+        model["filename"],
     )
 
+
+def _create_models_settings():
+    models: list[ModelSettings] = []
+    for model in models_to_test:
+        local_path = _model_local_path(model)
+        models.append(
+            ModelSettings(model=local_path, model_alias=model["alias"], n_gpu_layers=-1)
+        )
+
+    return models
+
+
+def create_llama(request) -> Llama:
+    local_path = _model_local_path(request.param)
+
     return Llama(
         model_path=local_path,
         n_gpu_layers=-1,
     )
+
+
+def create_llama_proxy() -> LlamaProxy:
+    models = _create_models_settings()
+    return LlamaProxy(models=models)
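create_llama_proxy() turns the test model catalogue into LlamaProxy settings: each entry becomes a ModelSettings with its local GGUF path and an alias, and the proxy can later be called with an alias to obtain the loaded model. A hedged single-model sketch, with a placeholder path standing in for the LM Studio cache location:

from llama_cpp.server.app import LlamaProxy
from llama_cpp.server.settings import ModelSettings

# Placeholder path for illustration; the real path comes from _model_local_path().
settings = ModelSettings(
    model="/path/to/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
    model_alias="llama3",
    n_gpu_layers=-1,
)

proxy = LlamaProxy(models=[settings])
llama = proxy("llama3")  # returns the llama_cpp.Llama loaded for that alias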

Diff for: tests/test_functional/test_invoke.py

+1, -1

@@ -2,7 +2,7 @@
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage
 
-from langchain_llamacpp_chat_model.llama_chat_model import LlamaChatModel
+from langchain_llamacpp_chat_model import LlamaChatModel
 
 from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_core.tools import tool

Diff for: tests/test_functional/test_llama_proxy.py

+37
@@ -0,0 +1,37 @@
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage
+
+from langchain_llamacpp_chat_model import LlamaProxyChatModel
+
+from tests.test_functional.models_configuration import (
+    create_llama_proxy,
+    models_to_test,
+)
+from llama_cpp.server.app import LlamaProxy
+
+
+@pytest.fixture
+def llama_proxy() -> LlamaProxy:
+    return create_llama_proxy()
+
+
+class TestLlamaProxyChat:
+
+    @pytest.fixture(
+        params=models_to_test, ids=[config["alias"] for config in models_to_test]
+    )
+    def instance(self, llama_proxy: LlamaProxy, request):
+        return LlamaProxyChatModel(
+            llama_proxy=llama_proxy, model_name=request.param["alias"]
+        )
+
+    def test_conversation_memory(self, instance: LlamaProxyChatModel):
+        result = instance.invoke(
+            input=[
+                HumanMessage(content="Remember that I like bananas"),
+                AIMessage(content="Okay"),
+                HumanMessage(content="What do I like?"),
+            ]
+        )
+
+        assert "banana" in result.content
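One way to run just this functional test, assuming the models listed in models_configuration.py are present in the local cache (equivalent to invoking pytest on that file from the repository root):

import pytest

raise SystemExit(pytest.main(["tests/test_functional/test_llama_proxy.py", "-v"]))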

Diff for: tests/test_functional/test_stream.py

+1, -1

@@ -2,7 +2,7 @@
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage
 
-from langchain_llamacpp_chat_model.llama_chat_model import LlamaChatModel
+from langchain_llamacpp_chat_model import LlamaChatModel
 from tests.test_functional.models_configuration import create_llama, models_to_test

Diff for: tests/test_llama_proxy_cpp_chat_model.py

-143
This file was deleted.
