Skip to content

Commit 1eafdfb

Browse files
feat: cost and capabilities config for custom litellm models (#481)
1 parent 01b1859 commit 1eafdfb

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed

packages/ragbits-core/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
- Make the score in VectorStoreResult consistent (always bigger is better)
55
- Add router option to LiteLLMEmbedder (#440)
66
- Fix: make unflatten_dict symmetric to flatten_dict (#461)
7+
- Cost and capabilities config for custom litellm models (#481)
78

89
## 0.12.0 (2025-03-25)
910
- Allow Prompt class to accept the asynchronous response_parser. Change the signature of parse_response method.

packages/ragbits-core/src/ragbits/core/llms/litellm.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from collections.abc import AsyncGenerator
1+
from collections.abc import AsyncGenerator, Callable
22
from typing import Any
33

44
import litellm
@@ -57,6 +57,7 @@ def __init__(
5757
api_version: str | None = None,
5858
use_structured_output: bool = False,
5959
router: litellm.Router | None = None,
60+
custom_model_cost_config: dict | None = None,
6061
) -> None:
6162
"""
6263
Constructs a new LiteLLM instance.
@@ -74,13 +75,20 @@ def __init__(
7475
[structured output](https://docs.litellm.ai/docs/completion/json_mode#pass-in-json_schema)
7576
from the model. Default is False. Can only be combined with models that support structured output.
7677
router: Router to be used to [route requests](https://docs.litellm.ai/docs/routing) to different models.
78+
custom_model_cost_config: Custom cost and capabilities configuration for the model.
79+
Necessary for custom model cost and capabilities tracking in LiteLLM.
80+
See the [LiteLLM documentation](https://docs.litellm.ai/docs/completion/token_usage#9-register_model)
81+
for more information.
7782
"""
7883
super().__init__(model_name, default_options)
7984
self.base_url = base_url
8085
self.api_key = api_key
8186
self.api_version = api_version
8287
self.use_structured_output = use_structured_output
8388
self.router = router
89+
self.custom_model_cost_config = custom_model_cost_config
90+
if custom_model_cost_config:
91+
litellm.register_model(custom_model_cost_config)
8492

8593
def count_tokens(self, prompt: BasePrompt) -> int:
8694
"""
@@ -257,3 +265,17 @@ def from_config(cls, config: dict[str, Any]) -> Self:
257265
router = litellm.router.Router(model_list=config["router"])
258266
config["router"] = router
259267
return super().from_config(config)
268+
269+
def __reduce__(self) -> tuple[Callable, tuple]:
    """Make instances picklable by rebuilding them through ``from_config``.

    A live litellm Router is not picklable, so only its ``model_list`` is
    captured; ``from_config`` constructs a fresh Router from that list when
    the object is restored.
    """
    state = {
        "model_name": self.model_name,
        "default_options": self.default_options.dict(),
        "base_url": self.base_url,
        "api_key": self.api_key,
        "api_version": self.api_version,
        "use_structured_output": self.use_structured_output,
        "custom_model_cost_config": self.custom_model_cost_config,
    }
    if self.router:
        # Persist just the declarative model list, not the Router object.
        state["router"] = self.router.model_list
    return self.from_config, (state,)

packages/ragbits-core/tests/unit/llms/test_litellm.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import pickle
2+
from unittest.mock import patch
3+
14
import pytest
5+
from litellm import Router
26
from pydantic import BaseModel
37

48
from ragbits.core.llms.exceptions import LLMNotSupportingImagesError
@@ -176,3 +180,85 @@ async def test_generation_without_image_support():
176180
prompt = MockPromptWithImage("Hello, what is on this image?")
177181
with pytest.raises(LLMNotSupportingImagesError):
178182
await llm.generate(prompt)
183+
184+
185+
async def test_pickling():
    """A LiteLLM instance survives a pickle round-trip with all settings intact."""
    cost_config = {
        "gpt-3.5-turbo": {
            "support_vision": True,
        }
    }
    original = LiteLLM(
        model_name="gpt-3.5-turbo",
        default_options=LiteLLMOptions(mock_response="I'm fine, thank you."),
        custom_model_cost_config=cost_config,
        use_structured_output=True,
        router=Router(),
        base_url="https://api.litellm.ai",
        api_key="test_key",
        api_version="v1",
    )

    restored = pickle.loads(pickle.dumps(original))  # noqa: S301

    assert restored.model_name == "gpt-3.5-turbo"
    assert restored.default_options.mock_response == "I'm fine, thank you."
    assert restored.custom_model_cost_config == cost_config
    assert restored.use_structured_output
    # The router is rebuilt from its (empty) model list, not pickled directly.
    assert restored.router.model_list == []
    assert restored.base_url == "https://api.litellm.ai"
    assert restored.api_key == "test_key"
    assert restored.api_version == "v1"
214+
215+
216+
async def test_init_registers_model_with_custom_cost_config():
    """Passing a cost config at construction time registers it with LiteLLM."""
    cost_config = {
        "some_model": {
            "support_vision": True,
            "input_cost_per_token": 0.0015,
            "output_cost_per_token": 0.002,
            "max_tokens": 4096,
        }
    }

    with patch("litellm.register_model") as register_mock:
        LiteLLM(
            model_name="some_model",
            custom_model_cost_config=cost_config,
        )

    # Construction must forward the exact config to litellm.register_model.
    register_mock.assert_called_once_with(cost_config)
236+
237+
238+
async def test_init_does_not_register_model_if_no_cost_config_is_provided():
    """Without a cost config, construction never touches litellm.register_model."""
    with patch("litellm.register_model") as register_mock:
        LiteLLM(model_name="some_model")

    register_mock.assert_not_called()
245+
246+
247+
async def test_pickling_registers_model_with_custom_cost_config():
    """Unpickling re-registers the custom cost config with LiteLLM."""
    cost_config = {
        "some_model": {
            "support_vision": True,
            "input_cost_per_token": 0.0015,
            "output_cost_per_token": 0.002,
            "max_tokens": 4096,
        }
    }
    llm = LiteLLM(
        model_name="some_model",
        custom_model_cost_config=cost_config,
    )

    # Registration happens again on load, because unpickling goes through __init__.
    with patch("litellm.register_model") as register_mock:
        restored = pickle.loads(pickle.dumps(llm))  # noqa: S301

    assert restored.custom_model_cost_config == cost_config
    register_mock.assert_called_once_with(cost_config)

0 commit comments

Comments
 (0)