[HWORKS-1221][HWORKS-1224] Bump protobuf, add LLM signature and openai-API endpoint #350

Merged 3 commits on Oct 7, 2024
2 changes: 2 additions & 0 deletions python/hopsworks_common/constants.py
@@ -158,6 +158,7 @@ class MODEL:
     FRAMEWORK_TORCH = "TORCH"
     FRAMEWORK_PYTHON = "PYTHON"
     FRAMEWORK_SKLEARN = "SKLEARN"
+    FRAMEWORK_LLM = "LLM"


 class MODEL_REGISTRY:
@@ -210,6 +211,7 @@ class PREDICTOR:
     # model server
     MODEL_SERVER_PYTHON = "PYTHON"
     MODEL_SERVER_TF_SERVING = "TENSORFLOW_SERVING"
+    MODEL_SERVER_VLLM = "VLLM"
     # serving tool
     SERVING_TOOL_DEFAULT = "DEFAULT"
     SERVING_TOOL_KSERVE = "KSERVE"
3 changes: 3 additions & 0 deletions python/hsml/core/serving_api.py
@@ -417,4 +417,7 @@ def _get_hopsworks_inference_path(self, project_id: int, deployment_instance):
         ]

     def _get_istio_inference_path(self, deployment_instance):
+        if deployment_instance.model_server == "VLLM":
+            return ["openai", "v1", "completions"]
+
         return ["v1", "models", deployment_instance.name + ":predict"]
5 changes: 4 additions & 1 deletion python/hsml/engine/serving_engine.py
@@ -493,7 +493,10 @@ def predict(
         inputs: Union[Dict, List[Dict]],
     ):
         # validate user-provided payload
-        self._validate_inference_payload(deployment_instance.api_protocol, data, inputs)
+        if deployment_instance.model_server != "VLLM":
+            self._validate_inference_payload(
+                deployment_instance.api_protocol, data, inputs
+            )

         # build inference payload based on API protocol
         payload = self._build_inference_payload(
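The KServe-oriented payload validation is skipped for vLLM deployments because their request bodies follow the OpenAI completions schema rather than the `instances` shape the existing checks expect. A sketch of the two shapes, with made-up values:

```python
# KServe-style payload, still validated as before.
kserve_payload = {"instances": [[1.0, 2.0, 3.0]]}

# OpenAI-completions-style payload for a vLLM deployment, forwarded as-is.
# Field names follow the OpenAI API; the values are hypothetical.
openai_payload = {
    "model": "mymodel",
    "prompt": "What is a feature store?",
    "max_tokens": 64,
    "temperature": 0.7,
}
```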
15 changes: 15 additions & 0 deletions python/hsml/llm/__init__.py
@@ -0,0 +1,15 @@
#
# Copyright 2024 Hopsworks AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
75 changes: 75 additions & 0 deletions python/hsml/llm/model.py
@@ -0,0 +1,75 @@
#
# Copyright 2024 Hopsworks AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import humps
from hsml.constants import MODEL
from hsml.model import Model


class Model(Model):
    """Metadata object representing an LLM model in the Model Registry."""

    def __init__(
        self,
        id,
        name,
        version=None,
        created=None,
        creator=None,
        environment=None,
        description=None,
        project_name=None,
        metrics=None,
        program=None,
        user_full_name=None,
        model_schema=None,
        training_dataset=None,
        input_example=None,
        model_registry_id=None,
        tags=None,
        href=None,
        feature_view=None,
        training_dataset_version=None,
        **kwargs,
    ):
        super().__init__(
            id,
            name,
            version=version,
            created=created,
            creator=creator,
            environment=environment,
            description=description,
            project_name=project_name,
            metrics=metrics,
            program=program,
            user_full_name=user_full_name,
            model_schema=model_schema,
            training_dataset=training_dataset,
            input_example=input_example,
            framework=MODEL.FRAMEWORK_LLM,
            model_registry_id=model_registry_id,
            feature_view=feature_view,
            training_dataset_version=training_dataset_version,
        )

    def update_from_response_json(self, json_dict):
        json_decamelized = humps.decamelize(json_dict)
        json_decamelized.pop("framework")
        if "type" in json_decamelized:  # backwards compatibility
            _ = json_decamelized.pop("type")
        self.__init__(**json_decamelized)
        return self
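The subclass only pins the framework before delegating to the base `Model`, so constructing one looks like any other framework-specific model. A minimal sketch with hypothetical metadata:

```python
from hsml.llm.model import Model as LLMModel

# Hypothetical metadata; the framework is set to MODEL.FRAMEWORK_LLM by the
# subclass constructor itself, so callers never pass it in.
model = LLMModel(id=None, name="mistral-7b-instruct", version=1)
```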
28 changes: 28 additions & 0 deletions python/hsml/llm/predictor.py
@@ -0,0 +1,28 @@
#
# Copyright 2024 Hopsworks AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from hsml.constants import MODEL, PREDICTOR
from hsml.predictor import Predictor


class Predictor(Predictor):
    """Configuration for a predictor running with the vLLM backend."""

    def __init__(self, **kwargs):
        kwargs["model_framework"] = MODEL.FRAMEWORK_LLM
        kwargs["model_server"] = PREDICTOR.MODEL_SERVER_VLLM

        super().__init__(**kwargs)
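Likewise, the predictor subclass only pins `model_framework` and `model_server`; everything else passes straight through to the base `Predictor`. A sketch, assuming the base class's usual keyword arguments:

```python
from hsml.llm.predictor import Predictor as vLLMPredictor

# Keyword names mirror the base Predictor and are assumptions here;
# model_framework and model_server are filled in by the subclass.
predictor = vLLMPredictor(
    name="mydeployment",          # hypothetical deployment name
    script_file="predictor.py",   # required for LLM deployments, see predictor.py below
)
```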
77 changes: 77 additions & 0 deletions python/hsml/llm/signature.py
@@ -0,0 +1,77 @@
#
# Copyright 2024 Hopsworks AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, Union

import numpy
import pandas
from hopsworks_common import usage
from hsml.llm.model import Model
from hsml.model_schema import ModelSchema


_mr = None


@usage.method_logger
def create_model(
    name: str,
    version: Optional[int] = None,
    metrics: Optional[dict] = None,
    description: Optional[str] = None,
    input_example: Optional[
        Union[pandas.DataFrame, pandas.Series, numpy.ndarray, list]
    ] = None,
    model_schema: Optional[ModelSchema] = None,
    feature_view=None,
    training_dataset_version: Optional[int] = None,
):
    """Create an LLM model metadata object.

    !!! note "Lazy"
        This method is lazy and does not persist any metadata or upload model artifacts to the
        model registry on its own. To save the model object and the model artifacts, call the `save()` method with a
        local file path to the directory containing the model artifacts.

    # Arguments
        name: Name of the model to create.
        version: Optionally version of the model to create, defaults to `None` and
            will create the model with incremented version from the last
            version in the model registry.
        metrics: Optionally a dictionary with model evaluation metrics (e.g., accuracy, MAE).
        description: Optionally a string describing the model, defaults to empty string
            `""`.
        input_example: Optionally an input example that represents a single input for the model, defaults to `None`.
        model_schema: Optionally a model schema for the model inputs and/or outputs.

    # Returns
        `Model`. The model metadata object.
    """
    model = Model(
        id=None,
        name=name,
        version=version,
        description=description,
        metrics=metrics,
        input_example=input_example,
        model_schema=model_schema,
        feature_view=feature_view,
        training_dataset_version=training_dataset_version,
    )
    model._shared_registry_project_name = _mr.shared_registry_project_name
    model._model_registry_id = _mr.model_registry_id

    return model
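Once this module is wired into the registry (next file), exporting an LLM follows the same pattern as the other frameworks. An end-to-end sketch with hypothetical names and paths:

```python
import hopsworks

project = hopsworks.login()
mr = project.get_model_registry()

# Hypothetical model; metrics and description are optional.
llm_model = mr.llm.create_model(
    name="mistral-7b-instruct",
    metrics={"rougeL": 0.42},
    description="Instruction-tuned LLM",
)

# save() uploads the artifacts and persists the metadata created lazily above.
llm_model.save("/path/to/local/model_dir")
```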
9 changes: 9 additions & 0 deletions python/hsml/model_registry.py
@@ -20,6 +20,7 @@
 from hopsworks_common import usage
 from hsml import util
 from hsml.core import model_api
+from hsml.llm import signature as llm_signature  # noqa: F401
 from hsml.python import signature as python_signature  # noqa: F401
 from hsml.sklearn import signature as sklearn_signature  # noqa: F401
 from hsml.tensorflow import signature as tensorflow_signature  # noqa: F401
@@ -49,11 +50,13 @@ def __init__(
         self._python = python_signature
         self._sklearn = sklearn_signature
         self._torch = torch_signature
+        self._llm = llm_signature

         tensorflow_signature._mr = self
         python_signature._mr = self
         sklearn_signature._mr = self
         torch_signature._mr = self
+        llm_signature._mr = self

     @classmethod
     def from_response_json(cls, json_dict):
@@ -191,6 +194,12 @@ def python(self):

         return python_signature

+    @property
+    def llm(self):
+        """Module for exporting a Large Language Model."""
+
+        return llm_signature
+
     def __repr__(self):
         project_name = (
             self._shared_registry_project_name
18 changes: 11 additions & 7 deletions python/hsml/predictor.py
@@ -167,18 +167,22 @@ def _validate_serving_tool(cls, serving_tool):

     @classmethod
     def _validate_script_file(cls, model_framework, script_file):
-        if model_framework == MODEL.FRAMEWORK_PYTHON and script_file is None:
+        if script_file is None and (
+            model_framework == MODEL.FRAMEWORK_PYTHON
+            or model_framework == MODEL.FRAMEWORK_LLM
+        ):
             raise ValueError(
-                "Predictor scripts are required in deployments for custom Python models"
+                "Predictor scripts are required in deployments for custom Python models and LLMs."
             )

     @classmethod
     def _infer_model_server(cls, model_framework):
-        return (
-            PREDICTOR.MODEL_SERVER_TF_SERVING
-            if model_framework == MODEL.FRAMEWORK_TENSORFLOW
-            else PREDICTOR.MODEL_SERVER_PYTHON
-        )
+        if model_framework == MODEL.FRAMEWORK_TENSORFLOW:
+            return PREDICTOR.MODEL_SERVER_TF_SERVING
+        elif model_framework == MODEL.FRAMEWORK_LLM:
+            return PREDICTOR.MODEL_SERVER_VLLM
+        else:
+            return PREDICTOR.MODEL_SERVER_PYTHON

     @classmethod
     def _get_default_serving_tool(cls):
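The old conditional expression could only distinguish two model servers; the refactor makes room for a third. The resulting mapping, expressed as assertions against the constants added above:

```python
from hsml.constants import MODEL, PREDICTOR
from hsml.predictor import Predictor

assert Predictor._infer_model_server(MODEL.FRAMEWORK_TENSORFLOW) == PREDICTOR.MODEL_SERVER_TF_SERVING
assert Predictor._infer_model_server(MODEL.FRAMEWORK_LLM) == PREDICTOR.MODEL_SERVER_VLLM
# everything else still falls back to the Python model server
assert Predictor._infer_model_server(MODEL.FRAMEWORK_SKLEARN) == PREDICTOR.MODEL_SERVER_PYTHON
```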
7 changes: 7 additions & 0 deletions python/hsml/util.py
@@ -95,6 +95,7 @@ def default(self, obj):  # pylint: disable=E0202


 def set_model_class(model):
+    from hsml.llm.model import Model as LLMModel
     from hsml.model import Model as BaseModel
     from hsml.python.model import Model as PyModel
     from hsml.sklearn.model import Model as SkLearnModel
@@ -120,6 +121,8 @@ def set_model_class(model):
         return SkLearnModel(**model)
     elif framework == MODEL.FRAMEWORK_PYTHON:
         return PyModel(**model)
+    elif framework == MODEL.FRAMEWORK_LLM:
+        return LLMModel(**model)
     else:
         raise ValueError(
             "framework {} is not a supported framework".format(str(framework))
@@ -232,6 +235,8 @@ def validate_metrics(metrics):


 def get_predictor_for_model(model, **kwargs):
+    from hsml.llm.model import Model as LLMModel
+    from hsml.llm.predictor import Predictor as vLLMPredictor
     from hsml.model import Model as BaseModel
     from hsml.predictor import Predictor as BasePredictor
     from hsml.python.model import Model as PyModel
@@ -258,6 +263,8 @@ def get_predictor_for_model(model, **kwargs):
         return SkLearnPredictor(**kwargs)
     if type(model) is PyModel:
         return PyPredictor(**kwargs)
+    if type(model) is LLMModel:
+        return vLLMPredictor(**kwargs)
     if type(model) is BaseModel:
         return BasePredictor(  # python as default framework and model server
             model_framework=MODEL.FRAMEWORK_PYTHON,
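With both branches in place, an LLM model object is dispatched to the vLLM predictor automatically. A sketch (the extra keyword argument is an assumption and passes through to the predictor):

```python
from hsml import util
from hsml.llm.model import Model as LLMModel
from hsml.llm.predictor import Predictor as vLLMPredictor

model = LLMModel(id=None, name="mistral-7b-instruct", version=1)  # hypothetical
predictor = util.get_predictor_for_model(model, script_file="predictor.py")
assert type(predictor) is vLLMPredictor
```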
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -55,7 +55,7 @@ dependencies = [
     "opensearch-py>=1.1.0,<=2.4.2",
     "tqdm",
     "grpcio>=1.49.1,<2.0.0", # ^1.49.1
-    "protobuf>=3.19.0,<4.0.0", # ^3.19.0
+    "protobuf>=4.25.4,<5.0.0", # ^4.25.4
 ]

 [project.optional-dependencies]
27 changes: 27 additions & 0 deletions python/tests/fixtures/model_fixtures.json
@@ -133,6 +133,33 @@
       ]
     }
   },
+  "get_llm": {
+    "response": {
+      "count": 1,
+      "items": [
+        {
+          "id": "5",
+          "name": "llmmodel",
+          "version": 0,
+          "created": "created",
+          "creator": "creator",
+          "environment": "environment.yml",
+          "description": "description",
+          "project_name": "myproject",
+          "metrics": { "acc": 0.7 },
+          "program": "program",
+          "user_full_name": "Full Name",
+          "model_schema": "model_schema.json",
+          "training_dataset": "training_dataset",
+          "input_example": "input_example.json",
+          "model_registry_id": 1,
+          "tags": [],
+          "framework": "LLM",
+          "href": "test_href"
+        }
+      ]
+    }
+  },
   "get_list": {
     "response": {
       "count": 2,
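A fixture like this lets tests drive the new LLM branch of `set_model_class`. A pytest-style sketch; the path and direct-JSON loading are assumptions, since the repo's tests load fixtures through shared helpers:

```python
import json

from hsml import util
from hsml.llm.model import Model as LLMModel


def test_get_llm_fixture_maps_to_llm_model():
    # Assumes the test runs from the python/ directory.
    with open("tests/fixtures/model_fixtures.json") as f:
        item = json.load(f)["get_llm"]["response"]["items"][0]
    assert type(util.set_model_class(item)) is LLMModel
```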