argilla-io
diff --git a/Diff for: ‎docs/sections/how_to_guides/advanced/checkpointing.md
+30 b/Diff for: ‎docs/sections/how_to_guides/advanced/checkpointing.md
+30
diff --git a/Diff for: ‎examples/exam_questions.py
+3-3 b/Diff for: ‎examples/exam_questions.py
+3-3
diff --git a/Diff for: ‎pyproject.toml
+11-1 b/Diff for: ‎pyproject.toml
+11-1
diff --git a/Diff for: ‎src/distilabel/embeddings.py
+2-1 b/Diff for: ‎src/distilabel/embeddings.py
+2-1
diff --git a/Diff for: ‎src/distilabel/llms.py
+3 b/Diff for: ‎src/distilabel/llms.py
+3
diff --git a/Diff for: ‎src/distilabel/models/__init__.py
+5 b/Diff for: ‎src/distilabel/models/__init__.py
+5
diff --git a/Diff for: ‎src/distilabel/models/base_clients/inference_endpoints.py
+1-3 b/Diff for: ‎src/distilabel/models/base_clients/inference_endpoints.py
+1-3
diff --git a/Diff for: ‎src/distilabel/models/embeddings/__init__.py
+2 b/Diff for: ‎src/distilabel/models/embeddings/__init__.py
+2
diff --git a/Diff for: ‎src/distilabel/models/embeddings/sglang.py
+125 b/Diff for: ‎src/distilabel/models/embeddings/sglang.py
+125
diff --git a/Diff for: ‎src/distilabel/models/llms/__init__.py
+3 b/Diff for: ‎src/distilabel/models/llms/__init__.py
+3
diff --git a/Diff for: ‎src/distilabel/models/llms/base.py
+1-1 b/Diff for: ‎src/distilabel/models/llms/base.py
+1-1
@@ -57,3 +57,33 @@ The final datasets can be found in the following links:
 - Checkpoint dataset: [distilabel-internal-testing/streaming_test_1](https://huggingface.co/datasets/distilabel-internal-testing/streaming_test_1)
 
 - Final distiset: [distilabel-internal-testing/streaming_test](https://huggingface.co/datasets/distilabel-internal-testing/streaming_test)
+
+### Read back the data
+
+In case we want to take a look at a given file, we can take advantage of the `huggingface_hub` library. We will use the `HfFileSystem` to list all the `jsonl` files in the dataset repository, and download one of them to show how it works:
+
+```python
+from huggingface_hub import HfFileSystem, hf_hub_download
+
+dataset_name = "distilabel-internal-testing/streaming_test_1"
+fs = HfFileSystem()
+filenames = fs.glob(f"datasets/{dataset_name}/**/*.jsonl")
+
+filename = hf_hub_download(repo_id="distilabel-internal-testing/streaming_test_1", filename="config-0/train-00000.jsonl", repo_type="dataset")
+```
+
+The file will be downloaded to the default cache and `filename` will hold its local path, so to read the data we can just proceed as with any other jsonlines file:
+
+```python
+import json
+data = []
+
+with open(filename, "r") as f:
+    data = [json.loads(line) for line in f.readlines()]
+
+# [{'a': 1, 'b': 5},
+#  {'a': 2, 'b': 6},
+#  {'a': 3, 'b': 7},
+# ...
+```
+
@@ -59,15 +59,15 @@ class ExamQuestions(BaseModel):
         name="load_instructions",
         data=[
             {
-                "page": page.content,
+                "instruction": page.content,
             }
         ],
     )
 
     text_generation = TextGeneration(
         name="exam_generation",
         system_prompt=SYSTEM_PROMPT,
-        template="Generate a list of answers and questions about the document. Document:\n\n{{ page }}",
+        template="Generate a list of answers and questions about the document. Document:\n\n{{ instruction }}",
         llm=InferenceEndpointsLLM(
             model_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
             tokenizer_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -95,4 +95,4 @@ class ExamQuestions(BaseModel):
         },
         use_cache=False,
     )
-    distiset.push_to_hub("USERNAME/exam_questions")
+    # distiset.push_to_hub("USERNAME/exam_questions")
@@ -79,7 +79,7 @@ argilla = ["argilla >= 2.0.0", "ipython"]
 cohere = ["cohere >= 5.2.0"]
 groq = ["groq >= 0.4.1"]
 hf-inference-endpoints = ["huggingface_hub >= 0.22.0"]
-hf-transformers = ["transformers >= 4.34.1", "torch >= 2.0.0"]
+hf-transformers = ["transformers == 4.48.3", "torch >= 2.0.0"]
 instructor = ["instructor >= 1.2.3"]
 litellm = ["litellm >= 1.30.0"]
 llama-cpp = ["llama-cpp-python >= 0.2.0"]
@@ -107,6 +107,16 @@ vision = ["Pillow >= 10.3.0"]  # To work with images.
 # minhash
 minhash = ["datasketch >= 1.6.5", "nltk>3.8.1"]
 
+sglang = ["sglang[all]>=0.4.3.post2", "transformers == 4.48.3"]
+
+[tool.hatch.envs.default]
+dependencies = [
+    "sglang[all]>=0.4.3.post2",
+    "transformers == 4.48.3",
+]
+installer = "pip"
+pip-args = ["--find-links", "https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python"]
+
 [project.urls]
 Documentation = "https://distilabel.argilla.io/"
 Issues = "https://github.com/argilla/distilabel/issues"
 
@@ -27,10 +27,11 @@
 from distilabel.models.embeddings.sentence_transformers import (
     SentenceTransformerEmbeddings,
 )
-from distilabel.models.embeddings.vllm import vLLMEmbeddings
+from distilabel.models.embeddings.vllm import SGLangEmbeddings, vLLMEmbeddings
 
 __all__ = [
     "Embeddings",
+    "SGLangEmbeddings",
     "SentenceTransformerEmbeddings",
     "vLLMEmbeddings",
 ]
@@ -37,6 +37,7 @@
 from distilabel.models.llms.moa import MixtureOfAgentsLLM
 from distilabel.models.llms.ollama import OllamaLLM
 from distilabel.models.llms.openai import OpenAILLM
+from distilabel.models.llms.sglang import ClientSGLang, SGLang
 from distilabel.models.llms.together import TogetherLLM
 from distilabel.models.llms.vertexai import VertexAILLM
 from distilabel.models.llms.vllm import ClientvLLM, vLLM
@@ -49,6 +50,7 @@
     "AnyscaleLLM",
     "AsyncLLM",
     "AzureOpenAILLM",
+    "ClientSGLang",
     "ClientvLLM",
     "CohereLLM",
     "CudaDevicePlacementMixin",
@@ -63,6 +65,7 @@
     "MlxLLM",
     "OllamaLLM",
     "OpenAILLM",
+    "SGLang",
     "TogetherLLM",
     "TransformersLLM",
     "VertexAILLM",
 
@@ -18,6 +18,7 @@
 from distilabel.models.embeddings.sentence_transformers import (
     SentenceTransformerEmbeddings,
 )
+from distilabel.models.embeddings.sglang import SGLangEmbeddings
 from distilabel.models.embeddings.vllm import vLLMEmbeddings
 from distilabel.models.image_generation.base import (
     AsyncImageGenerationModel,
@@ -41,6 +42,7 @@
 from distilabel.models.llms.moa import MixtureOfAgentsLLM
 from distilabel.models.llms.ollama import OllamaLLM
 from distilabel.models.llms.openai import OpenAILLM
+from distilabel.models.llms.sglang import ClientSGLang, SGLang
 from distilabel.models.llms.together import TogetherLLM
 from distilabel.models.llms.vertexai import VertexAILLM
 from distilabel.models.llms.vllm import ClientvLLM, vLLM
@@ -54,6 +56,7 @@
     "AsyncImageGenerationModel",
     "AsyncLLM",
     "AzureOpenAILLM",
+    "ClientSGLang",
     "ClientvLLM",
     "CohereLLM",
     "CudaDevicePlacementMixin",
@@ -73,6 +76,8 @@
     "OllamaLLM",
     "OpenAIImageGeneration",
     "OpenAILLM",
+    "SGLang",
+    "SGLangEmbeddings",
     "SentenceTransformerEmbeddings",
     "TogetherLLM",
     "TransformersLLM",
 
@@ -108,9 +108,7 @@ def load(self) -> None:  # noqa: C901
                     f"Model {self.model_id} is not currently deployed or is not running the TGI framework"
                 )
 
-            self.base_url = client._resolve_url(
-                model=self.model_id, task="text-generation"
-            )
+            self._base_url = client.base_url
 
         if self.endpoint_name is not None:
             client = get_inference_endpoint(
 
@@ -17,11 +17,13 @@
 from distilabel.models.embeddings.sentence_transformers import (
     SentenceTransformerEmbeddings,
 )
+from distilabel.models.embeddings.sglang import SGLangEmbeddings
 from distilabel.models.embeddings.vllm import vLLMEmbeddings
 
 __all__ = [
     "Embeddings",
     "LlamaCppEmbeddings",
+    "SGLangEmbeddings",
     "SentenceTransformerEmbeddings",
     "vLLMEmbeddings",
 ]
@@ -0,0 +1,125 @@
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+
+from pydantic import Field, PrivateAttr
+
+from distilabel.mixins.runtime_parameters import RuntimeParameter
+from distilabel.models.embeddings.base import Embeddings
+from distilabel.models.mixins.cuda_device_placement import CudaDevicePlacementMixin
+
+if TYPE_CHECKING:
+    from sglang import Engine
+
+
+class SGLangEmbeddings(Embeddings, CudaDevicePlacementMixin):
+    """`sglang` library implementation for embedding generation.
+
+    Attributes:
+        model: the model Hugging Face Hub repo id or a path to a directory containing the
+            model weights and configuration files.
+        dtype: the data type to use for the model. Defaults to `auto`.
+        trust_remote_code: whether to trust the remote code when loading the model. Defaults
+            to `False`.
+        quantization: the quantization mode to use for the model. Defaults to `None`.
+        revision: the revision of the model to load. Defaults to `None`.
+        seed: the seed to use for the random number generator. Defaults to `0`.
+        extra_kwargs: additional dictionary of keyword arguments that will be passed to the
+            `Engine` class of `sglang` library. Defaults to `{}`.
+        _model: the `SGLang` model instance. This attribute is meant to be used internally
+            and should not be accessed directly. It will be set in the `load` method.
+
+    References:
+        - https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py
+
+    Examples:
+        Generating sentence embeddings:
+
+        ```python
+        if __name__ == "__main__":
+
+            from distilabel.models import SGLangEmbeddings
+            embeddings = SGLangEmbeddings(model="intfloat/e5-mistral-7b-instruct")
+            embeddings.load()
+            results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"])
+            print(results)
+            # [
+            #   [0.0203704833984375, -0.0060882568359375, ...],
+            #   [0.02398681640625, 0.0177001953125 ...],
+            # ]
+        ```
+    """
+
+    model: str
+    dtype: str = "auto"
+    trust_remote_code: bool = False
+    quantization: Optional[str] = None
+    revision: Optional[str] = None
+
+    seed: int = 0
+
+    extra_kwargs: Optional[RuntimeParameter[Dict[str, Any]]] = Field(
+        default_factory=dict,
+        description="Additional dictionary of keyword arguments that will be passed to the"
+        " `Engine` class of `sglang` library. See all the supported arguments at: "
+        "https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/entrypoints/engine.py",
+    )
+
+    _model: "Engine" = PrivateAttr(None)
+
+    def load(self) -> None:
+        """Loads the `sglang` model using either the path or the Hugging Face Hub repository id."""
+        super().load()
+
+        CudaDevicePlacementMixin.load(self)
+
+        try:
+            from sglang import Engine
+        except ImportError as err:
+            raise ImportError(
+                "sglang is not installed. Please install it with sglang document https://docs.sglang.ai/start/install.html."
+            ) from err
+
+        self._model = Engine(
+            model_path=self.model,
+            dtype=self.dtype,
+            trust_remote_code=self.trust_remote_code,
+            quantization=self.quantization,
+            revision=self.revision,
+            random_seed=self.seed,
+            **self.extra_kwargs,  # type: ignore
+        )
+
+    def unload(self) -> None:
+        """Unloads the `SGLang` model."""
+        self._model = None
+        CudaDevicePlacementMixin.unload(self)
+        super().unload()
+
+    @property
+    def model_name(self) -> str:
+        """Returns the name of the model."""
+        return self.model
+
+    def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]:
+        """Generates embeddings for the provided inputs.
+
+        Args:
+            inputs: a list of texts for which an embedding has to be generated.
+
+        Returns:
+            The generated embeddings.
+        """
+        return [output["embedding"] for output in self._model.encode(inputs)]
@@ -26,6 +26,7 @@
 from distilabel.models.llms.moa import MixtureOfAgentsLLM
 from distilabel.models.llms.ollama import OllamaLLM
 from distilabel.models.llms.openai import OpenAILLM
+from distilabel.models.llms.sglang import ClientSGLang, SGLang
 from distilabel.models.llms.together import TogetherLLM
 from distilabel.models.llms.vertexai import VertexAILLM
 from distilabel.models.llms.vllm import ClientvLLM, vLLM
@@ -38,6 +39,7 @@
     "AnyscaleLLM",
     "AsyncLLM",
     "AzureOpenAILLM",
+    "ClientSGLang",
     "ClientvLLM",
     "CohereLLM",
     "CudaDevicePlacementMixin",
@@ -52,6 +54,7 @@
     "MlxLLM",
     "OllamaLLM",
     "OpenAILLM",
+    "SGLang",
     "TogetherLLM",
     "TransformersLLM",
     "VertexAILLM",
 
@@ -476,7 +476,7 @@ def _prepare_kwargs(
         Args:
             arguments: The arguments that would be passed to the LLM as **kwargs.
                 to update with the structured output configuration.
-            structured_outputs: The structured output configuration to update the arguments.
+            structured_output: The structured output configuration to update the arguments.
 
         Returns:
             kwargs updated with the special arguments used by `instructor`.
Original file line number	Diff line number	Diff line change
`@@ -59,15 +59,15 @@ class ExamQuestions(BaseModel):`
`59`	`59`	`name="load_instructions",`
`60`	`60`	`data=[`
`61`	`61`	`{`
`62`		`- "page": page.content,`
	`62`	`+ "instruction": page.content,`
`63`	`63`	`}`
`64`	`64`	`],`
`65`	`65`	`)`
`66`	`66`
`67`	`67`	`text_generation = TextGeneration(`
`68`	`68`	`name="exam_generation",`
`69`	`69`	`system_prompt=SYSTEM_PROMPT,`
`70`		`- template="Generate a list of answers and questions about the document. Document:\n\n{{ page }}",`
	`70`	`+ template="Generate a list of answers and questions about the document. Document:\n\n{{ instruction }}",`
`71`	`71`	`llm=InferenceEndpointsLLM(`
`72`	`72`	`model_id="meta-llama/Meta-Llama-3.1-8B-Instruct",`
`73`	`73`	`tokenizer_id="meta-llama/Meta-Llama-3.1-8B-Instruct",`
`@@ -95,4 +95,4 @@ class ExamQuestions(BaseModel):`
`95`	`95`	`},`
`96`	`96`	`use_cache=False,`
`97`	`97`	`)`
`98`		`- distiset.push_to_hub("USERNAME/exam_questions")`
	`98`	`+ # distiset.push_to_hub("USERNAME/exam_questions")`
Original file line number	Diff line number	Diff line change
`@@ -27,10 +27,11 @@`
`27`	`27`	`from distilabel.models.embeddings.sentence_transformers import (`
`28`	`28`	`SentenceTransformerEmbeddings,`
`29`	`29`	`)`
`30`		`-from distilabel.models.embeddings.vllm import vLLMEmbeddings`
	`30`	`+from distilabel.models.embeddings.vllm import SGLangEmbeddings, vLLMEmbeddings`
`31`	`31`
`32`	`32`	`__all__ = [`
`33`	`33`	`"Embeddings",`
	`34`	`+ "SGLangEmbeddings",`
`34`	`35`	`"SentenceTransformerEmbeddings",`
`35`	`36`	`"vLLMEmbeddings",`
`36`	`37`	`]`
Original file line number	Diff line number	Diff line change
`@@ -108,9 +108,7 @@ def load(self) -> None: # noqa: C901`
`108`	`108`	`f"Model {self.model_id} is not currently deployed or is not running the TGI framework"`
`109`	`109`	`)`
`110`	`110`
`111`		`- self.base_url = client._resolve_url(`
`112`		`- model=self.model_id, task="text-generation"`
`113`		`- )`
	`111`	`+ self._base_url = client.base_url`
`114`	`112`
`115`	`113`	`if self.endpoint_name is not None:`
`116`	`114`	`client = get_inference_endpoint(`
Original file line number	Diff line number	Diff line change
`@@ -17,11 +17,13 @@`
`17`	`17`	`from distilabel.models.embeddings.sentence_transformers import (`
`18`	`18`	`SentenceTransformerEmbeddings,`
`19`	`19`	`)`
	`20`	`+from distilabel.models.embeddings.sglang import SGLangEmbeddings`
`20`	`21`	`from distilabel.models.embeddings.vllm import vLLMEmbeddings`
`21`	`22`
`22`	`23`	`__all__ = [`
`23`	`24`	`"Embeddings",`
`24`	`25`	`"LlamaCppEmbeddings",`
	`26`	`+ "SGLangEmbeddings",`
`25`	`27`	`"SentenceTransformerEmbeddings",`
`26`	`28`	`"vLLMEmbeddings",`
`27`	`29`	`]`