Commit 25601bb

Create file per hostname in CudaDevicePlacementMixin (#814)
* Create file per hostname
* Set default `_desired_num_gpus` to `1`
* Fix `GeneratorTask`s not getting assigned GPUs and name
* Add `_init_cuda_device_placement` method
* Remove info message
* Disable `CudaDevicePlacementMixin` when running a `RayPipeline`
* Fix unit test
1 parent 04b86f5 commit 25601bb
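
Since `disable_cuda_device_placement` is exposed as a `RuntimeParameter`, it should also be settable by users like any other LLM runtime parameter at run time. A minimal usage sketch, not from this commit: the pipeline and the "text_generation" step name are made up for illustration.

from distilabel.pipeline import Pipeline

# Hypothetical pipeline: "text_generation" is an example step name.
with Pipeline(name="example-pipeline") as pipeline:
    ...  # steps defined here

# Runtime parameters are nested per step, so the new flag is reachable
# under the step's `llm` key.
distiset = pipeline.run(
    parameters={
        "text_generation": {
            "llm": {"disable_cuda_device_placement": True},
        },
    },
)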

File tree

6 files changed: +45 -7 lines changed

src/distilabel/llms/mixins/cuda_device_placement.py
src/distilabel/pipeline/local.py
src/distilabel/pipeline/ray.py
src/distilabel/pipeline/step_wrapper.py
tests/unit/pipeline/test_local.py
tests/unit/steps/tasks/structured_outputs/test_outlines.py

src/distilabel/llms/mixins/cuda_device_placement.py

Lines changed: 19 additions & 1 deletion
@@ -15,6 +15,7 @@
 import json
 import logging
 import os
+import socket
 import tempfile
 from contextlib import contextmanager
 from pathlib import Path
@@ -26,7 +27,11 @@
 from distilabel.mixins.runtime_parameters import RuntimeParameter
 
 _CUDA_DEVICE_PLACEMENT_MIXIN_FILE = (
-    Path(tempfile.gettempdir()) / "distilabel_cuda_device_placement_mixin.json"
+    Path(tempfile.gettempdir())
+    / "distilabel"
+    / "cuda_device_placement"
+    / socket.gethostname()
+    / "distilabel_cuda_device_placement_mixin.json"
 )
 
 
@@ -43,6 +48,8 @@ class CudaDevicePlacementMixin(BaseModel):
         placement information provided in `_device_llm_placement_map`. If set to a list
         of devices, it will be checked if the devices are available to be used by the
         `LLM`. If not, a warning will be logged.
+        disable_cuda_device_placement: Whether to disable the CUDA device placement logic
+            or not. Defaults to `False`.
         _llm_identifier: the identifier of the `LLM` to be used as key in `_device_llm_placement_map`.
         _device_llm_placement_map: a dictionary with the device placement information for each
             `LLM`.
@@ -51,6 +58,10 @@ class CudaDevicePlacementMixin(BaseModel):
     cuda_devices: RuntimeParameter[Union[List[int], Literal["auto"]]] = Field(
         default="auto", description="A list with the ID of the CUDA devices to be used."
     )
+    disable_cuda_device_placement: RuntimeParameter[bool] = Field(
+        default=False,
+        description="Whether to disable the CUDA device placement logic or not.",
+    )
 
     _llm_identifier: Union[str, None] = PrivateAttr(default=None)
     _desired_num_gpus: PositiveInt = PrivateAttr(default=1)
@@ -63,6 +74,9 @@ def load(self) -> None:
         """Assign CUDA devices to the LLM based on the device placement information provided
         in `_device_llm_placement_map`."""
 
+        if self.disable_cuda_device_placement:
+            return
+
         try:
             import pynvml
 
@@ -88,6 +102,9 @@ def load(self) -> None:
     def unload(self) -> None:
         """Unloads the LLM and removes the CUDA devices assigned to it from the device
         placement information provided in `_device_llm_placement_map`."""
+        if self.disable_cuda_device_placement:
+            return
+
         with self._device_llm_placement_map() as device_map:
             if self._llm_identifier in device_map:
                 self._logger.debug(  # type: ignore
@@ -105,6 +122,7 @@ def _device_llm_placement_map(self) -> Generator[Dict[str, List[int]], None, Non
         Yields:
             The content of the device placement file.
         """
+        _CUDA_DEVICE_PLACEMENT_MIXIN_FILE.parent.mkdir(parents=True, exist_ok=True)
         _CUDA_DEVICE_PLACEMENT_MIXIN_FILE.touch()
         with portalocker.Lock(
             _CUDA_DEVICE_PLACEMENT_MIXIN_FILE,
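
The net effect is that the placement file is namespaced per hostname, so nodes that share a temporary directory (e.g. over NFS) no longer read and write the same coordination file. A minimal sketch of the resulting path, mirroring the constant in the diff above; the hostname "node-1" in the comment is just an example:

import socket
import tempfile
from pathlib import Path

# Mirrors the constant defined in the diff above.
placement_file = (
    Path(tempfile.gettempdir())
    / "distilabel"
    / "cuda_device_placement"
    / socket.gethostname()
    / "distilabel_cuda_device_placement_mixin.json"
)

# The parent directories must exist before the file can be touched,
# hence the added `mkdir(parents=True, exist_ok=True)` call.
placement_file.parent.mkdir(parents=True, exist_ok=True)
placement_file.touch()
print(placement_file)
# e.g. /tmp/distilabel/cuda_device_placement/node-1/distilabel_cuda_device_placement_mixin.json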

src/distilabel/pipeline/local.py

Lines changed: 1 addition & 0 deletions
@@ -233,6 +233,7 @@ def _run_step(self, step: "_Step", input_queue: "Queue[Any]", replica: int) -> N
             output_queue=self._output_queue,
             load_queue=self._load_queue,
             dry_run=self._dry_run,
+            ray_pipeline=False,
         )
 
         self._pool.apply_async(step_wrapper.run, error_callback=self._error_callback)

src/distilabel/pipeline/ray.py

Lines changed: 1 addition & 0 deletions
@@ -235,6 +235,7 @@ def run(self) -> str:
                 output_queue=self._output_queue,
                 load_queue=self._load_queue,
                 dry_run=self._dry_run,
+                ray_pipeline=True,
             ),
             log_queue=self._log_queue,
         )

src/distilabel/pipeline/step_wrapper.py

Lines changed: 18 additions & 6 deletions
@@ -21,7 +21,7 @@
 from distilabel.pipeline.constants import LAST_BATCH_SENT_FLAG
 from distilabel.pipeline.typing import StepLoadStatus
 from distilabel.steps.base import GeneratorStep, Step, _Step
-from distilabel.steps.tasks.base import Task
+from distilabel.steps.tasks.base import _Task
 
 
 class _StepWrapper:
@@ -44,6 +44,7 @@ def __init__(
         output_queue: "Queue[_Batch]",
         load_queue: "Queue[Union[StepLoadStatus, None]]",
         dry_run: bool = False,
+        ray_pipeline: bool = False,
     ) -> None:
         """Initializes the `_ProcessWrapper`.
 
@@ -54,21 +55,32 @@
             load_queue: The queue used to notify the main process that the step has been
                 loaded, has been unloaded or has failed to load.
             dry_run: Flag to ensure we are forcing to run the last batch.
+            ray_pipeline: Whether the step is running a `RayPipeline` or not.
         """
         self.step = step
         self.replica = replica
         self.input_queue = input_queue
         self.output_queue = output_queue
         self.load_queue = load_queue
-        self._dry_run = dry_run
+        self.dry_run = dry_run
+        self.ray_pipeline = ray_pipeline
 
+        self._init_cuda_device_placement()
+
+    def _init_cuda_device_placement(self) -> None:
+        """Sets the LLM identifier and the number of desired GPUs of the `CudaDevicePlacementMixin`
+        if the step is a `_Task` that uses an `LLM` with CUDA capabilities."""
         if (
-            isinstance(self.step, Task)
+            isinstance(self.step, _Task)
             and hasattr(self.step, "llm")
            and isinstance(self.step.llm, CudaDevicePlacementMixin)
         ):
-            self.step.llm._llm_identifier = self.step.name
-            self.step.llm._desired_num_gpus = self.step.resources.gpus
+            if self.ray_pipeline:
+                self.step.llm.disable_cuda_device_placement = True
+            else:
+                desired_num_gpus = self.step.resources.gpus or 1
+                self.step.llm._llm_identifier = self.step.name
+                self.step.llm._desired_num_gpus = desired_num_gpus
 
     def run(self) -> str:
         """The target function executed by the process. This function will also handle
@@ -156,7 +168,7 @@ def _generator_step_process_loop(self) -> None:
 
         for data, last_batch in step.process_applying_mappings(offset=offset):
             batch.set_data([data])
-            batch.last_batch = self._dry_run or last_batch
+            batch.last_batch = self.dry_run or last_batch
             self._send_batch(batch)
 
             if batch.last_batch:
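
Two details in this file carry the commit's main fixes. The `isinstance` check now uses the `_Task` base class, so `GeneratorTask`s (which subclass `_Task` but not `Task`) also get an LLM identifier and GPU count assigned. Under a `RayPipeline` the mixin is disabled outright, presumably because Ray schedules GPU resources for its workers itself, making the file-based coordination redundant. Finally, `step.resources.gpus` can be `None` when the user did not request GPUs, so the new `or 1` fallback keeps the default of one GPU. A tiny self-contained sketch of that fallback; the helper name is hypothetical, not distilabel API:

from typing import Optional

def desired_num_gpus(requested: Optional[int]) -> int:
    # `None or 1` evaluates to 1, so an unspecified request falls back
    # to a single GPU, matching the new `_desired_num_gpus` default.
    return requested or 1

assert desired_num_gpus(None) == 1
assert desired_num_gpus(2) == 2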

tests/unit/pipeline/test_local.py

Lines changed: 4 additions & 0 deletions
@@ -55,6 +55,7 @@ def test_run_steps(self, step_wrapper_mock: mock.MagicMock) -> None:
                     output_queue=pipeline._output_queue,
                     load_queue=pipeline._load_queue,
                     dry_run=False,
+                    ray_pipeline=False,
                 ),
                 mock.call(
                     step=dummy_step_1,
@@ -63,6 +64,7 @@ def test_run_steps(self, step_wrapper_mock: mock.MagicMock) -> None:
                     output_queue=pipeline._output_queue,
                     load_queue=pipeline._load_queue,
                     dry_run=False,
+                    ray_pipeline=False,
                 ),
                 mock.call(
                     step=dummy_step_1,
@@ -71,6 +73,7 @@ def test_run_steps(self, step_wrapper_mock: mock.MagicMock) -> None:
                     output_queue=pipeline._output_queue,
                     load_queue=pipeline._load_queue,
                     dry_run=False,
+                    ray_pipeline=False,
                 ),
                 mock.call(
                     step=dummy_step_2,
@@ -79,6 +82,7 @@ def test_run_steps(self, step_wrapper_mock: mock.MagicMock) -> None:
                     output_queue=pipeline._output_queue,
                     load_queue=pipeline._load_queue,
                     dry_run=False,
+                    ray_pipeline=False,
                ),
             ],
         )

tests/unit/steps/tasks/structured_outputs/test_outlines.py

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,7 @@ class DummyUserTest(BaseModel):
             "device_map": None,
             "token": None,
             "use_magpie_template": False,
+            "disable_cuda_device_placement": False,
             "type_info": {
                 "module": "distilabel.llms.huggingface.transformers",
                 "name": "TransformersLLM",
@@ -85,6 +86,7 @@ class DummyUserTest(BaseModel):
             "device_map": None,
             "token": None,
             "use_magpie_template": False,
+            "disable_cuda_device_placement": False,
             "type_info": {
                 "module": "distilabel.llms.huggingface.transformers",
                 "name": "TransformersLLM",