Skip to content

Commit 0b35f96

Browse files
committed
Remove write buffer data if use_cache=False
1 parent 333e346 commit 0b35f96

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

Diff for: src/distilabel/pipeline/base.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import hashlib
1515
import logging
1616
import os
17+
import shutil
1718
import signal
1819
import threading
1920
import time
@@ -384,7 +385,7 @@ def run(
384385
stop_logging()
385386
return distiset
386387

387-
self._setup_write_buffer()
388+
self._setup_write_buffer(use_cache)
388389

389390
self._print_load_stages_info()
390391

@@ -856,11 +857,12 @@ def _invalidate_steps_cache_if_required(self) -> None:
856857
)
857858
break
858859

859-
def _setup_write_buffer(self) -> None:
860+
def _setup_write_buffer(self, use_cache: bool = True) -> None:
860861
"""Setups the `_WriteBuffer` that will store the data of the leaf steps of the
861862
pipeline while running, so the `Distiset` can be created at the end.
862863
"""
863-
# TODO: if `use_cache=False` we should not use previous parquet files
864+
if not use_cache and self._cache_location["data"].exists():
865+
shutil.rmtree(self._cache_location["data"])
864866
buffer_data_path = self._cache_location["data"] / constants.STEPS_OUTPUTS_PATH
865867
self._logger.info(f"📝 Pipeline data will be written to '{buffer_data_path}'")
866868
self._write_buffer = _WriteBuffer(

0 commit comments

Comments
 (0)