File tree 1 file changed +5
-3
lines changed
1 file changed +5
-3
lines changed Original file line number Diff line number Diff line change 14
14
import hashlib
15
15
import logging
16
16
import os
17
+ import shutil
17
18
import signal
18
19
import threading
19
20
import time
@@ -384,7 +385,7 @@ def run(
384
385
stop_logging ()
385
386
return distiset
386
387
387
- self ._setup_write_buffer ()
388
+ self ._setup_write_buffer (use_cache )
388
389
389
390
self ._print_load_stages_info ()
390
391
@@ -856,11 +857,12 @@ def _invalidate_steps_cache_if_required(self) -> None:
856
857
)
857
858
break
858
859
859
- def _setup_write_buffer (self ) -> None :
860
+ def _setup_write_buffer (self , use_cache : bool = True ) -> None :
860
861
"""Setups the `_WriteBuffer` that will store the data of the leaf steps of the
861
862
pipeline while running, so the `Distiset` can be created at the end.
862
863
"""
863
- # TODO: if `use_cache=False` we should not use previous parquet files
864
+ if not use_cache :
865
+ shutil .rmtree (self ._cache_location ["data" ])
864
866
buffer_data_path = self ._cache_location ["data" ] / constants .STEPS_OUTPUTS_PATH
865
867
self ._logger .info (f"📝 Pipeline data will be written to '{ buffer_data_path } '" )
866
868
self ._write_buffer = _WriteBuffer (
You can’t perform that action at this time.
0 commit comments