Skip to content

Commit 7f0f516

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and
The TensorFlow Datasets Authors
committed
Add option to write incomplete files to a subfolder
This can help keep the dataset folder clean.

PiperOrigin-RevId: 695237951
1 parent 44b485a commit 7f0f516

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

tensorflow_datasets/core/utils/py_utils.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,17 +308,32 @@ def _tmp_file_prefix() -> str:
308308
return f'{constants.INCOMPLETE_PREFIX}{uuid.uuid4().hex}'
309309

310310

311-
def _tmp_file_name(path: epath.PathLike) -> epath.Path:
311+
def _tmp_file_name(
    path: epath.PathLike,
    subfolder: str | None = None,
) -> epath.Path:
  """Computes the temporary (incomplete) file path associated with `path`.

  The temporary file keeps the original file name, prefixed by the value of
  `_tmp_file_prefix()` and a dot. This function only builds the path; it does
  not create any file or directory.

  Args:
    path: The path to the final file.
    subfolder: Optional name of a subfolder, relative to the parent of `path`,
      in which the temporary file is placed. If None or empty, the temporary
      file is placed directly in the parent of `path`.

  Returns:
    The path of the temporary file.
  """
  final_path = epath.Path(path)
  tmp_dir = final_path.parent
  if subfolder:
    tmp_dir = tmp_dir / subfolder
  return tmp_dir / f'{_tmp_file_prefix()}.{final_path.name}'
314328

315329

316330
@contextlib.contextmanager
317331
def incomplete_file(
318332
path: epath.Path,
333+
subfolder: str | None = None,
319334
) -> Iterator[epath.Path]:
320335
"""Writes to path atomically, by writing to temp file and renaming it."""
321-
tmp_path = _tmp_file_name(path)
336+
tmp_path = _tmp_file_name(path, subfolder=subfolder)
322337
try:
323338
yield tmp_path
324339
tmp_path.replace(path)

tensorflow_datasets/core/utils/py_utils_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
# limitations under the License.
1515

1616
import collections
17+
import os
1718
import pathlib
19+
from unittest import mock
1820

1921
from etils import epath
2022
import pytest
@@ -370,5 +372,17 @@ def test_make_valid_name(name: str, expected: str):
370372
assert py_utils.make_valid_name(name) == expected
371373

372374

375+
@pytest.mark.parametrize(
    'path,subfolder,expected',
    [
        ('/a/file.ext', None, '/a/foobar.file.ext'),
        ('/a/file.ext', 'sub', '/a/sub/foobar.file.ext'),
    ],
)
def test_tmp_file_name(path, subfolder, expected):
  """Checks the temporary file path with and without a subfolder."""
  # Replace the generated prefix with a fixed value so that the resulting
  # file name can be compared against a literal.
  fixed_prefix = mock.patch.object(
      py_utils, '_tmp_file_prefix', return_value='foobar'
  )
  with fixed_prefix:
    actual = py_utils._tmp_file_name(path, subfolder)
  assert os.fspath(actual) == expected
385+
386+
373387
if __name__ == '__main__':
374388
tf.test.main()

0 commit comments

Comments
 (0)