Skip to content

Commit 42ffa0f

Browse files
tomvdw, The TensorFlow Datasets Authors
authored and
The TensorFlow Datasets Authors
committed
Improve DatasetInfo loading error message
FileNotFoundError doesn't make much sense, because there can be different reasons why the file cannot be loaded. PiperOrigin-RevId: 736818093
1 parent 50ae145 commit 42ffa0f

File tree

4 files changed

+25
-18
lines changed

4 files changed

+25
-18
lines changed

tensorflow_datasets/core/dataset_info.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -702,20 +702,12 @@ def read_from_directory(self, dataset_info_dir: epath.PathLike) -> None:
702702
be the root directory of a specific dataset version.
703703
704704
Raises:
705-
FileNotFoundError: If the dataset_info.json can't be found.
705+
DatasetInfoFileError: If the dataset_info.json can't be read.
706706
"""
707707
logging.info("Load dataset info from %s", dataset_info_dir)
708708

709709
# Load the metadata from disk
710-
try:
711-
parsed_proto = read_from_json(dataset_info_path(dataset_info_dir))
712-
except Exception as e:
713-
raise FileNotFoundError(
714-
"Tried to load `DatasetInfo` from a directory which does not exist or"
715-
" does not contain `dataset_info.json`. Please delete the directory "
716-
f"`{dataset_info_dir}` if you are trying to re-generate the "
717-
"dataset."
718-
) from e
710+
parsed_proto = read_from_json(dataset_info_path(dataset_info_dir))
719711

720712
if str(self.version) != parsed_proto.version:
721713
raise AssertionError(
@@ -1128,12 +1120,14 @@ def read_from_json(path: epath.PathLike) -> dataset_info_pb2.DatasetInfo:
11281120
the DatasetInfo proto.
11291121
11301122
Raises:
1131-
FileNotFoundError: If the builder_dir does not exist.
1123+
DatasetInfoFileError: If the dataset info file cannot be read.
11321124
"""
11331125
try:
11341126
json_str = epath.Path(path).read_text()
11351127
except OSError as e:
1136-
raise FileNotFoundError(f"Could not load dataset info from {path}") from e
1128+
raise DatasetInfoFileError(
1129+
f"Could not read dataset info from {path}"
1130+
) from e
11371131
# Parse it back into a proto.
11381132
parsed_proto = json_format.Parse(json_str, dataset_info_pb2.DatasetInfo())
11391133
return parsed_proto
@@ -1151,7 +1145,7 @@ def read_proto_from_builder_dir(
11511145
The DatasetInfo proto as read from the builder dir.
11521146
11531147
Raises:
1154-
FileNotFoundError: If the builder_dir does not exist.
1148+
DatasetInfoFileError: If the dataset info file cannot be read.
11551149
"""
11561150
builder_dir = epath.Path(builder_dir).expanduser()
11571151
info_path = builder_dir / constants.DATASET_INFO_FILENAME
@@ -1173,8 +1167,7 @@ def read_full_proto_from_builder_dir(
11731167
dir.
11741168
11751169
Raises:
1176-
FileNotFoundError: If the builder_dir does not exist or it doesn't contain
1177-
dataset_info.json.
1170+
DatasetInfoFileError: If the dataset info file cannot be read.
11781171
"""
11791172
builder_dir = epath.Path(builder_dir).expanduser()
11801173
info_path = builder_dir / constants.DATASET_INFO_FILENAME
@@ -1513,3 +1506,12 @@ def save_metadata(self, data_dir):
15131506
self[key] = json.load(f)
15141507
self._tempdir.rmtree()
15151508
super(BeamMetadataDict, self).save_metadata(data_dir)
1509+
1510+
1511+
class DatasetInfoFileError(OSError):
1512+
"""Raised when the dataset info file cannot be read.
1513+
1514+
We use a custom exception rather than native exceptions, because different
1515+
backends of etils.epath throw different exceptions. This exception
1516+
catches them all.
1517+
"""

tensorflow_datasets/core/dataset_info_test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ def tearDownClass(cls):
9797
def test_non_existent_dir(self):
9898
info = dataset_info.DatasetInfo(builder=self._builder)
9999
with self.assertRaisesWithPredicateMatch(
100-
FileNotFoundError, "from a directory which does not exist"
100+
dataset_info.DatasetInfoFileError,
101+
"Could not read dataset info from",
101102
):
102103
info.read_from_directory(_NON_EXISTENT_DIR)
103104

tensorflow_datasets/core/read_only_builder_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from tensorflow_datasets import testing
2929
from tensorflow_datasets.core import constants
3030
from tensorflow_datasets.core import dataset_builder
31+
from tensorflow_datasets.core import dataset_info as dataset_info_lib
3132
from tensorflow_datasets.core import dataset_utils
3233
from tensorflow_datasets.core import load
3334
from tensorflow_datasets.core import proto
@@ -275,7 +276,10 @@ def test_restore_blocked_versions(
275276

276277

277278
def test_builder_from_directory_dir_not_exists(tmp_path: pathlib.Path):
278-
with pytest.raises(FileNotFoundError, match='Could not load dataset info'):
279+
with pytest.raises(
280+
dataset_info_lib.DatasetInfoFileError,
281+
match='Could not read dataset info from',
282+
):
279283
read_only_builder.builder_from_directory(tmp_path)
280284

281285

tensorflow_datasets/core/sequential_writer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def __init__(
256256
f'Trying to append a dataset with name {ds_info.name}'
257257
f' to an existing dataset with name {self._ds_info.name}'
258258
)
259-
except FileNotFoundError:
259+
except dataset_info.DatasetInfoFileError:
260260
self._ds_info.set_file_format(
261261
file_format=self._file_format,
262262
# if it was set, we want this to fail to warn the user

0 commit comments

Comments
 (0)