Skip to content
This repository was archived by the owner on Apr 19, 2023. It is now read-only.

Commit d8287f5

Browse files
Enable reading n5 or zarr data in HDF5VolumeLoader
1 parent f0a979a commit d8287f5

File tree

3 files changed

+42
-20
lines changed

3 files changed

+42
-20
lines changed

inferno/io/volumetric/volume.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,17 @@ def __repr__(self):
125125

126126

127127
class HDF5VolumeLoader(VolumeLoader):
128+
129+
@staticmethod
130+
def is_h5(file_path):
131+
ext = os.path.splitext(file_path)[1].lower()
132+
if ext in ('.h5', '.hdf', '.hdf5'):
133+
return True
134+
elif ext in ('.zarr', '.zr', '.n5'):
135+
return False
136+
else:
137+
raise RuntimeError("Could not infer volume type for file extension %s" % ext)
138+
128139
def __init__(self, path, path_in_h5_dataset=None, data_slice=None, transforms=None,
129140
name=None, **slicing_config):
130141

@@ -163,11 +174,14 @@ def __init__(self, path, path_in_h5_dataset=None, data_slice=None, transforms=No
163174
assert 'window_size' in slicing_config_for_name
164175
assert 'stride' in slicing_config_for_name
165176

166-
# Read in volume from file
167-
volume = iou.fromh5(self.path, self.path_in_h5_dataset,
168-
dataslice=(tuple(self.data_slice)
169-
if self.data_slice is not None
170-
else None))
177+
# Read in volume from file (can be hdf5, n5 or zarr)
178+
dataslice_ = None if self.data_slice is None else tuple(self.data_slice)
179+
if self.is_h5(self.path):
180+
volume = iou.fromh5(self.path, self.path_in_h5_dataset,
181+
dataslice=dataslice_)
182+
else:
183+
volume = iou.fromz5(self.path, self.path_in_h5_dataset,
184+
dataslice=dataslice_)
171185
# Initialize superclass with the volume
172186
super(HDF5VolumeLoader, self).__init__(volume=volume, name=name, transforms=transforms,
173187
**slicing_config_for_name)

inferno/io/volumetric/volumetric_utils.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def dimension_window(start, stop, wsize, stride, dimsize, ds_dim):
4949
stops = [dimsize - wsize if wsize != dimsize else dimsize
5050
for dimsize, wsize in zip(shape, window_size)]
5151

52-
assert all(stp > strt for strt, stp in zip(starts, stops)), "%s, %s" % (str(starts), str(stops))
52+
assert all(stp > strt for strt, stp in zip(starts, stops)),\
53+
"%s, %s" % (str(starts), str(stops))
5354
nslices = [dimension_window(start, stop, wsize, stride, dimsize, ds_dim)
5455
for start, stop, wsize, stride, dimsize, ds_dim
5556
in zip(starts, stops, window_size, strides, shape, ds)]

inferno/utils/io_utils.py

+21-14
Original file line numberDiff line numberDiff line change
def fromh5(path, datapath=None, dataslice=None, asnumpy=True, preptrain=None):
    """Read a dataset from an HDF5 file.

    Parameters
    ----------
    path : str
        Path to the HDF5 file on disk.
    datapath : str
        Internal path of the dataset inside the file. Defaults to the
        first dataset found in the file.
    dataslice : tuple of slice
        Optional slicing applied to the dataset before it is read.
    asnumpy : bool
        Whether to convert the result to a numpy array.
    preptrain : callable
        Optional transform applied to the loaded data.
    """
    # Check if path exists (thanks Lukas!)
    assert os.path.exists(path), "Path {} does not exist.".format(path)
    with h5.File(path, 'r') as f:
        # Fall back to the first dataset when no datapath is given.
        # BUGFIX: ``f.values()[0]`` fails on Python 3 because dict views
        # are not indexable; take the first element of the iterator instead.
        h5dataset = f[datapath] if datapath is not None else next(iter(f.values()))
        # Slice dataset
        h5dataset = h5dataset[dataslice] if dataslice is not None else h5dataset
        # Convert to numpy if required
        h5dataset = np.asarray(h5dataset) if asnumpy else h5dataset
        # Apply preptrain
        h5dataset = preptrain(h5dataset) if preptrain is not None else h5dataset
    # NOTE(review): if asnumpy is False and neither dataslice nor preptrain
    # materialized the data, this returns an h5py Dataset whose file has
    # already been closed — confirm callers always pass asnumpy=True.
    return h5dataset
3026

3127

3228
# TODO we could also do **h5_kwargs instead
def toh5(data, path, datapath='data', compression=None, chunks=None):
    """Write `data` to a HDF5 volume.

    Parameters
    ----------
    data : array-like
        The volume to write.
    path : str
        Path of the HDF5 file to write to.
    datapath : str
        Internal path of the dataset inside the file.
    compression, chunks
        Forwarded to ``h5py.File.create_dataset``.
    """
    # Open with an explicit mode: relying on h5py's default is fragile
    # (the default changed from 'a' to 'r' in h5py 3.0, which would make
    # this writer fail). Append mode keeps the ability to add several
    # datasets to the same file; note create_dataset raises if `datapath`
    # already exists in the file.
    with h5.File(path, 'a') as f:
        f.create_dataset(datapath, data=data, compression=compression, chunks=chunks)
3733

3834

35+
def fromz5(path, datapath, dataslice=None, n_threads=8):
    """Read a dataset from an n5 / zarr container via z5py.

    Parameters
    ----------
    path : str
        Path to the n5 / zarr container on disk.
    datapath : str
        Internal path of the dataset inside the container.
    dataslice : tuple of slice
        Optional slicing applied when reading; the full dataset is read
        when omitted.
    n_threads : int
        Number of threads z5py uses for reading.
    """
    # we import z5py only here because we don't want to assume that it's in the env
    import z5py
    assert os.path.exists(path), "Path {} does not exist.".format(path)
    with z5py.File(path) as container:
        dataset = container[datapath]
        # let z5py parallelize chunk reading
        dataset.n_threads = n_threads
        if dataslice is None:
            data = dataset[:]
        else:
            data = dataset[dataslice]
    return data
44+
45+
3946
# Yaml to dict reader
4047
def yaml2dict(path):
4148
if isinstance(path, dict):

0 commit comments

Comments
 (0)