diff --git a/dascore/data_registry.txt b/dascore/data_registry.txt
index b8143f39..f4deb970 100644
--- a/dascore/data_registry.txt
+++ b/dascore/data_registry.txt
@@ -17,3 +17,4 @@ brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769c
 dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
 PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
 DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
+opto_das_1.hdf5 0437d1f02d93c9f00d31133388efaf6a28c21883bcfac457b97f1224464c7dca https://github.com/dasdae/test_data/raw/master/das/opto_das_1.hdf5
diff --git a/dascore/io/optodas/__init__.py b/dascore/io/optodas/__init__.py
new file mode 100644
index 00000000..2076107f
--- /dev/null
+++ b/dascore/io/optodas/__init__.py
@@ -0,0 +1,9 @@
+"""
+Support for the OptoDAS format.
+
+This is used by the OptoDAS interrogator made by Alcatel Submarine Networks.
+
+More info here: https://web.asn.com/
+"""
+from __future__ import annotations
+from .core import OptoDASV8
diff --git a/dascore/io/optodas/core.py b/dascore/io/optodas/core.py
new file mode 100644
index 00000000..2d70aef6
--- /dev/null
+++ b/dascore/io/optodas/core.py
@@ -0,0 +1,66 @@
+"""IO module for reading OptoDAS data."""
+from __future__ import annotations
+
+import numpy as np
+
+import dascore as dc
+from dascore.constants import opt_timeable_types
+from dascore.io import FiberIO
+from dascore.utils.hdf5 import H5Reader
+from dascore.utils.models import UnitQuantity, UTF8Str
+
+from .utils import _get_opto_das_attrs, _get_opto_das_version_str, _read_opto_das
+
+
+class OptoDASPatchAttrs(dc.PatchAttrs):
+    """Patch attrs for OptoDAS."""
+
+    gauge_length: float = np.NaN
+    gauge_length_units: UnitQuantity | None = None
+    schema_version: UTF8Str = ""
+
+
+class OptoDASV8(FiberIO):
+    """Support for OptoDAS version 8."""
+
+    name = "OptoDAS"
+    preferred_extensions = ("hdf5", "h5")
+    version = "8"
+
+    def get_format(self, resource: H5Reader) -> tuple[str, str] | bool:
+        """
+        Return True if file contains OptoDAS version 8 data else False.
+
+        Parameters
+        ----------
+        resource
+            A path to the file which may contain OptoDAS data.
+ """ + version_str = _get_opto_das_version_str(resource) + if version_str: + return self.name, version_str + + def scan(self, resource: H5Reader) -> list[dc.PatchAttrs]: + """Scan a OptoDAS file, return summary information about the file's contents.""" + file_version = _get_opto_das_version_str(resource) + extras = { + "path": resource.filename, + "file_format": self.name, + "file_version": str(file_version), + } + attrs = _get_opto_das_attrs(resource) + attrs.update(extras) + return [OptoDASPatchAttrs(**attrs)] + + def read( + self, + resource: H5Reader, + time: tuple[opt_timeable_types, opt_timeable_types] | None = None, + distance: tuple[float | None, float | None] | None = None, + **kwargs, + ) -> dc.BaseSpool: + """Read a OptoDAS spool of patches.""" + patches = _read_opto_das( + resource, time=time, distance=distance, attr_cls=OptoDASPatchAttrs + ) + return dc.spool(patches) diff --git a/dascore/io/optodas/utils.py b/dascore/io/optodas/utils.py new file mode 100644 index 00000000..cd56f455 --- /dev/null +++ b/dascore/io/optodas/utils.py @@ -0,0 +1,104 @@ +"""Utilities for terra15.""" +from __future__ import annotations + +import dascore as dc +import dascore.core +from dascore.constants import VALID_DATA_TYPES +from dascore.core.coords import get_coord +from dascore.utils.hdf5 import unpack_scalar_h5_dataset +from dascore.utils.misc import maybe_get_attrs, unbyte + +# --- Getting format/version + + +def _get_opto_das_version_str(hdf_fi) -> str: + """Return the version string for OptoDAS file.""" + # define a few root attrs that act as a "fingerprint" + expected_attrs = ( + "acqSpec", + "header", + "cableSpec", + "data", + "fileVersion", + ) + if not all([x in hdf_fi for x in expected_attrs]): + return "" + version_str = str(unbyte(hdf_fi["fileVersion"][()])) + return version_str + + +def _get_coord_manager(header): + """Get the distance ranges and spacing.""" + dims = tuple(unbyte(x) for x in header["dimensionNames"]) + units = tuple(unbyte(x) for x in header["dimensionUnits"]) + + coords = {} + for index, (dim, unit) in enumerate(zip(dims, units)): + crange = header["dimensionRanges"][f"dimension{index}"] + step = unpack_scalar_h5_dataset(crange["unitScale"]) + + # special case for time. + if dim == "time": + step = dc.to_timedelta64(step) + t1 = dc.to_datetime64(unpack_scalar_h5_dataset(header["time"])) + start = t1 + unpack_scalar_h5_dataset(crange["min"]) * step + stop = t1 + (unpack_scalar_h5_dataset(crange["max"]) + 1) * step + else: + # The min/max values appear to be int ranges so we need to + # multiply by step. 
+            start = unpack_scalar_h5_dataset(crange["min"]) * step
+            stop = (unpack_scalar_h5_dataset(crange["max"]) + 1) * step
+
+        coords[dim] = get_coord(min=start, max=stop, step=step, units=unit)
+    return dascore.core.get_coord_manager(coords=coords, dims=dims)
+
+
+def _get_data_unit_and_type(node):
+    """Get the data type and units."""
+    attrs = node._v_attrs
+    attr_map = {
+        "RawDescription": "data_type",
+        "RawDataUnit": "data_units",
+    }
+    out = maybe_get_attrs(attrs, attr_map)
+    if (data_type := out.get("data_type")) is not None:
+        clean = data_type.lower().replace(" ", "_")
+        out["data_type"] = clean if clean in VALID_DATA_TYPES else ""
+    return out
+
+
+def _get_attr_dict(header):
+    """Map header info to DAS attrs."""
+    attr_map = {
+        "gaugeLength": "gauge_length",
+        "unit": "data_units",
+        "instrument": "instrument_id",
+        "experiment": "acquisition_id",
+    }
+    out = {}
+    for head_name, attr_name in attr_map.items():
+        value = header[head_name]
+        if hasattr(value, "shape"):
+            value = unpack_scalar_h5_dataset(value)
+        out[attr_name] = unbyte(value)
+    return out
+
+
+def _get_opto_das_attrs(fi) -> dict:
+    """Scan an OptoDAS file, return metadata."""
+    header = fi["header"]
+    cm = _get_coord_manager(header)
+    attrs = _get_attr_dict(header)
+    attrs["coords"] = cm
+    return attrs
+
+
+def _read_opto_das(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
+    """Read the OptoDAS values into a patch."""
+    attrs = _get_opto_das_attrs(fi)
+    data_node = fi["data"]
+    coords = attrs.pop("coords")
+    cm, data = coords.select(array=data_node, distance=distance, time=time)
+    attrs["coords"] = cm.to_summary_dict()
+    attrs["dims"] = cm.dims
+    return [dc.Patch(data=data, coords=cm, attrs=attr_cls(**attrs))]
diff --git a/dascore/io/prodml/core.py b/dascore/io/prodml/core.py
index 63b0295b..e2f48e03 100644
--- a/dascore/io/prodml/core.py
+++ b/dascore/io/prodml/core.py
@@ -31,12 +31,12 @@ class ProdMLV2_0(FiberIO):  # noqa
 
     def get_format(self, resource: PyTablesReader) -> tuple[str, str] | bool:
         """
-        Return True if file contains terra15 version 2 data else False.
+        Return True if file contains prodML version 2 data else False.
 
         Parameters
         ----------
         resource
-            A path to the file which may contain terra15 data.
+            A path to the file which may contain prodML data.
         """
         version_str = _get_prodml_version_str(resource)
         if version_str:
diff --git a/dascore/io/prodml/utils.py b/dascore/io/prodml/utils.py
index efcab3cb..60f723bd 100644
--- a/dascore/io/prodml/utils.py
+++ b/dascore/io/prodml/utils.py
@@ -1,4 +1,4 @@
-"""Utilities for terra15."""
+"""Utilities for prodML."""
 from __future__ import annotations
 
 import dascore as dc
@@ -12,7 +12,7 @@
 def _get_prodml_version_str(hdf_fi) -> str:
     """Return the version string for prodml file."""
-    # define a few root attrs that act as a "fingerprint" for terra15 files
+    # define a few root attrs that act as a "fingerprint" for prodML files
     acquisition = getattr(hdf_fi.root, "Acquisition", None)
     if acquisition is None:
diff --git a/dascore/utils/hdf5.py b/dascore/utils/hdf5.py
index 50ab9898..5c3c0016 100644
--- a/dascore/utils/hdf5.py
+++ b/dascore/utils/hdf5.py
@@ -456,3 +456,15 @@ class H5Writer(H5Reader):
 # used in new code.
 HDF5Writer = PyTablesWriter
 HDF5Reader = PyTablesReader
+
+
+def unpack_scalar_h5_dataset(dataset):
+    """
+    Unpack a scalar H5Py dataset.
+    """
+    assert dataset.size == 1
+    # This gets weird because datasets can be of shape () or (1,).
+    value = dataset[()]
+    if isinstance(value, np.ndarray):
+        value = value[0]
+    return value
diff --git a/tests/test_io/test_common_io.py b/tests/test_io/test_common_io.py
index 8aaeffcc..a8e2c9ff 100644
--- a/tests/test_io/test_common_io.py
+++ b/tests/test_io/test_common_io.py
@@ -24,6 +24,7 @@
 from dascore.io.dasdae import DASDAEV1
 from dascore.io.dashdf5 import DASHDF5
 from dascore.io.h5simple import H5Simple
+from dascore.io.optodas import OptoDASV8
 from dascore.io.pickle import PickleIO
 from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
 from dascore.io.segy import SegyV2
@@ -47,6 +48,7 @@
 # See the docs on adding a new IO format, in the contributing section,
 # for more details.
 COMMON_IO_READ_TESTS = {
+    OptoDASV8(): ("opto_das_1.hdf5",),
     DASDAEV1(): ("example_dasdae_event_1.h5",),
     H5Simple(): ("h5_simple_2.h5", "h5_simple_1.h5"),
     ProdMLV2_0(): ("prodml_2.0.h5", "opta_sense_quantx_v2.h5"),
@@ -265,7 +267,7 @@ def test_slice_single_dim_both_ends(self, io_path_tuple):
         stop = getattr(attrs_init, f"{dim}_max")
         duration = stop - start
         # first test double ended query
-        trim_tuple = (start + duration / 10, start + 2 * duration // 10)
+        trim_tuple = (start + duration / 10, start + 2 * duration / 10)
         spool = io.read(path, **{dim: trim_tuple})
         assert len(spool) == 1
         patch = spool[0]
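
Note for reviewers: a minimal, untested sketch of exercising the new plugin locally. It assumes the data_registry.txt entry above is merged so dascore.utils.downloader.fetch can resolve opto_das_1.hdf5, and that dc.spool dispatches to OptoDASV8 through the usual FiberIO discovery; the gauge_length attribute comes from the OptoDASPatchAttrs model defined in this diff.

    import dascore as dc
    from dascore.utils.downloader import fetch

    # Fetch (and cache) the test file registered in data_registry.txt above.
    path = fetch("opto_das_1.hdf5")

    # FiberIO subclasses are discovered automatically, so the generic spool
    # machinery routes this file to OptoDASV8 via its get_format check.
    spool = dc.spool(path)
    patch = spool[0]
    print(patch.dims, patch.attrs.gauge_length)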