add OptoDAS support (#347)
* add OptoDAS support

* remove dead code, add entry point

* add data category as DAS
d-chambers authored Mar 15, 2024
1 parent 1d49530 commit 38518fb
Showing 9 changed files with 185 additions and 5 deletions.
1 change: 1 addition & 0 deletions dascore/data_registry.txt
@@ -17,4 +17,5 @@ brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769c
dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
opto_das_1.hdf5 0437d1f02d93c9f00d31133388efaf6a28c21883bcfac457b97f1224464c7dca https://github.com/dasdae/test_data/raw/master/das/opto_das_1.hdf5
whale_1.hdf5 a09922969e740307bf26dc6ffa7fb9fbb834dc7cd7d4ced02c66b159fb1ce0cd http://piweb.ooirsn.uw.edu/das/data/Optasense/NorthCable/TransmitFiber/North-C1-LR-P1kHz-GL50m-Sp2m-FS200Hz_2021-11-03T15_06_51-0700/North-C1-LR-P1kHz-GL50m-Sp2m-FS200Hz_2021-11-04T020002Z.h5
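
Each registry line pairs a test file's name with its SHA-256 digest and a download URL, so the test suite can fetch data on demand and verify its integrity. A minimal standard-library sketch of that verification step; the helper name and chunk size here are illustrative, not dascore API:

import hashlib
import urllib.request

def sha256_matches(url: str, expected: str, dest: str) -> bool:
    """Download a registry file and compare its SHA-256 digest to the registry value."""
    path, _ = urllib.request.urlretrieve(url, dest)
    digest = hashlib.sha256()
    with open(path, "rb") as fi:
        # Hash in 1 MiB chunks so large DAS files don't load fully into memory.
        for chunk in iter(lambda: fi.read(2**20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected
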
9 changes: 9 additions & 0 deletions dascore/io/optodas/__init__.py
@@ -0,0 +1,9 @@
"""
Support for the OptoDAS format.
This is used by the OptoDAS interrogator made by Alcatel Submarine Networks.
More info here: https://web.asn.com/
"""
from __future__ import annotations
from .core import OptoDASV8
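
Because the reader is registered as a plugin (see the pyproject.toml entry point below), no format-specific import is needed to use it. A short usage sketch with dascore's generic interface; the file path is a placeholder:

import dascore as dc

# dascore sniffs the file format, then dispatches to OptoDASV8 behind the scenes.
spool = dc.spool("path/to/opto_das_1.hdf5")
patch = spool[0]
print(patch.attrs.file_format, patch.attrs.file_version)  # e.g. "OptoDAS" "8"
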
66 changes: 66 additions & 0 deletions dascore/io/optodas/core.py
@@ -0,0 +1,66 @@
"""IO module for reading OptoDAS data."""
from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.models import UnitQuantity, UTF8Str

from .utils import _get_opto_das_attrs, _get_opto_das_version_str, _read_opto_das


class OptoDASPatchAttrs(dc.PatchAttrs):
"""Patch attrs for OptoDAS."""

gauge_length: float = np.nan
gauge_length_units: UnitQuantity | None = None
schema_version: UTF8Str = ""


class OptoDASV8(FiberIO):
"""Support for OptoDAS V 8."""

name = "OptoDAS"
preferred_extensions = ("hdf5", "h5")
version = "8"

def get_format(self, resource: H5Reader) -> tuple[str, str] | bool:
"""
Return the format name and version if the file contains OptoDAS version 8 data.

Parameters
----------
resource
A path to the file which may contain OptoDAS data.
"""
version_str = _get_opto_das_version_str(resource)
if version_str:
return self.name, version_str

def scan(self, resource: H5Reader) -> list[dc.PatchAttrs]:
"""Scan a OptoDAS file, return summary information about the file's contents."""
file_version = _get_opto_das_version_str(resource)
extras = {
"path": resource.filename,
"file_format": self.name,
"file_version": str(file_version),
}
attrs = _get_opto_das_attrs(resource)
attrs.update(extras)
return [OptoDASPatchAttrs(**attrs)]

def read(
self,
resource: H5Reader,
time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
distance: tuple[float | None, float | None] | None = None,
**kwargs,
) -> dc.BaseSpool:
"""Read a OptoDAS spool of patches."""
patches = _read_opto_das(
resource, time=time, distance=distance, attr_cls=OptoDASPatchAttrs
)
return dc.spool(patches)
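
The three hooks split the work cleanly: get_format fingerprints the file, scan returns summary attrs without loading data, and read builds the full spool. A hedged sketch of exercising them through dascore's top-level helpers, assuming dc.get_format, dc.scan, and dc.read dispatch to registered FiberIO plugins as they do for other formats, with a placeholder path:

import dascore as dc

path = "path/to/opto_das_1.hdf5"    # placeholder
fmt, version = dc.get_format(path)  # ("OptoDAS", "8")
attrs_list = dc.scan(path)          # [OptoDASPatchAttrs(...)], no data loaded
spool = dc.read(path)               # full spool of patches
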
89 changes: 89 additions & 0 deletions dascore/io/optodas/utils.py
@@ -0,0 +1,89 @@
"""Utilities for terra15."""
from __future__ import annotations

import dascore as dc
import dascore.core
from dascore.core.coords import get_coord
from dascore.utils.hdf5 import unpack_scalar_h5_dataset
from dascore.utils.misc import unbyte

# --- Getting format/version


def _get_opto_das_version_str(hdf_fi) -> str:
"""Return the version string for OptoDAS file."""
# define a few root groups/datasets that act as a "fingerprint"
expected_attrs = (
"acqSpec",
"header",
"cableSpec",
"data",
"fileVersion",
)
if not all(x in hdf_fi for x in expected_attrs):
return ""
version_str = str(unbyte(hdf_fi["fileVersion"][()]))
return version_str


def _get_coord_manager(header):
"""Get the distance ranges and spacing."""
dims = tuple(unbyte(x) for x in header["dimensionNames"])
units = tuple(unbyte(x) for x in header["dimensionUnits"])

coords = {}
for index, (dim, unit) in enumerate(zip(dims, units)):
crange = header["dimensionRanges"][f"dimension{index}"]
step = unpack_scalar_h5_dataset(crange["unitScale"])

# special case for time.
if dim == "time":
step = dc.to_timedelta64(step)
t1 = dc.to_datetime64(unpack_scalar_h5_dataset(header["time"]))
start = t1 + unpack_scalar_h5_dataset(crange["min"]) * step
stop = t1 + (unpack_scalar_h5_dataset(crange["max"]) + 1) * step
else:
# The min/max values appear to be int ranges so we need to
# multiply by step.
start = unpack_scalar_h5_dataset(crange["min"]) * step
stop = (unpack_scalar_h5_dataset(crange["max"]) + 1) * step

coords[dim] = get_coord(min=start, max=stop, step=step, units=unit)
return dascore.core.get_coord_manager(coords=coords, dims=dims)


def _get_attr_dict(header):
"""Map header info to DAS attrs."""
attr_map = {
"gaugeLength": "gauge_length",
"unit": "data_units",
"instrument": "intrument_id",
"experiment": "acquisition_id",
}
out = {"data_category": "DAS"}
for head_name, attr_name in attr_map.items():
value = header[head_name]
if hasattr(value, "shape"):
value = unpack_scalar_h5_dataset(value)
out[attr_name] = unbyte(value)
return out


def _get_opto_das_attrs(fi) -> dict:
"""Scan a OptoDAS file, return metadata."""
header = fi["header"]
cm = _get_coord_manager(header)
attrs = _get_attr_dict(header)
attrs["coords"] = cm
return attrs


def _read_opto_das(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
"""Read the OptoDAS values into a patch."""
attrs = _get_opto_das_attrs(fi)
data_node = fi["data"]
coords = attrs.pop("coords")
cm, data = coords.select(array=data_node, distance=distance, time=time)
attrs["coords"] = cm.to_summary_dict()
attrs["dims"] = cm.dims
return [dc.Patch(data=data, coords=cm, attrs=attr_cls(**attrs))]
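
The coordinate logic above treats min and max as integer sample indices scaled by unitScale, with max + 1 placing the stop value one step past the last sample. A small numeric sketch of the time branch, with invented values:

import dascore as dc

step = dc.to_timedelta64(0.002)  # unitScale: 2 ms between samples
t1 = dc.to_datetime64("2024-03-15T00:00:00")  # header["time"]
cmin, cmax = 0, 4999  # crange["min"], crange["max"]: 5000 samples

start = t1 + cmin * step       # 2024-03-15T00:00:00
stop = t1 + (cmax + 1) * step  # one step past the last sample
assert stop - start == dc.to_timedelta64(10.0)  # 5000 * 2 ms = 10 s
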
4 changes: 2 additions & 2 deletions dascore/io/prodml/core.py
@@ -31,12 +31,12 @@ class ProdMLV2_0(FiberIO): # noqa

def get_format(self, resource: PyTablesReader) -> tuple[str, str] | bool:
"""
Return True if file contains terra15 version 2 data else False.
Return True if file contains prodML version 2 data else False.
Parameters
----------
resource
A path to the file which may contain terra15 data.
A path to the file which may contain prodML data.
"""
version_str = _get_prodml_version_str(resource)
if version_str:
4 changes: 2 additions & 2 deletions dascore/io/prodml/utils.py
@@ -1,4 +1,4 @@
"""Utilities for terra15."""
"""Utilities for prodML."""
from __future__ import annotations

import dascore as dc
@@ -12,7 +12,7 @@

def _get_prodml_version_str(hdf_fi) -> str:
"""Return the version string for prodml file."""
# define a few root attrs that act as a "fingerprint" for terra15 files
# define a few root attrs that act as a "fingerprint" for prodML files

acquisition = getattr(hdf_fi.root, "Acquisition", None)
if acquisition is None:
12 changes: 12 additions & 0 deletions dascore/utils/hdf5.py
@@ -456,3 +456,15 @@ class H5Writer(H5Reader):
# used in new code.
HDF5Writer = PyTablesWriter
HDF5Reader = PyTablesReader


def unpack_scalar_h5_dataset(dataset):
"""
Unpack a scalar H5Py dataset.
"""
assert dataset.size == 1
# This gets weird because datasets can be of shape () or (1,).
value = dataset[()]
if isinstance(value, np.ndarray):
value = value[0]
return value
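
A short h5py sketch of the two scalar layouts this helper normalizes; the file is held in memory so nothing touches disk:

import h5py

from dascore.utils.hdf5 import unpack_scalar_h5_dataset

with h5py.File("scratch.h5", "w", driver="core", backing_store=False) as fi:
    fi.create_dataset("a", data=1.5)    # shape (): dataset[()] is a scalar
    fi.create_dataset("b", data=[1.5])  # shape (1,): dataset[()] is an array
    assert unpack_scalar_h5_dataset(fi["a"]) == 1.5
    assert unpack_scalar_h5_dataset(fi["b"]) == 1.5
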
1 change: 1 addition & 0 deletions pyproject.toml
@@ -103,6 +103,7 @@ dev = ["dascore[test]", "dascore[docs]", "dascore[profile]", "dascore[extras]"]
DASDAE__V1 = "dascore.io.dasdae.core:DASDAEV1"
DASHDF5__V1 = "dascore.io.dashdf5.core:DASHDF5"
H5SIMPLE__V1_0 = "dascore.io.h5simple.core:H5Simple"
OPTODAS__V8 = "dascore.io.optodas.core:OptoDASV8"
PICKLE = "dascore.io.pickle.core:PickleIO"
PRODML__V2_0 = "dascore.io.prodml.core:ProdMLV2_0"
PRODML__V2_1 = "dascore.io.prodml.core:ProdMLV2_1"
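
These entry points are how dascore discovers FiberIO plugins, so the single OPTODAS__V8 line is the only registration the new reader needs. A standard-library sketch of that discovery; the group name "dascore.fiber_io" is inferred from this section of pyproject.toml and may differ:

from importlib.metadata import entry_points

# Requires Python >= 3.10 for the keyword form of entry_points.
for ep in entry_points(group="dascore.fiber_io"):
    print(ep.name, "->", ep.value)
# OPTODAS__V8 -> dascore.io.optodas.core:OptoDASV8, among the rest
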
4 changes: 3 additions & 1 deletion tests/test_io/test_common_io.py
@@ -24,6 +24,7 @@
from dascore.io.dasdae import DASDAEV1
from dascore.io.dashdf5 import DASHDF5
from dascore.io.h5simple import H5Simple
from dascore.io.optodas import OptoDASV8
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
from dascore.io.segy import SegyV2
@@ -47,6 +48,7 @@
# See the docs on adding a new IO format, in the contributing section,
# for more details.
COMMON_IO_READ_TESTS = {
OptoDASV8(): ("opto_das_1.hdf5",),
DASDAEV1(): ("example_dasdae_event_1.h5",),
H5Simple(): ("h5_simple_2.h5", "h5_simple_1.h5"),
ProdMLV2_0(): ("prodml_2.0.h5", "opta_sense_quantx_v2.h5"),
@@ -268,7 +270,7 @@ def test_slice_single_dim_both_ends(self, io_path_tuple):
stop = getattr(attrs_init, f"{dim}_max")
duration = stop - start
# first test double ended query
trim_tuple = (start + duration / 10, start + 2 * duration // 10)
trim_tuple = (start + duration / 10, start + 2 * duration / 10)
spool = io.read(path, **{dim: trim_tuple})
assert len(spool) == 1
patch = spool[0]