
Commit

add OptoDAS support
d-chambers committed Feb 29, 2024
1 parent 8968e7f commit 45e5e66
Showing 8 changed files with 199 additions and 5 deletions.
1 change: 1 addition & 0 deletions dascore/data_registry.txt
@@ -17,3 +17,4 @@ brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769c
dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
opto_das_1.hdf5 0437d1f02d93c9f00d31133388efaf6a28c21883bcfac457b97f1224464c7dca https://github.com/dasdae/test_data/raw/master/das/opto_das_1.hdf5
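
Each registry row is a file name, its sha256 hash, and a source URL; dascore's pooch-backed downloader uses these entries to fetch and verify test data. A minimal sketch of retrieving the new file (assuming the fetch helper lives at dascore.utils.downloader, as in current dascore):

# Sketch: fetch the newly registered OptoDAS test file; the import path
# is an assumption based on dascore's test-data utilities.
from dascore.utils.downloader import fetch

path = fetch("opto_das_1.hdf5")  # downloads once, verifies sha256, caches
print(path)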
9 changes: 9 additions & 0 deletions dascore/io/optodas/__init__.py
@@ -0,0 +1,9 @@
"""
Support for OptoDAS format.
This is used by the OptoDAS interrogator made by Alcatel Submarine Networks.
More info here: https://web.asn.com/
"""
from __future__ import annotations
from .core import OptoDASV8
66 changes: 66 additions & 0 deletions dascore/io/optodas/core.py
@@ -0,0 +1,66 @@
"""IO module for reading OptoDAS data."""
from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.models import UnitQuantity, UTF8Str

from .utils import _get_opto_das_attrs, _get_opto_das_version_str, _read_opto_das


class OptoDASPatchAttrs(dc.PatchAttrs):
    """Patch attrs for OptoDAS."""

    gauge_length: float = np.NaN
    gauge_length_units: UnitQuantity | None = None
    schema_version: UTF8Str = ""


class OptoDASV8(FiberIO):
    """Support for OptoDAS version 8."""

    name = "OptoDAS"
    preferred_extensions = ("hdf5", "h5")
    version = "8"

    def get_format(self, resource: H5Reader) -> tuple[str, str] | bool:
        """
        Return True if file contains OptoDAS version 8 data else False.

        Parameters
        ----------
        resource
            A path to the file which may contain OptoDAS data.
        """
        version_str = _get_opto_das_version_str(resource)
        if version_str:
            return self.name, version_str

    def scan(self, resource: H5Reader) -> list[dc.PatchAttrs]:
        """Scan an OptoDAS file, return summary information about its contents."""
        file_version = _get_opto_das_version_str(resource)
        extras = {
            "path": resource.filename,
            "file_format": self.name,
            "file_version": str(file_version),
        }
        attrs = _get_opto_das_attrs(resource)
        attrs.update(extras)
        return [OptoDASPatchAttrs(**attrs)]

    def read(
        self,
        resource: H5Reader,
        time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
        distance: tuple[float | None, float | None] | None = None,
        **kwargs,
    ) -> dc.BaseSpool:
        """Read an OptoDAS file into a spool of patches."""
        patches = _read_opto_das(
            resource, time=time, distance=distance, attr_cls=OptoDASPatchAttrs
        )
        return dc.spool(patches)
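
For context, a short usage sketch once the format is wired in (the file name comes from the registry above; scan and spool are standard dascore entry points, and the printed values assume this example file):

import dascore as dc

# scan returns summary attrs without loading data into memory.
attrs = dc.scan("opto_das_1.hdf5")[0]
print(attrs.file_format, attrs.file_version)  # OptoDAS 8

# spool reads the file into patches; format detection goes through
# OptoDASV8.get_format.
spool = dc.spool("opto_das_1.hdf5")
print(spool[0].dims)  # e.g. ("time", "distance")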
104 changes: 104 additions & 0 deletions dascore/io/optodas/utils.py
@@ -0,0 +1,104 @@
"""Utilities for terra15."""
from __future__ import annotations

import dascore as dc
import dascore.core
from dascore.constants import VALID_DATA_TYPES
from dascore.core.coords import get_coord
from dascore.utils.hdf5 import unpack_scalar_h5_dataset
from dascore.utils.misc import maybe_get_attrs, unbyte

# --- Getting format/version


def _get_opto_das_version_str(hdf_fi) -> str:
    """Return the version string for an OptoDAS file."""
    # define a few root entries that act as a "fingerprint" for OptoDAS files
    expected_attrs = (
        "acqSpec",
        "header",
        "cableSpec",
        "data",
        "fileVersion",
    )
    if not all(x in hdf_fi for x in expected_attrs):
        return ""
    version_str = str(unbyte(hdf_fi["fileVersion"][()]))
    return version_str


def _get_coord_manager(header):
    """Build a coordinate manager from the file header."""
    dims = tuple(unbyte(x) for x in header["dimensionNames"])
    units = tuple(unbyte(x) for x in header["dimensionUnits"])

    coords = {}
    for index, (dim, unit) in enumerate(zip(dims, units)):
        crange = header["dimensionRanges"][f"dimension{index}"]
        step = unpack_scalar_h5_dataset(crange["unitScale"])

        # Special case for time: offsets are relative to the acquisition start.
        if dim == "time":
            step = dc.to_timedelta64(step)
            t1 = dc.to_datetime64(unpack_scalar_h5_dataset(header["time"]))
            start = t1 + unpack_scalar_h5_dataset(crange["min"]) * step
            stop = t1 + (unpack_scalar_h5_dataset(crange["max"]) + 1) * step
        else:
            # The min/max values appear to be int ranges, so we need to
            # multiply by step.
            start = unpack_scalar_h5_dataset(crange["min"]) * step
            stop = (unpack_scalar_h5_dataset(crange["max"]) + 1) * step

        coords[dim] = get_coord(min=start, max=stop, step=step, units=unit)
    return dascore.core.get_coord_manager(coords=coords, dims=dims)


def _get_data_unit_and_type(node):
    """Get the data type and units."""
    attrs = node._v_attrs
    attr_map = {
        "RawDescription": "data_type",
        "RawDataUnit": "data_units",
    }
    out = maybe_get_attrs(attrs, attr_map)
    if (data_type := out.get("data_type")) is not None:
        clean = data_type.lower().replace(" ", "_")
        out["data_type"] = clean if clean in VALID_DATA_TYPES else ""
    return out


def _get_attr_dict(header):
    """Map header info to DAS attrs."""
    attr_map = {
        "gaugeLength": "gauge_length",
        "unit": "data_units",
        "instrument": "instrument_id",
        "experiment": "acquisition_id",
    }
    out = {}
    for head_name, attr_name in attr_map.items():
        value = header[head_name]
        if hasattr(value, "shape"):
            value = unpack_scalar_h5_dataset(value)
        out[attr_name] = unbyte(value)
    return out


def _get_opto_das_attrs(fi) -> dict:
    """Scan an OptoDAS file, return metadata."""
    header = fi["header"]
    cm = _get_coord_manager(header)
    attrs = _get_attr_dict(header)
    attrs["coords"] = cm
    return attrs


def _read_opto_das(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
    """Read the OptoDAS values into a patch."""
    attrs = _get_opto_das_attrs(fi)
    data_node = fi["data"]
    coords = attrs.pop("coords")
    cm, data = coords.select(array=data_node, distance=distance, time=time)
    attrs["coords"] = cm.to_summary_dict()
    attrs["dims"] = cm.dims
    return [dc.Patch(data=data, coords=cm, attrs=attr_cls(**attrs))]
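
The index-to-coordinate logic in _get_coord_manager is worth a worked example: min and max are inclusive integer indices, so the exclusive stop needs max + 1, and every index is scaled by the unitScale step. A standalone sketch with hypothetical values (no HDF5 involved):

import numpy as np

# Hypothetical values mimicking header["dimensionRanges"]["dimension1"].
idx_min, idx_max = 0, 9999  # inclusive index range
step = 1.02                 # unitScale, e.g. meters between channels

start = idx_min * step
stop = (idx_max + 1) * step  # +1 because idx_max is inclusive
n_samples = idx_max - idx_min + 1
distance = start + step * np.arange(n_samples)
assert len(distance) == 10000 and distance[0] == start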
4 changes: 2 additions & 2 deletions dascore/io/prodml/core.py
@@ -31,12 +31,12 @@ class ProdMLV2_0(FiberIO): # noqa

    def get_format(self, resource: PyTablesReader) -> tuple[str, str] | bool:
        """
-        Return True if file contains terra15 version 2 data else False.
+        Return True if file contains prodML version 2 data else False.

        Parameters
        ----------
        resource
-            A path to the file which may contain terra15 data.
+            A path to the file which may contain prodML data.
        """
        version_str = _get_prodml_version_str(resource)
        if version_str:
4 changes: 2 additions & 2 deletions dascore/io/prodml/utils.py
@@ -1,4 +1,4 @@
"""Utilities for terra15."""
"""Utilities for prodML."""
from __future__ import annotations

import dascore as dc
@@ -12,7 +12,7 @@

def _get_prodml_version_str(hdf_fi) -> str:
    """Return the version string for prodml file."""
-    # define a few root attrs that act as a "fingerprint" for terra15 files
+    # define a few root attrs that act as a "fingerprint" for prodML files

    acquisition = getattr(hdf_fi.root, "Acquisition", None)
    if acquisition is None:
12 changes: 12 additions & 0 deletions dascore/utils/hdf5.py
@@ -456,3 +456,15 @@ class H5Writer(H5Reader):
# used in new code.
HDF5Writer = PyTablesWriter
HDF5Reader = PyTablesReader


def unpack_scalar_h5_dataset(dataset):
    """Unpack a scalar H5Py dataset."""
    assert dataset.size == 1
    # This gets weird because datasets can be of shape () or (1,).
    value = dataset[()]
    if isinstance(value, np.ndarray):
        value = value[0]
    return value
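
To see why the helper handles both layouts, here is a sketch with an in-memory h5py file: assigning a Python scalar produces a shape-() dataset, while a length-1 array produces shape (1,):

import h5py
import numpy as np

from dascore.utils.hdf5 import unpack_scalar_h5_dataset

# driver="core" with backing_store=False keeps the file in memory only.
with h5py.File("demo.h5", "w", driver="core", backing_store=False) as fi:
    fi["a"] = 1.5              # stored with shape ()
    fi["b"] = np.array([1.5])  # stored with shape (1,)
    assert unpack_scalar_h5_dataset(fi["a"]) == 1.5
    assert unpack_scalar_h5_dataset(fi["b"]) == 1.5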
4 changes: 3 additions & 1 deletion tests/test_io/test_common_io.py
@@ -24,6 +24,7 @@
from dascore.io.dasdae import DASDAEV1
from dascore.io.dashdf5 import DASHDF5
from dascore.io.h5simple import H5Simple
+from dascore.io.optodas import OptoDASV8
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
from dascore.io.segy import SegyV2
@@ -47,6 +48,7 @@
# See the docs on adding a new IO format, in the contributing section,
# for more details.
COMMON_IO_READ_TESTS = {
+    OptoDASV8(): ("opto_das_1.hdf5",),
    DASDAEV1(): ("example_dasdae_event_1.h5",),
    H5Simple(): ("h5_simple_2.h5", "h5_simple_1.h5"),
    ProdMLV2_0(): ("prodml_2.0.h5", "opta_sense_quantx_v2.h5"),
@@ -265,7 +267,7 @@ def test_slice_single_dim_both_ends(self, io_path_tuple):
        stop = getattr(attrs_init, f"{dim}_max")
        duration = stop - start
        # first test double ended query
-        trim_tuple = (start + duration / 10, start + 2 * duration // 10)
+        trim_tuple = (start + duration / 10, start + 2 * duration / 10)
        spool = io.read(path, **{dim: trim_tuple})
        assert len(spool) == 1
        patch = spool[0]
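
The one-character change above matters: // floor-divides, truncating the upper trim point for float-valued dims such as distance, and 2 * duration // 10 binds as (2 * duration) // 10. A quick illustration:

start, duration = 0.0, 26.0

upper_floor = start + 2 * duration // 10  # (2 * 26.0) // 10 -> 5.0
upper_true = start + 2 * duration / 10    # 52.0 / 10 -> 5.2
assert (upper_floor, upper_true) == (5.0, 5.2)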
