add OptoDAS support (#347)
* add OptoDAS support

* remove dead code, add entry point

* add data category as DAS
d-chambers authored Mar 15, 2024
1 parent 1d49530 commit 38518fb
Showing 9 changed files with 185 additions and 5 deletions.
1 change: 1 addition & 0 deletions dascore/data_registry.txt
@@ -17,4 +17,5 @@ brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769c
dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
opto_das_1.hdf5 0437d1f02d93c9f00d31133388efaf6a28c21883bcfac457b97f1224464c7dca https://github.com/dasdae/test_data/raw/master/das/opto_das_1.hdf5
whale_1.hdf5 a09922969e740307bf26dc6ffa7fb9fbb834dc7cd7d4ced02c66b159fb1ce0cd http://piweb.ooirsn.uw.edu/das/data/Optasense/NorthCable/TransmitFiber/North-C1-LR-P1kHz-GL50m-Sp2m-FS200Hz_2021-11-03T15_06_51-0700/North-C1-LR-P1kHz-GL50m-Sp2m-FS200Hz_2021-11-04T020002Z.h5
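
Each registry line pairs a test file's name with its SHA-256 digest and a download URL, so the test suite can fetch data on demand and verify its integrity. A minimal standard-library sketch of that verification step; the helper name and chunk size here are illustrative, not dascore API:

import hashlib
import urllib.request

def sha256_matches(url: str, expected: str, dest: str) -> bool:
    """Download a registry file and compare its SHA-256 digest to the registry value."""
    path, _ = urllib.request.urlretrieve(url, dest)
    digest = hashlib.sha256()
    with open(path, "rb") as fi:
        # Hash in 1 MiB chunks so large DAS files don't load fully into memory.
        for chunk in iter(lambda: fi.read(2**20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected
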
9 changes: 9 additions & 0 deletions dascore/io/optodas/__init__.py
@@ -0,0 +1,9 @@
"""
Support for the OptoDAS format.
This is used by the OptoDAS interrogator made by Alcatel Submarine Networks.
More info here: https://web.asn.com/
"""
from __future__ import annotations
from .core import OptoDASV8
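
Because the reader is registered as a plugin (see the pyproject.toml entry point below), no format-specific import is needed to use it. A short usage sketch with dascore's generic interface; the file path is a placeholder:

import dascore as dc

# dascore sniffs the file format, then dispatches to OptoDASV8 behind the scenes.
spool = dc.spool("path/to/opto_das_1.hdf5")
patch = spool[0]
print(patch.attrs.file_format, patch.attrs.file_version)  # e.g. "OptoDAS" "8"
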
66 changes: 66 additions & 0 deletions dascore/io/optodas/core.py
@@ -0,0 +1,66 @@
"""IO module for reading OptoDAS data."""
from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.models import UnitQuantity, UTF8Str

from .utils import _get_opto_das_attrs, _get_opto_das_version_str, _read_opto_das


class OptoDASPatchAttrs(dc.PatchAttrs):
"""Patch attrs for OptoDAS."""

gauge_length: float = np.nan
gauge_length_units: UnitQuantity | None = None
schema_version: UTF8Str = ""


class OptoDASV8(FiberIO):
"""Support for OptoDAS V 8."""

name = "OptoDAS"
preferred_extensions = ("hdf5", "h5")
version = "8"

def get_format(self, resource: H5Reader) -> tuple[str, str] | bool:
"""
Return the format name and version if the file contains OptoDAS version 8 data.

Parameters
----------
resource
A path to the file which may contain OptoDAS data.
"""
version_str = _get_opto_das_version_str(resource)
if version_str:
return self.name, version_str

def scan(self, resource: H5Reader) -> list[dc.PatchAttrs]:
"""Scan a OptoDAS file, return summary information about the file's contents."""
file_version = _get_opto_das_version_str(resource)
extras = {
"path": resource.filename,
"file_format": self.name,
"file_version": str(file_version),
}
attrs = _get_opto_das_attrs(resource)
attrs.update(extras)
return [OptoDASPatchAttrs(**attrs)]

def read(
self,
resource: H5Reader,
time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
distance: tuple[float | None, float | None] | None = None,
**kwargs,
) -> dc.BaseSpool:
"""Read a OptoDAS spool of patches."""
patches = _read_opto_das(
resource, time=time, distance=distance, attr_cls=OptoDASPatchAttrs
)
return dc.spool(patches)
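
The three hooks split the work cleanly: get_format fingerprints the file, scan returns summary attrs without loading data, and read builds the full spool. A hedged sketch of exercising them through dascore's top-level helpers, assuming dc.get_format, dc.scan, and dc.read dispatch to registered FiberIO plugins as they do for other formats, with a placeholder path:

import dascore as dc

path = "path/to/opto_das_1.hdf5"    # placeholder
fmt, version = dc.get_format(path)  # ("OptoDAS", "8")
attrs_list = dc.scan(path)          # [OptoDASPatchAttrs(...)], no data loaded
spool = dc.read(path)               # full spool of patches
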
89 changes: 89 additions & 0 deletions dascore/io/optodas/utils.py
@@ -0,0 +1,89 @@
"""Utilities for terra15."""
from __future__ import annotations

import dascore as dc
import dascore.core
from dascore.core.coords import get_coord
from dascore.utils.hdf5 import unpack_scalar_h5_dataset
from dascore.utils.misc import unbyte

# --- Getting format/version


def _get_opto_das_version_str(hdf_fi) -> str:
"""Return the version string for OptoDAS file."""
# define a few root groups/datasets that act as a "fingerprint"
expected_attrs = (
"acqSpec",
"header",
"cableSpec",
"data",
"fileVersion",
)
if not all(x in hdf_fi for x in expected_attrs):
return ""
version_str = str(unbyte(hdf_fi["fileVersion"][()]))
return version_str


def _get_coord_manager(header):
"""Get the distance ranges and spacing."""
dims = tuple(unbyte(x) for x in header["dimensionNames"])
units = tuple(unbyte(x) for x in header["dimensionUnits"])

coords = {}
for index, (dim, unit) in enumerate(zip(dims, units)):
crange = header["dimensionRanges"][f"dimension{index}"]
step = unpack_scalar_h5_dataset(crange["unitScale"])

# special case for time.
if dim == "time":
step = dc.to_timedelta64(step)
t1 = dc.to_datetime64(unpack_scalar_h5_dataset(header["time"]))
start = t1 + unpack_scalar_h5_dataset(crange["min"]) * step
stop = t1 + (unpack_scalar_h5_dataset(crange["max"]) + 1) * step
else:
# The min/max values appear to be int ranges so we need to
# multiply by step.
start = unpack_scalar_h5_dataset(crange["min"]) * step
stop = (unpack_scalar_h5_dataset(crange["max"]) + 1) * step

coords[dim] = get_coord(min=start, max=stop, step=step, units=unit)
return dascore.core.get_coord_manager(coords=coords, dims=dims)


def _get_attr_dict(header):
"""Map header info to DAS attrs."""
attr_map = {
"gaugeLength": "gauge_length",
"unit": "data_units",
"instrument": "intrument_id",
"experiment": "acquisition_id",
}
out = {"data_category": "DAS"}
for head_name, attr_name in attr_map.items():
value = header[head_name]
if hasattr(value, "shape"):
value = unpack_scalar_h5_dataset(value)
out[attr_name] = unbyte(value)
return out


def _get_opto_das_attrs(fi) -> dict:
"""Scan a OptoDAS file, return metadata."""
header = fi["header"]
cm = _get_coord_manager(header)
attrs = _get_attr_dict(header)
attrs["coords"] = cm
return attrs


def _read_opto_das(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
"""Read the OptoDAS values into a patch."""
attrs = _get_opto_das_attrs(fi)
data_node = fi["data"]
coords = attrs.pop("coords")
cm, data = coords.select(array=data_node, distance=distance, time=time)
attrs["coords"] = cm.to_summary_dict()
attrs["dims"] = cm.dims
return [dc.Patch(data=data, coords=cm, attrs=attr_cls(**attrs))]
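
The coordinate logic above treats min and max as integer sample indices scaled by unitScale, with max + 1 placing the stop value one step past the last sample. A small numeric sketch of the time branch, with invented values:

import dascore as dc

step = dc.to_timedelta64(0.002)  # unitScale: 2 ms between samples
t1 = dc.to_datetime64("2024-03-15T00:00:00")  # header["time"]
cmin, cmax = 0, 4999  # crange["min"], crange["max"]: 5000 samples

start = t1 + cmin * step       # 2024-03-15T00:00:00
stop = t1 + (cmax + 1) * step  # one step past the last sample
assert stop - start == dc.to_timedelta64(10.0)  # 5000 * 2 ms = 10 s
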
4 changes: 2 additions & 2 deletions dascore/io/prodml/core.py
@@ -31,12 +31,12 @@ class ProdMLV2_0(FiberIO): # noqa

def get_format(self, resource: PyTablesReader) -> tuple[str, str] | bool:
"""
Return True if file contains terra15 version 2 data else False.
Return True if file contains prodML version 2 data else False.
Parameters
----------
resource
A path to the file which may contain terra15 data.
A path to the file which may contain prodML data.
"""
version_str = _get_prodml_version_str(resource)
if version_str:
4 changes: 2 additions & 2 deletions dascore/io/prodml/utils.py
@@ -1,4 +1,4 @@
"""Utilities for terra15."""
"""Utilities for prodML."""
from __future__ import annotations

import dascore as dc
@@ -12,7 +12,7 @@

def _get_prodml_version_str(hdf_fi) -> str:
"""Return the version string for prodml file."""
# define a few root attrs that act as a "fingerprint" for terra15 files
# define a few root attrs that act as a "fingerprint" for prodML files

acquisition = getattr(hdf_fi.root, "Acquisition", None)
if acquisition is None:
12 changes: 12 additions & 0 deletions dascore/utils/hdf5.py
@@ -456,3 +456,15 @@ class H5Writer(H5Reader):
# used in new code.
HDF5Writer = PyTablesWriter
HDF5Reader = PyTablesReader


def unpack_scalar_h5_dataset(dataset):
"""
Unpack a scalar H5Py dataset.
"""
assert dataset.size == 1
# This gets weird because datasets can be of shape () or (1,).
value = dataset[()]
if isinstance(value, np.ndarray):
value = value[0]
return value
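
A short h5py sketch of the two scalar layouts this helper normalizes; the file is held in memory so nothing touches disk:

import h5py

from dascore.utils.hdf5 import unpack_scalar_h5_dataset

with h5py.File("scratch.h5", "w", driver="core", backing_store=False) as fi:
    fi.create_dataset("a", data=1.5)    # shape (): dataset[()] is a scalar
    fi.create_dataset("b", data=[1.5])  # shape (1,): dataset[()] is an array
    assert unpack_scalar_h5_dataset(fi["a"]) == 1.5
    assert unpack_scalar_h5_dataset(fi["b"]) == 1.5
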
1 change: 1 addition & 0 deletions pyproject.toml
@@ -103,6 +103,7 @@ dev = ["dascore[test]", "dascore[docs]", "dascore[profile]", "dascore[extras]"]
DASDAE__V1 = "dascore.io.dasdae.core:DASDAEV1"
DASHDF5__V1 = "dascore.io.dashdf5.core:DASHDF5"
H5SIMPLE__V1_0 = "dascore.io.h5simple.core:H5Simple"
OPTODAS__V8 = "dascore.io.optodas.core:OptoDASV8"
PICKLE = "dascore.io.pickle.core:PickleIO"
PRODML__V2_0 = "dascore.io.prodml.core:ProdMLV2_0"
PRODML__V2_1 = "dascore.io.prodml.core:ProdMLV2_1"
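
These entry points are how dascore discovers FiberIO plugins, so the single OPTODAS__V8 line is the only registration the new reader needs. A standard-library sketch of that discovery; the group name "dascore.fiber_io" is inferred from this section of pyproject.toml and may differ:

from importlib.metadata import entry_points

# Requires Python >= 3.10 for the keyword form of entry_points.
for ep in entry_points(group="dascore.fiber_io"):
    print(ep.name, "->", ep.value)
# OPTODAS__V8 -> dascore.io.optodas.core:OptoDASV8, among the rest
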
4 changes: 3 additions & 1 deletion tests/test_io/test_common_io.py
@@ -24,6 +24,7 @@
from dascore.io.dasdae import DASDAEV1
from dascore.io.dashdf5 import DASHDF5
from dascore.io.h5simple import H5Simple
from dascore.io.optodas import OptoDASV8
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
from dascore.io.segy import SegyV2
@@ -47,6 +48,7 @@
# See the docs on adding a new IO format, in the contributing section,
# for more details.
COMMON_IO_READ_TESTS = {
OptoDASV8(): ("opto_das_1.hdf5",),
DASDAEV1(): ("example_dasdae_event_1.h5",),
H5Simple(): ("h5_simple_2.h5", "h5_simple_1.h5"),
ProdMLV2_0(): ("prodml_2.0.h5", "opta_sense_quantx_v2.h5"),
@@ -268,7 +270,7 @@ def test_slice_single_dim_both_ends(self, io_path_tuple):
stop = getattr(attrs_init, f"{dim}_max")
duration = stop - start
# first test double ended query
trim_tuple = (start + duration / 10, start + 2 * duration // 10)
trim_tuple = (start + duration / 10, start + 2 * duration / 10)
spool = io.read(path, **{dim: trim_tuple})
assert len(spool) == 1
patch = spool[0]