From 2f179cec9bd1607ac9cdc819b969b75fd8aa7a10 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Fri, 19 Jul 2024 18:03:18 -0700 Subject: [PATCH] Neubrex dts dss support (#411) --- dascore/data_registry.txt | 2 + dascore/examples.py | 22 ++++++++++ dascore/io/neubrex/__init__.py | 16 +++++++ dascore/io/neubrex/core.py | 65 ++++++++++++++++++++++++++++ dascore/io/neubrex/utils.py | 77 +++++++++++++++++++++++++++++++++ dascore/io/optodas/utils.py | 2 +- dascore/io/prodml/utils.py | 2 +- dascore/units.py | 5 ++- pyproject.toml | 1 + tests/test_io/test_common_io.py | 2 + 10 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 dascore/io/neubrex/__init__.py create mode 100644 dascore/io/neubrex/core.py create mode 100644 dascore/io/neubrex/utils.py diff --git a/dascore/data_registry.txt b/dascore/data_registry.txt index 8d4bd0a2..265656e3 100644 --- a/dascore/data_registry.txt +++ b/dascore/data_registry.txt @@ -23,3 +23,5 @@ febus_1.h5 73eba2b6e183b3bca51f8a1448c3b423c979d05ce6c18bfd7fb76b4f9bda5c0b http ap_sensing_1.hdf5 322429f2c44bed5dc72fb9a02f79bb0d3cb71048e93d906d3d24b0605b431b12 https://github.com/dasdae/test_data/raw/master/das/ap_sensing_1.hdf5 silixa_h5_1.hdf5 d3f1b92b17ae2d00f900426e80d48964fb5a33b9480ef9805721ac756acd4a21 https://github.com/dasdae/test_data/raw/master/das/silixa_h5_1.hdf5 deformation_rate_event_1.hdf5 be8574ae523de9b17d2a0f9e847f30301e1607e2076366e005cdb3a46b79f172 https://github.com/dasdae/test_data/raw/master/das/deformation_rate_event_1.hdf5 +neubrex_dss_forge.h5 49e501e16d880b22c5d9d8997223f0c1aceb942386efb09aae938b9d97eb51ed https://github.com/dasdae/test_data/raw/master/dss/neubrex_dss_forge.h5 +neubrex_dts_forge.h5 940f7bea6dd4c8a1340b4936b8eb7f9edc577cbcaf77c1f5ac295890f88c9ba5 https://github.com/dasdae/test_data/raw/master/dts/neubrex_dts_forge.h5 diff --git a/dascore/examples.py b/dascore/examples.py index 7b50f6e8..f8807130 100644 --- a/dascore/examples.py +++ b/dascore/examples.py @@ -249,6 +249,28 @@ def 
deformation_rate_event_1(): return dc.spool(path)[0] +@register_func(EXAMPLE_PATCHES, key="forge_dss") +def forge_dss(): + """ + A DSS file from the Forge dataset collected by Neubrex. + + https://gdr.openei.org/submissions/1565 + """ + path = fetch("neubrex_dss_forge.h5") + return dc.spool(path)[0] + + +@register_func(EXAMPLE_PATCHES, key="forge_dts") +def forge_dts(): + """ + A DTS file from the Forge dataset collected by Neubrex. + + https://gdr.openei.org/submissions/1565 + """ + path = fetch("neubrex_dts_forge.h5") + return dc.spool(path)[0] + + @register_func(EXAMPLE_PATCHES, key="ricker_moveout") def ricker_moveout( frequency=15, diff --git a/dascore/io/neubrex/__init__.py b/dascore/io/neubrex/__init__.py new file mode 100644 index 00000000..ee26e5fd --- /dev/null +++ b/dascore/io/neubrex/__init__.py @@ -0,0 +1,16 @@ +""" +Support for Neubrex H5 DSS/DTS files. + +This module was written to read the DSS/DTS files created by Neubrex for +the Forge dataset: https://gdr.openei.org/submissions/1565 + +The citation for the dataset is: + +Energy and Geoscience Institute at the University of Utah. (2023). +Utah FORGE: Well 16B(78)-32 2023 Neubrex Energy Services Circulation +Test Period with Fiber Optics Monitoring [data set]. +Retrieved from https://dx.doi.org/10.15121/2222469. +""" +from __future__ import annotations + +from .core import NeubrexV1 diff --git a/dascore/io/neubrex/core.py b/dascore/io/neubrex/core.py new file mode 100644 index 00000000..2e4f160f --- /dev/null +++ b/dascore/io/neubrex/core.py @@ -0,0 +1,65 @@ +""" +Core modules for reading Neubrex (Forge) dss/dts data. 
+"""
+
+from __future__ import annotations
+
+import dascore as dc
+from dascore.constants import SpoolType
+from dascore.io import FiberIO
+from dascore.utils.hdf5 import H5Reader
+
+from .utils import _get_attrs_coords_and_data, _is_neubrex, _maybe_trim_data
+
+
+class NeubrexPatchAttrs(dc.PatchAttrs):
+    """Patch attrs for Neubrex files."""
+
+    api: str | None = None
+    field_name: str = ""
+    well_id: str = ""
+    well_name: str = ""
+    well_bore_id: str = ""
+
+
+class NeubrexV1(FiberIO):
+    """Support for Neubrex DSS/DTS h5 files."""
+
+    name = "Neubrex"
+    preferred_extensions = ("hdf5", "h5")
+    version = "1"
+
+    def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
+        """Determine if the resource is a Neubrex file."""
+        if _is_neubrex(resource):
+            return self.name, self.version
+        return False
+
+    def read(self, resource: H5Reader, snap=True, **kwargs) -> SpoolType:
+        """
+        Read a Neubrex h5 file.
+
+        Parameters
+        ----------
+        resource
+            The open h5 object.
+        snap
+            If True, snap the time coordinate to be evenly sampled.
+        **kwargs
+            Optional time/distance selections used to trim the data.
+        """
+        attr_dict, cm, data = _get_attrs_coords_and_data(resource, snap)
+        if kwargs:
+            cm, data = _maybe_trim_data(cm, data, **kwargs)
+        attrs = NeubrexPatchAttrs(**attr_dict)
+        patch = dc.Patch(coords=cm, data=data[:], attrs=attrs)
+        return dc.spool([patch])
+
+    def scan(self, resource: H5Reader, snap=True, **kwargs) -> list[dc.PatchAttrs]:
+        """Get the attributes of a Neubrex file."""
+        attrs, cm, data = _get_attrs_coords_and_data(resource, snap)
+        attrs["coords"] = cm.to_summary_dict()
+        attrs["path"] = resource.filename
+        attrs["file_format"] = self.name
+        attrs["file_version"] = self.version
+        return [dc.PatchAttrs(**attrs)]
diff --git a/dascore/io/neubrex/utils.py b/dascore/io/neubrex/utils.py
new file mode 100644
index 00000000..9fcb37be
--- /dev/null
+++ b/dascore/io/neubrex/utils.py
@@ -0,0 +1,77 @@
+"""Utility functions for Neubrex IO support."""
+
+import dascore as dc
+from dascore.utils.misc import maybe_get_items
+
+
+def _is_neubrex(h5fi):
+    """Determine if the file is of Neubrex origin."""
+    # "stamps_unix" is required because the time coordinate is read from it.
+    expected_keys = {"data", "depth", "stamps", "stamps_unix"}
+    if not expected_keys.issubset(set(h5fi.keys())):
+        return False
+    expected_attrs = {"DataUnitLabel", "StartDateTime", "EndDateTime"}
+    data_attrs = set(h5fi["data"].attrs)
+    # Always return a bool rather than falling through to an implicit None.
+    return expected_attrs.issubset(data_attrs)
+
+
+def _get_coord_manager(h5fi, snap=True):
+    """Get a coordinate manager from the file."""
+
+    def _get_time_coord(h5fi, snap):
+        """Get the time coordinate."""
+        # Unix stamps are in us for test files, not sure if always true.
+ unix_stamps = dc.to_datetime64(h5fi["stamps_unix"][:] / 1_000_000) + time_coord = dc.get_coord(values=unix_stamps) + if snap: + time_coord = time_coord.snap() + return time_coord + + def _get_dist_coord(h5fi): + """Get the distance (depth) coordinate.""" + depth = h5fi["depth"][:] + return dc.get_coord(values=depth) + + coords = { + "time": _get_time_coord(h5fi, snap=snap), + "distance": _get_dist_coord(h5fi), + } + return dc.get_coord_manager(coords=coords, dims=("time", "distance")) + + +def _get_data_units_and_type(data_unit_label): + """Get the units from contained string.""" + quantity = dc.get_quantity(data_unit_label.replace("-", "")) + return quantity + + +def _get_attr_dict(h5fi): + """Get a dict of neubrex attributes.""" + mapping = { + "API": "api", + # "DataUnitLabel": "data_unit_label", + "FieldName": "field_name", + "WellID": "well_id", + "WellName": "well_name", + "WellBoreID": "well_bore_id", + } + data_attrs = dict(h5fi["data"].attrs) + out = maybe_get_items(data_attrs, mapping) + out["data_units"] = _get_data_units_and_type(data_attrs["DataUnitLabel"]) + return out + + +def _maybe_trim_data(cm, data, time=None, distance=None, **kwargs): + """Maybe trim the data.""" + if time is not None or distance is not None: + cm, data = cm.select(time=time, distance=distance, array=data) + return cm, data + + +def _get_attrs_coords_and_data(h5fi, snap=True): + """Return the attributes, coordinates, and data array.""" + cm = _get_coord_manager(h5fi, snap) + attrs = _get_attr_dict(h5fi) + data = h5fi["data"] + return attrs, cm, data diff --git a/dascore/io/optodas/utils.py b/dascore/io/optodas/utils.py index 83d7b0db..b000d18b 100644 --- a/dascore/io/optodas/utils.py +++ b/dascore/io/optodas/utils.py @@ -58,7 +58,7 @@ def _get_attr_dict(header): attr_map = { "gaugeLength": "gauge_length", "unit": "data_units", - "instrument": "intrument_id", + "instrument": "instrument_id", "experiment": "acquisition_id", } out = {"data_category": "DAS"} diff --git 
a/dascore/io/prodml/utils.py b/dascore/io/prodml/utils.py index d7b3cb62..b6f81c87 100644 --- a/dascore/io/prodml/utils.py +++ b/dascore/io/prodml/utils.py @@ -120,7 +120,7 @@ def _get_prodml_attrs(fi, extras=None) -> list[dict]: def _get_dims(node): """Get the dimension names in the form of a tuple.""" - # we use distance rather than locus, setup mapping to relect this. + # we use distance rather than locus, setup mapping to rename this. map_ = {"locus": "distance", "Locus": "distance", "Time": "time"} data_attrs = node["RawData"].attrs dims = unbyte(data_attrs.get("Dimensions", "time, distance")) diff --git a/dascore/units.py b/dascore/units.py index 133532c6..e0773002 100644 --- a/dascore/units.py +++ b/dascore/units.py @@ -28,10 +28,11 @@ def get_registry(): ureg.define("RADIANS=radians") ureg.define("Radians=radians") ureg.define("Radian=radians") - + # define strain + ureg.define("strain=[]=ϵ") # allow multiplication with offset units. ureg.autoconvert_offset_to_baseunit = True - # set shortest display for units. + # set the shortest display for units. 
ureg.default_format = "~" pint.set_application_registry(ureg) return ureg diff --git a/pyproject.toml b/pyproject.toml index eb53f7da..c174c603 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,6 +106,7 @@ DASHDF5__V1 = "dascore.io.dashdf5.core:DASHDF5" H5SIMPLE__V1_0 = "dascore.io.h5simple.core:H5Simple" FEBUS__V1 = "dascore.io.febus.core:Febus1" FEBUS__V2 = "dascore.io.febus.core:Febus2" +NEUBREX__V1 = "dascore.io.neubrex:NeubrexV1" OPTODAS__V8 = "dascore.io.optodas.core:OptoDASV8" PICKLE = "dascore.io.pickle.core:PickleIO" PRODML__V2_0 = "dascore.io.prodml.core:ProdMLV2_0" diff --git a/tests/test_io/test_common_io.py b/tests/test_io/test_common_io.py index 4d2d99be..a71c822f 100644 --- a/tests/test_io/test_common_io.py +++ b/tests/test_io/test_common_io.py @@ -27,6 +27,7 @@ from dascore.io.dashdf5 import DASHDF5 from dascore.io.febus import Febus2 from dascore.io.h5simple import H5Simple +from dascore.io.neubrex import NeubrexV1 from dascore.io.optodas import OptoDASV8 from dascore.io.pickle import PickleIO from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1 @@ -52,6 +53,7 @@ # See the docs on adding a new IO format, in the contributing section, # for more details. COMMON_IO_READ_TESTS = { + NeubrexV1(): ("neubrex_dss_forge.h5", "neubrex_dts_forge.h5"), SilixaH5V1(): ("silixa_h5_1.hdf5",), ProdMLV2_0(): ("prodml_2.0.h5", "opta_sense_quantx_v2.h5"), ProdMLV2_1(): (