Skip to content

Commit

Permalink
Neubrex dts dss support (#411)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers authored Jul 20, 2024
1 parent 6610b4c commit 2f179ce
Show file tree
Hide file tree
Showing 10 changed files with 190 additions and 4 deletions.
2 changes: 2 additions & 0 deletions dascore/data_registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ febus_1.h5 73eba2b6e183b3bca51f8a1448c3b423c979d05ce6c18bfd7fb76b4f9bda5c0b http
ap_sensing_1.hdf5 322429f2c44bed5dc72fb9a02f79bb0d3cb71048e93d906d3d24b0605b431b12 https://github.com/dasdae/test_data/raw/master/das/ap_sensing_1.hdf5
silixa_h5_1.hdf5 d3f1b92b17ae2d00f900426e80d48964fb5a33b9480ef9805721ac756acd4a21 https://github.com/dasdae/test_data/raw/master/das/silixa_h5_1.hdf5
deformation_rate_event_1.hdf5 be8574ae523de9b17d2a0f9e847f30301e1607e2076366e005cdb3a46b79f172 https://github.com/dasdae/test_data/raw/master/das/deformation_rate_event_1.hdf5
neubrex_dss_forge.h5 49e501e16d880b22c5d9d8997223f0c1aceb942386efb09aae938b9d97eb51ed https://github.com/dasdae/test_data/raw/master/dss/neubrex_dss_forge.h5
neubrex_dts_forge.h5 940f7bea6dd4c8a1340b4936b8eb7f9edc577cbcaf77c1f5ac295890f88c9ba5 https://github.com/dasdae/test_data/raw/master/dts/neubrex_dts_forge.h5
22 changes: 22 additions & 0 deletions dascore/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,28 @@ def deformation_rate_event_1():
return dc.spool(path)[0]


@register_func(EXAMPLE_PATCHES, key="forge_dss")
def forge_dss():
    """
    Load the example DSS patch recorded by Neubrex for the Forge dataset.

    The data originate from https://gdr.openei.org/submissions/1565
    """
    # Fetch the registered file, then hand back the first patch in its spool.
    spool = dc.spool(fetch("neubrex_dss_forge.h5"))
    return spool[0]


@register_func(EXAMPLE_PATCHES, key="forge_dts")
def forge_dts():
    """
    Load the example DTS patch recorded by Neubrex for the Forge dataset.

    The data originate from https://gdr.openei.org/submissions/1565
    """
    # Fetch the registered file, then hand back the first patch in its spool.
    spool = dc.spool(fetch("neubrex_dts_forge.h5"))
    return spool[0]


@register_func(EXAMPLE_PATCHES, key="ricker_moveout")
def ricker_moveout(
frequency=15,
Expand Down
16 changes: 16 additions & 0 deletions dascore/io/neubrex/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Support for Neubrex H5 DSS/DTS files.
This module was written to read the DSS/DTS files created by Neubrex for
the Forge dataset: https://gdr.openei.org/submissions/1565
The citation for the dataset is:
Energy and Geoscience Institute at the University of Utah. (2023).
Utah FORGE: Well 16B(78)-32 2023 Neubrex Energy Services Circulation
Test Period with Fiber Optics Monitoring [data set].
Retrieved from https://dx.doi.org/10.15121/2222469.
"""
from __future__ import annotations

from .core import NeubrexV1
65 changes: 65 additions & 0 deletions dascore/io/neubrex/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
Core modules for reading Neubrex (Forge) dss/dts data.
"""

from __future__ import annotations

import dascore as dc
from dascore.constants import SpoolType
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader

from .utils import _get_attrs_coords_and_data, _is_neubrex, _maybe_trim_data


class NeubrexPatchAttrs(dc.PatchAttrs):
    """
    Patch attrs for Neubrex files.

    The additional fields correspond to the keys the Neubrex reader produces
    from the "data" node attributes: ``api``, ``field_name``, ``well_id``,
    ``well_name`` and ``well_bore_id``.
    """

    api: str | None = None
    # Previously misspelled ``filed_name``; the reader emits "field_name",
    # so the declared field must use the same spelling to receive the value.
    field_name: str = ""
    well_id: str = ""
    well_name: str = ""
    well_bore_id: str = ""


class NeubrexV1(FiberIO):
    """Support for Neubrex H5 DSS/DTS files."""

    name = "Neubrex"
    preferred_extensions = ("hdf5", "h5")
    version = "1"

    def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
        """
        Return (name, version) if the resource looks like a Neubrex file.

        False is returned when the Neubrex check does not pass.
        """
        return (self.name, self.version) if _is_neubrex(resource) else False

    def read(self, resource: H5Reader, snap=True, **kwargs) -> SpoolType:
        """
        Read a Neubrex h5 file into a spool containing a single patch.

        Parameters
        ----------
        resource
            The open h5 object.
        snap
            Forwarded to the coordinate construction; if True, the time
            coordinate is snapped to be evenly sampled.
        **kwargs
            Optional selections (e.g. ``time``, ``distance``) used to trim
            the coordinates and data.
        """
        attr_dict, coords, data = _get_attrs_coords_and_data(resource, snap)
        # Only bother with trimming when the caller supplied selections.
        if kwargs:
            coords, data = _maybe_trim_data(coords, data, **kwargs)
        patch = dc.Patch(
            attrs=NeubrexPatchAttrs(**attr_dict),
            coords=coords,
            data=data[:],
        )
        return dc.spool([patch])

    def scan(self, resource: H5Reader, snap=True, **kwargs) -> list[dc.PatchAttrs]:
        """Get the attributes of a Neubrex file as a one-element list."""
        # The data node is not needed for scanning, only attrs and coords.
        info, coords, _ = _get_attrs_coords_and_data(resource, snap)
        info.update(
            coords=coords.to_summary_dict(),
            path=resource.filename,
            file_format=self.name,
            file_version=self.version,
        )
        return [dc.PatchAttrs(**info)]
77 changes: 77 additions & 0 deletions dascore/io/neubrex/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Utilities functions for Neubrex IO support"""

import dascore as dc
from dascore.utils.misc import maybe_get_items


def _is_neubrex(h5fi):
"""Determine if the file is of Neubrex origin."""
expected_keys = {"data", "depth", "stamps"}
keys = set(h5fi.keys())
if not expected_keys.issubset(keys):
return False
expected_attrs = {"DataUnitLabel", "StartDateTime", "EndDateTime"}
data_attrs = set(h5fi["data"].attrs)
if expected_attrs.issubset(data_attrs):
return True


def _get_coord_manager(h5fi, snap=True):
    """
    Build the coordinate manager (time, distance) for the file.

    Parameters
    ----------
    h5fi
        The open h5 object; "stamps_unix" and "depth" are read from it.
    snap
        If True, call ``snap`` on the time coordinate.
    """
    # Time: unix stamps are in us for test files, not sure if always true,
    # so they are scaled to seconds before conversion.
    seconds = h5fi["stamps_unix"][:] / 1_000_000
    time = dc.get_coord(values=dc.to_datetime64(seconds))
    if snap:
        time = time.snap()
    # Distance: taken directly from the depth node.
    distance = dc.get_coord(values=h5fi["depth"][:])
    return dc.get_coord_manager(
        coords={"time": time, "distance": distance},
        dims=("time", "distance"),
    )


def _get_data_units_and_type(data_unit_label):
    """Return the quantity parsed from the label once hyphens are removed."""
    return dc.get_quantity(data_unit_label.replace("-", ""))


def _get_attr_dict(h5fi):
    """
    Collect the Neubrex attributes from the "data" node into a dict.

    The result holds the renamed attributes plus a "data_units" entry.
    """
    data_attrs = dict(h5fi["data"].attrs)
    # Neubrex attribute name -> output key. "DataUnitLabel" is deliberately
    # absent: it is converted to "data_units" separately below.
    neubrex_to_dascore = {
        "API": "api",
        "FieldName": "field_name",
        "WellID": "well_id",
        "WellName": "well_name",
        "WellBoreID": "well_bore_id",
    }
    # NOTE(review): presumably maybe_get_items skips keys missing from
    # data_attrs - confirm against dascore.utils.misc.
    attr_dict = maybe_get_items(data_attrs, neubrex_to_dascore)
    attr_dict["data_units"] = _get_data_units_and_type(data_attrs["DataUnitLabel"])
    return attr_dict


def _maybe_trim_data(cm, data, time=None, distance=None, **kwargs):
"""Maybe trim the data."""
if time is not None or distance is not None:
cm, data = cm.select(time=time, distance=distance, array=data)
return cm, data


def _get_attrs_coords_and_data(h5fi, snap=True):
    """
    Return a tuple of (attribute dict, coordinate manager, data node).

    The data entry is whatever ``h5fi["data"]`` yields; it is not sliced here.
    """
    # Coordinates are built first, then attributes, then the data lookup.
    coord_manager = _get_coord_manager(h5fi, snap)
    attr_dict = _get_attr_dict(h5fi)
    return attr_dict, coord_manager, h5fi["data"]
2 changes: 1 addition & 1 deletion dascore/io/optodas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _get_attr_dict(header):
attr_map = {
"gaugeLength": "gauge_length",
"unit": "data_units",
"instrument": "intrument_id",
"instrument": "instrument_id",
"experiment": "acquisition_id",
}
out = {"data_category": "DAS"}
Expand Down
2 changes: 1 addition & 1 deletion dascore/io/prodml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _get_prodml_attrs(fi, extras=None) -> list[dict]:

def _get_dims(node):
"""Get the dimension names in the form of a tuple."""
# we use distance rather than locus, setup mapping to relect this.
# we use distance rather than locus, setup mapping to rename this.
map_ = {"locus": "distance", "Locus": "distance", "Time": "time"}
data_attrs = node["RawData"].attrs
dims = unbyte(data_attrs.get("Dimensions", "time, distance"))
Expand Down
5 changes: 3 additions & 2 deletions dascore/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ def get_registry():
ureg.define("RADIANS=radians")
ureg.define("Radians=radians")
ureg.define("Radian=radians")

# define strain
ureg.define("strain=[]=ϵ")
# allow multiplication with offset units.
ureg.autoconvert_offset_to_baseunit = True
# set shortest display for units.
# set the shortest display for units.
ureg.default_format = "~"
pint.set_application_registry(ureg)
return ureg
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ DASHDF5__V1 = "dascore.io.dashdf5.core:DASHDF5"
H5SIMPLE__V1_0 = "dascore.io.h5simple.core:H5Simple"
FEBUS__V1 = "dascore.io.febus.core:Febus1"
FEBUS__V2 = "dascore.io.febus.core:Febus2"
NEUBREX__V1 = "dascore.io.neubrex:NeubrexV1"
OPTODAS__V8 = "dascore.io.optodas.core:OptoDASV8"
PICKLE = "dascore.io.pickle.core:PickleIO"
PRODML__V2_0 = "dascore.io.prodml.core:ProdMLV2_0"
Expand Down
2 changes: 2 additions & 0 deletions tests/test_io/test_common_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from dascore.io.dashdf5 import DASHDF5
from dascore.io.febus import Febus2
from dascore.io.h5simple import H5Simple
from dascore.io.neubrex import NeubrexV1
from dascore.io.optodas import OptoDASV8
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
Expand All @@ -52,6 +53,7 @@
# See the docs on adding a new IO format, in the contributing section,
# for more details.
COMMON_IO_READ_TESTS = {
NeubrexV1(): ("neubrex_dss_forge.h5", "neubrex_dts_forge.h5"),
SilixaH5V1(): ("silixa_h5_1.hdf5",),
ProdMLV2_0(): ("prodml_2.0.h5", "opta_sense_quantx_v2.h5"),
ProdMLV2_1(): (
Expand Down

0 comments on commit 2f179ce

Please sign in to comment.