Commit 23525c0

continue refactor

d-chambers committed Jan 9, 2025
1 parent 186991a

Showing 15 changed files with 105 additions and 612 deletions.
2 changes: 1 addition & 1 deletion dascore/core/attrs.py
@@ -50,7 +50,7 @@ class PatchAttrs(DascoreBaseModel):
"""

model_config = ConfigDict(
title="Patch Summary",
title="Patch Attrs",
extra="allow",
frozen=True,
arbitrary_types_allowed=True,
14 changes: 7 additions & 7 deletions dascore/core/patch.py
@@ -380,16 +380,16 @@ def io(self) -> dc.io.PatchIO:

def to_summary(
self,
uri=None,
path=None,
resource_format=None,
resource_version=None,
) -> PatchSummary:
"""
Summarize the contents of the Patch.
"""
uri = uri if uri is not None else self.get_patch_name()
path = path if path is not None else self.get_patch_name()
psum = PatchSummary(
uri=uri,
path=path,
coords=self.coords.to_summary(),
attrs=self.attrs,
data=ArraySummary.from_array(self.data),
@@ -404,9 +404,9 @@ class PatchSummary(DascoreBaseModel):
A class for summarizing the metadata of the Patch.
"""

uri: str
resource_format: str = ""
resource_version: str = ""
path: str
format: str = ""
version: str = ""

data: Annotated[ArraySummary, PlainValidator(ArraySummary.from_array)]

@@ -415,7 +415,7 @@

def to_summary(
self,
uri=None,
path=None,
resource_format=None,
resource_version=None,
):
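The renames above (uri to path, resource_format to format, resource_version to version) change how a PatchSummary is built. Below is a minimal sketch of constructing one directly, mirroring the keyword names used elsewhere in this commit; dc.get_example_patch() and the exact validation behavior are assumptions rather than anything this diff guarantees.

import dascore as dc

patch = dc.get_example_patch()  # assumed example-data helper
summary = dc.PatchSummary(
    path="example_file.h5",  # formerly uri
    format="DASDAE",  # formerly resource_format
    version="1",  # formerly resource_version
    data=patch.data,  # coerced to an ArraySummary by the field validator
    attrs=patch.attrs,
    coords=patch.coords.to_summary(),
)
print(summary.path, summary.format, summary.version)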
4 changes: 0 additions & 4 deletions dascore/io/__init__.py
@@ -7,10 +7,6 @@
from dascore.io.core import FiberIO, read, scan, scan_to_df, write, PatchFileSummary
from dascore.utils.io import BinaryReader, BinaryWriter
from dascore.utils.hdf5 import (
HDF5Writer,
HDF5Reader,
PyTablesWriter,
PyTablesReader,
H5Reader,
H5Writer,
)
16 changes: 10 additions & 6 deletions dascore/io/ap_sensing/core.py
@@ -11,7 +11,7 @@
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader

from .utils import _get_patch, _get_version_string
from .utils import _get_attrs_dict, _get_coords, _get_patch, _get_version_string


class APSensingPatchAttrs(dc.PatchAttrs):
@@ -43,13 +43,17 @@ def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:

def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchSummary]:
"""Scan an AP sensing file, return summary info about the contents."""
attrs = _get_attrs_dict(resource)
coords = _get_coords(resource)
info = {
"uri": resource.filename,
"resource_format": self.name,
"resource_version": _get_version_string(resource),
"path": resource.filename,
"format": self.name,
"version": _get_version_string(resource),
"attrs": attrs,
"coords": coords,
"data": resource["DAS"],
}
patch = _get_patch(resource, load_data=False)
return [patch.to_summary(**info)]
return [dc.PatchSummary(**info)]

def read(
self,
3 changes: 0 additions & 3 deletions dascore/io/ap_sensing/utils.py
@@ -91,7 +91,6 @@ def _get_patch(
time=None,
distance=None,
attr_cls=dc.PatchAttrs,
load_data=True,
**kwargs,
):
"""Get a patch from ap_sensing file."""
@@ -100,7 +99,5 @@
data = resource["DAS"]
if time is not None or distance is not None:
coords, data = coords.select(array=data, time=time, distance=distance)
attrs["coords"] = coords
attrs = attr_cls.model_validate(attrs)
data = data[:] if load_data else data
return dc.Patch(data=data, coords=coords, attrs=attrs, **kwargs)
2 changes: 1 addition & 1 deletion dascore/io/core.py
@@ -492,7 +492,7 @@ def scan(self, resource, **kwargs) -> list[dc.PatchSummary]:
new = pa.to_summary(
resource_format=self.name,
resource_version=self.version,
uri=str(resource),
path=str(resource),
)
out.append(new)
return out
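Since scan now returns PatchSummary objects keyed by path, format, and version, here is a hedged sketch of consuming its output. The file path is hypothetical, and the attribute access assumes the renames in this commit are applied consistently across the readers.

from dascore.io import scan

# Hypothetical file path; any format with a registered FiberIO should work here.
summaries = scan("some_das_file.hdf5")
for summary in summaries:
    print(summary.path, summary.format, summary.version)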
2 changes: 1 addition & 1 deletion dascore/io/dasdae/core.py
@@ -120,7 +120,7 @@ def read(self, resource: H5Reader, **kwargs) -> SpoolType:
patches.append(_read_patch(patch_group, **kwargs))
return dc.spool(patches)

def scan(self, resource: H5Reader, **kwargs):
def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchSummary]:
"""
Get the patch info from the file.
30 changes: 13 additions & 17 deletions dascore/io/dasdae/utils.py
@@ -197,16 +197,13 @@ def _get_contents_from_patch_groups(h5, file_version, file_format="DASDAE"):
for name, group in h5[("/waveforms")].items():
contents = _get_patch_content_from_group(group)
# populate file info
contents["file_version"] = file_version
contents["file_format"] = file_format
contents["version"] = file_version
contents["format"] = file_format
contents["path"] = h5.filename
# suppressing warnings because old dasdae files will issue warning
# due to d_dim rather than dim_step. TODO fix test files in the future
with suppress_warnings(DeprecationWarning):
try:
out.append(dc.PatchAttrs(**contents))
except:
breakpoint()
out.append(dc.PatchSummary(**contents))

return out

@@ -227,7 +224,7 @@ def _get_coord_info(info, group):
if "min" not in attrs:
c_summary = (
dc.core.get_coord(data=ds[:])
.to_summary()
.to_summary(name=name, dims=attrs["dims"])
.model_dump(exclude_unset=True, exclude_defaults=True)
)
c_info.update(c_summary)
@@ -245,19 +242,18 @@

def _get_patch_content_from_group(group):
"""Get patch content from a single node."""
out = {}
attrs = _santize_pytables(dict(group.attrs))
for key, value in attrs.items():
attrs = {}
# The attributes in the table.
tables_attrs = _santize_pytables(dict(group.attrs))
for key, value in tables_attrs.items():
new_key = key.replace("_attrs_", "")
# need to unpack 0 dim arrays.
if isinstance(value, np.ndarray) and not value.shape:
value = np.atleast_1d(value)[0]
out[new_key] = value
attrs[new_key] = value
# Add coord info.
out["coords"] = _get_coord_info(out, group)
coords = _get_coord_info(attrs, group)
# Add data info.
out["shape"] = group["data"].shape
out["dtype"] = group["data"].dtype.str
# rename dims
out["dims"] = out.pop("_dims")
return out
data = group["data"]
dims = attrs.pop("_dims")
return dict(data=data, attrs=attrs, dims=dims, coords=coords)
34 changes: 11 additions & 23 deletions dascore/io/dashdf5/core.py
@@ -1,28 +1,14 @@
"""IO module for reading prodML data."""
"""IO module for reading DAShdf5 data."""

from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.models import UnitQuantity, UTF8Str

from .utils import _get_cf_attrs, _get_cf_coords, _get_cf_version_str


class ProdMLPatchAttrs(dc.PatchAttrs):
"""Patch attrs for ProdML."""

pulse_width: float = np.nan
pulse_width_units: UnitQuantity | None = None
gauge_length: float = np.nan
gauge_length_units: UnitQuantity | None = None
schema_version: UTF8Str = ""


class DASHDF5(FiberIO):
"""IO Support for DASHDF5 which uses CF version 1.7."""

@@ -43,25 +29,27 @@ def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
if version_str:
return self.name, version_str

def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchSummary]:
"""Get metadata from file."""
coords = _get_cf_coords(resource)
extras = {
info = {
"path": resource.filename,
"file_format": self.name,
"file_version": str(self.version),
"format": self.name,
"version": str(self.version),
"coords": coords,
"dims": coords.dims,
"attrs": _get_cf_attrs(resource, coords),
}
attrs = _get_cf_attrs(resource, coords, extras=extras)
return [attrs]
return [dc.PatchSummary(**info)]

def read(
self,
resource: H5Reader,
time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
channel: tuple[float | None, float | None] | None = None,
**kwargs,
):
"""Read a CF file and return a Patch."""
) -> dc.BaseSpool:
"""Read a file and return a Patch."""
coords = _get_cf_coords(resource)
coords_new, data = coords.select(
array=resource["das"],
8 changes: 4 additions & 4 deletions dascore/io/dashdf5/utils.py
@@ -40,7 +40,7 @@ def _get_cf_coords(hdf_fi, minimal=False) -> dc.core.CoordManager:
"""

def _get_spatialcoord(hdf_fi, code):
def _get_spatial_coord(hdf_fi, code):
"""Get spatial coord."""
return get_coord(
data=hdf_fi[code],
@@ -51,9 +51,9 @@ def _get_spatialcoord(hdf_fi, code):
"channel": get_coord(data=hdf_fi["channel"][:]),
"trace": get_coord(data=hdf_fi["trace"][:]),
"time": get_coord(data=dc.to_datetime64(hdf_fi["t"][:])),
"x": _get_spatialcoord(hdf_fi, "x"),
"y": _get_spatialcoord(hdf_fi, "y"),
"z": _get_spatialcoord(hdf_fi, "z"),
"x": _get_spatial_coord(hdf_fi, "x"),
"y": _get_spatial_coord(hdf_fi, "y"),
"z": _get_spatial_coord(hdf_fi, "z"),
}
dim_map = {
"time": ("time",),
19 changes: 4 additions & 15 deletions dascore/io/febus/core.py
@@ -7,15 +7,15 @@
import numpy as np

import dascore as dc
from dascore.constants import opt_timeable_types
from dascore.constants import opt_timeable_types, attr_conflict_description
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.models import UTF8Str

from .utils import (
_get_febus_version_str,
_read_febus,
_yield_attrs_coords,
_scan_febus,
)


@@ -66,20 +66,9 @@ def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
if version_str:
return self.name, version_str

def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchSummary]:
"""Scan a febus file, return summary information about the file's contents."""
out = []
file_version = _get_febus_version_str(resource)
extras = {
"path": resource.filename,
"file_format": self.name,
"file_version": str(file_version),
}
for attr, cm, _ in _yield_attrs_coords(resource):
attr["coords"] = cm.to_summary_dict()
attr.update(dict(extras))
out.append(FebusPatchAttrs(**attr))
return out
return _scan_febus(resource, resource.path, attr_cls=FebusPatchAttrs)

def read(
self,
41 changes: 39 additions & 2 deletions dascore/io/febus/utils.py
@@ -9,6 +9,7 @@
import dascore as dc
from dascore.core import get_coord, get_coord_manager
from dascore.core.coordmanager import CoordManager
from dascore.utils.models import ArraySummary
from dascore.utils.misc import (
_maybe_unpack,
broadcast_for_index,
@@ -262,11 +263,47 @@ def _get_time_filtered_data(data, t_start_end, time, total_slice, time_coord):
return data, cm


def _read_febus(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
def _get_data_summary(febus):
    """Get a summary of the data array."""
    data = febus.zone[febus.data_name]
    data_shape = data.shape
    skip_rows = _get_time_overlap_samples(febus, data_shape) // 2
    # Need to handle case where skip_rows == 0.
    data_slice = slice(skip_rows, -skip_rows if skip_rows else None)
    total_slice = list(broadcast_for_index(3, 1, data_slice))
    data_3d = data[tuple(total_slice)]
    # Flatten the 3D (block, time, distance) array into 2D (time, distance).
    data_2d = data_3d.reshape(-1, data_3d.shape[2])
    return ArraySummary(ndim=2, dtype=data_2d.dtype, shape=data_2d.shape)



def _read_febus(h5, distance=None, time=None, attr_cls=dc.PatchAttrs):
"""Read the febus values into a patch."""
out = []
for attr, cm, febus in _yield_attrs_coords(fi):
for attr, cm, febus in _yield_attrs_coords(h5):
data, new_cm = _get_data_new_cm(cm, febus, distance=distance, time=time)
patch = dc.Patch(data=data, coords=new_cm, attrs=attr_cls(**attr))
out.append(patch)
return out


def _scan_febus(h5, path, attr_cls=dc.PatchAttrs):
"""Read the febus values into a patch."""
out = []
format = "febus"
version = _get_febus_version_str(h5)
for attr, cm, febus in _yield_attrs_coords(h5):
data_summary = _get_data_summary(febus)
patch = dc.PatchSummary(
data=data_summary,
coords=cm,
attrs=attr_cls(**attr),
dims=cm.dims,
format=format,
version=version,
path=path,
)
out.append(patch)
return out
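_get_data_summary above and Patch.to_summary take the two routes into ArraySummary that this commit relies on: the direct constructor for metadata-only scans and from_array for in-memory data. A short sketch of both follows, assuming the constructor accepts the field names shown in this diff and nothing more.

import numpy as np
from dascore.utils.models import ArraySummary

array = np.zeros((10, 20), dtype=np.float32)
# Summarize an array that is already in memory.
summary_a = ArraySummary.from_array(array)
# Or describe an on-disk array without loading it, as the scan path does.
summary_b = ArraySummary(ndim=array.ndim, dtype=array.dtype, shape=array.shape)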