Skip to content

Commit

Permalink
work on patch summary
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers committed Jan 14, 2025
1 parent 5bd8397 commit f12b0d4
Show file tree
Hide file tree
Showing 28 changed files with 381 additions and 439 deletions.
1 change: 1 addition & 0 deletions dascore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from dascore.units import get_quantity, get_unit
from dascore.utils.patch import patch_function
from dascore.utils.time import to_datetime64, to_timedelta64, to_float
from dascore.utils.fs import UPath
from dascore.version import __last_version__, __version__

# flag for disabling progress bar when debugging
Expand Down
16 changes: 10 additions & 6 deletions dascore/core/coordmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,11 +1211,12 @@ def _maybe_coord_from_nested(name, coord, new_dims):
return c_map, d_map, dims


class CoordManagerSummary(CoordManager):
class CoordManagerSummary(DascoreBaseModel):
"""A coordinate manager with summary coordinates."""

dims: tuple[str, ...]
coord_map: Annotated[
FrozenDict[str, CoordSummary],
FrozenDict[str, BaseCoord],
frozen_dict_validator,
frozen_dict_serializer,
]
Expand All @@ -1226,11 +1227,14 @@ def to_coord_manager(self):
This only works if the coordinates were evenly sampled/sorted.
"""
out = {}
coord_map = {}
dim_map = {}
for name, coord in self.coord_map.items():
out[name] = coord.to_coord()
dim_map[name] = coord.dims
coord_map[name] = coord.to_coord()

return CoordManager(
coord_map=out,
dim_map=self.dim_map,
coord_map=FrozenDict(coord_map),
dim_map=FrozenDict(dim_map),
dims=self.dims,
)
45 changes: 45 additions & 0 deletions dascore/core/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,17 @@ class PatchSummary(DascoreBaseModel):
attrs: PatchAttrs
coords: CoordManagerSummary

# Attribute names that `_attrs_to_patch_info` moves out of the dumped attrs
# and into the patch info dict; every other attr is flattened into its own
# name/value record.
_attrs_to_patch_keys = (
    "data_type",
    "dtype",
    "data_units",
    "path",
    "format_version",
    "format_name",
    # Spelled "acquistion_id" before, which could never match an attr
    # named "acquisition_id", so that value was not promoted.
    "acquisition_id",
    "tag",
)

def to_summary(
self,
path=None,
Expand All @@ -417,3 +428,37 @@ def to_summary(
This is here to be compatible with Patch.to_summary.
"""
return self

def _attrs_to_patch_info(self, attr_info, patch_info, patch_id):
    """
    Promote selected attrs into the patch info and flatten the rest.

    Each key listed in ``self._attrs_to_patch_keys`` is removed from
    ``attr_info``; when its value is truthy it is stored in ``patch_info``
    under the same key (falsy values are removed and not stored anywhere).
    Both input dicts are modified in place.

    Returns
    -------
    A list with one dict per remaining attr, each holding the keys
    "name", "value", and "patch_id".
    """
    for promoted in self._attrs_to_patch_keys:
        candidate = attr_info.pop(promoted, None)
        if candidate:
            patch_info[promoted] = candidate
    # Whatever was not promoted becomes one flat record per attribute.
    return [
        {"name": label, "value": content, "patch_id": patch_id}
        for label, content in attr_info.items()
    ]

def _reshape_coords(self, patch_info, coord_info, patch_key):
    """
    Move "dims" into the patch info and list the coord map entries.

    The "dims" entry is removed from ``coord_info`` and stored in
    ``patch_info`` (both dicts are modified in place). ``patch_key`` is
    accepted but not used here.

    Returns
    -------
    A list of the values of ``coord_info["coord_map"]``.
    """
    dims = coord_info.pop("dims")
    patch_info["dims"] = dims
    coord_entries = coord_info["coord_map"]
    return [*coord_entries.values()]

def to_patch_coords_attrs_info(
    self,
    patch_key,
) -> tuple[dict, list[dict], list[dict]]:
    """
    Convert the PatchSummary into patch, attr, and coord records.

    Parameters
    ----------
    patch_key
        Identifier stored under "patch_key" in the patch info and passed
        on to the attr and coord helpers.

    Returns
    -------
    A tuple ``(patch_info, attrs, coords)`` where ``patch_info`` is a
    single dict and ``attrs`` and ``coords`` are lists produced by
    ``_attrs_to_patch_info`` and ``_reshape_coords`` respectively.

    Notes
    -----
    The return order is patch, attrs, coords, which differs from the
    order suggested by the method name.
    """
    attr_dump = self.attrs.model_dump(exclude_unset=True)
    coord_dump = self.coords.model_dump(exclude_unset=True)
    # NOTE(review): `data` is not among the fields visible next to `attrs`
    # and `coords`; presumably it is declared earlier in the class - confirm.
    patch_info = self.data.model_dump(exclude_unset=True)

    patch_info["patch_key"] = patch_key
    # Both helpers also write into patch_info in place (promoted attrs, dims).
    attr_rows = self._attrs_to_patch_info(attr_dump, patch_info, patch_key)
    coord_rows = self._reshape_coords(patch_info, coord_dump, patch_key)
    return patch_info, attr_rows, coord_rows
4 changes: 2 additions & 2 deletions dascore/io/ap_sensing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import dascore as dc
from dascore.core import get_coord, get_coord_manager
from dascore.utils.fs import get_uri
from dascore.utils.fs import get_path
from dascore.utils.misc import _maybe_unpack, unbyte


Expand Down Expand Up @@ -84,7 +84,7 @@ def _get_attrs_dict(resource, format_name):
instrumet_id=unbyte(_maybe_unpack(daq["SerialNumber"])),
gauge_length=_maybe_unpack(pserver["GaugeLength"]),
radians_to_nano_strain=_maybe_unpack(pserver["RadiansToNanoStrain"]),
path=get_uri(resource),
path=get_path(resource),
format_name=format_name,
format_version=version,
)
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/asn/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dascore as dc
import dascore.core
from dascore.core.coords import get_coord
from dascore.utils.fs import get_uri
from dascore.utils.fs import get_path
from dascore.utils.hdf5 import unpack_scalar_h5_dataset
from dascore.utils.misc import unbyte

Expand Down Expand Up @@ -79,7 +79,7 @@ def _get_attr_dict(header, path, format_name, format_version):
def _get_opto_das_coords_attrs(fi, format_name) -> tuple[dc.CoordManager, dict]:
"""Scan a OptoDAS file, return metadata."""
cm = _get_coord_manager(fi)
path = get_uri(fi)
path = get_path(fi)
version = _get_opto_das_version_str(fi)
attrs = _get_attr_dict(fi["header"], path, format_name, version)
return cm, attrs
Expand Down
34 changes: 17 additions & 17 deletions dascore/io/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
MissingOptionalDependencyError,
UnknownFiberFormatError,
)
from dascore.utils.fs import UPath, iter_path_contents
from dascore.utils.io import IOResourceManager, get_handle_from_resource
from dascore.utils.mapping import FrozenDict
from dascore.utils.misc import cached_method, iterate, warn_or_raise
Expand All @@ -47,7 +48,6 @@
)
from dascore.utils.pd import _model_list_to_df
from dascore.utils.progress import track
from dascore.utils.fs import FSPath


class PatchFileSummary(DascoreBaseModel):
Expand Down Expand Up @@ -691,8 +691,8 @@ def scan_to_df(
return path.get_contents()
info = scan(
path=path,
file_format=file_format,
file_version=file_version,
resource_format=file_format,
resource_version=file_version,
ext=ext,
timestamp=timestamp,
progress=progress,
Expand All @@ -705,7 +705,7 @@ def _iterate_scan_inputs(patch_source, ext, mtime, include_directories=True, **k
"""Yield scan candidates."""
for el in iterate(patch_source):
if isinstance(el, str | Path) and (path := Path(el)).exists():
generator = _iter_filesystem(
generator = iter_path_contents(
path, ext=ext, timestamp=mtime, include_directories=include_directories
)
yield from generator
Expand Down Expand Up @@ -781,9 +781,9 @@ def _handle_missing_optionals(outputs, optional_dep_dict):


def scan(
path: Path | FSPath | str | PatchType | SpoolType | IOResourceManager,
file_format: str | None = None,
file_version: str | None = None,
path: UPath | Path | str | PatchType | SpoolType | IOResourceManager,
resource_format: str | None = None,
resource_version: str | None = None,
ext: str | None = None,
timestamp: float | None = None,
progress: PROGRESS_LEVELS = "standard",
Expand All @@ -795,10 +795,10 @@ def scan(
----------
path
A resource containing Fiber data.
file_format
resource_format
Format of the file. If not provided DASCore will try to determine it.
Only applicable for path-like inputs.
file_version
resource_version
Version of the file. If not provided DASCore will try to determine it.
Only applicable for path-like inputs.
ext : str or None
Expand All @@ -823,9 +823,9 @@ def scan(
>>> # Replace with your file path.
>>> file_path = fetch("prodml_2.1.h5")
>>>
>>> attr_list = dc.scan(file_path)
>>> summary_list = dc.scan(file_path)
See also [`FSPath`](`dascore.utils.fs.FSPath`)
See also [`PatchSummary`](`dascore.core.patch.PatchSummary`)
"""
out = []
fiber_io_hint: dict[str, FiberIO] = {}
Expand All @@ -848,19 +848,19 @@ def scan(
for patch_source in tracker:
# just pull attrs from patch
if isinstance(patch_source, dc.Patch):
out.append(patch_source.attrs)
out.append(patch_source.to_summary())
continue
with IOResourceManager(patch_source) as man:
try:
fiber_io, resource = _get_fiber_io_and_req_type(
man,
file_format=file_format,
file_version=file_version,
file_format=resource_format,
file_version=resource_version,
fiber_io_hint=fiber_io_hint,
)
except UnknownFiberFormatError: # skip bad entities
continue
# Cache this fiber io to given preferential treatment next iteration.
# Cache this fiber io to give preferential treatment next iteration.
# This speeds up the common case of many files with the same format.
fiber_io_hint[fiber_io.input_type] = fiber_io
# Special handling of directory FiberIOs.
Expand All @@ -882,8 +882,8 @@ def scan(
except MissingOptionalDependencyError as ex:
missing_optional_deps[ex.msg.split(" ")[0]] += 1
continue
for attr in source:
out.append(dc.PatchAttrs.from_dict(attr))
for summary in source:
out.append(dc.PatchSummary.model_validate(summary))
if missing_optional_deps:
_handle_missing_optionals(out, missing_optional_deps)
return out
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/dasdae/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import dascore as dc
from dascore.constants import SpoolType
from dascore.io import FiberIO
from dascore.utils.fs import get_path
from dascore.utils.hdf5 import (
H5Reader,
H5Writer,
)
from dascore.utils.misc import unbyte
from dascore.utils.patch import get_patch_names

from ...utils.fs import get_uri
from .utils import (
_get_summary_from_patch_groups,
_read_patch,
Expand Down Expand Up @@ -89,7 +89,7 @@ def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
def read(self, resource: H5Reader, **kwargs) -> SpoolType:
"""Read a DASDAE file."""
patches = []
path = get_uri(resource)
path = get_path(resource)
format_version = unbyte(resource.attrs["__DASDAE_version__"])
format_name = self.name
try:
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/dasdae/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import dascore as dc
from dascore.core.coordmanager import get_coord_manager
from dascore.core.coords import get_coord
from dascore.utils.fs import get_uri
from dascore.utils.fs import get_path
from dascore.utils.hdf5 import Empty
from dascore.utils.misc import suppress_warnings, unbyte
from dascore.utils.time import to_int
Expand Down Expand Up @@ -190,7 +190,7 @@ def _read_patch(patch_group, path, format_name, format_version, **kwargs):

def _get_summary_from_patch_groups(h5, format_name="DASDAE"):
"""Get the contents from each patch group."""
path = get_uri(h5)
path = get_path(h5)
format_version = h5.attrs["__DASDAE_version__"]
out = []
for name, group in h5[("/waveforms")].items():
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/dashdf5/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader

from ...utils.fs import get_uri
from ...utils.fs import get_path
from .utils import _get_cf_attrs, _get_cf_coords, _get_cf_version_str


Expand All @@ -21,7 +21,7 @@ class DASHDF5(FiberIO):
def _get_attr(self, resource: H5Reader):
"""Get the attrs dict with path and such populated."""
attrs = _get_cf_attrs(resource)
attrs["path"] = get_uri(resource)
attrs["path"] = get_path(resource)
attrs["format_name"] = self.name
attrs["format_version"] = self.version
return dc.PatchAttrs.model_validate(attrs)
Expand Down
6 changes: 3 additions & 3 deletions dascore/io/febus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dascore.utils.hdf5 import H5Reader
from dascore.utils.models import UTF8Str

from ...utils.fs import get_uri
from ...utils.fs import get_path
from .utils import (
_get_febus_version_str,
_read_febus,
Expand Down Expand Up @@ -71,7 +71,7 @@ def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchSummary]:
"""Scan a febus file, return summary information about the file's contents."""
return _scan_febus(
resource,
path=get_uri(resource),
path=get_path(resource),
format_name=self.name,
format_version=self.version,
attr_cls=FebusPatchAttrs,
Expand All @@ -87,7 +87,7 @@ def read(
"""Read a febus spool of patches."""
patches = _read_febus(
resource,
path=get_uri(resource),
path=get_path(resource),
format_name=self.name,
format_version=self.version,
time=time,
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/gdr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
_get_version,
_maybe_trim_data,
)
from dascore.utils.fs import get_uri
from dascore.utils.fs import get_path
from dascore.utils.hdf5 import H5Reader


Expand All @@ -40,7 +40,7 @@ class GDR_V1(FiberIO): # noqa
def _get_attr_coord_data(self, resource, snap=True):
"""Get the attributes, coordinates, and h5 dataset."""
attr_dict, cm, data = _get_attrs_coords_and_data(resource, snap=snap)
attr_dict["path"] = get_uri(resource)
attr_dict["path"] = get_path(resource)
attr_dict["format_name"] = self.name
attr_dict["version"] = self.version
attr = GDRPatchAttrs(**attr_dict)
Expand Down
6 changes: 3 additions & 3 deletions dascore/io/neubrex/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import dascore.io.neubrex.utils_rfs as rfs_utils
from dascore.constants import SpoolType
from dascore.io import FiberIO
from dascore.utils.fs import get_uri
from dascore.utils.fs import get_path
from dascore.utils.hdf5 import H5Reader


Expand Down Expand Up @@ -53,7 +53,7 @@ class NeubrexRFSV1(FiberIO):
def _get_attrs(self, resource) -> NeubrexRFSPatchAttrs:
"""Get the patch attributes."""
attr = rfs_utils._get_attr_dict(resource)
attr["path"] = get_uri(resource)
attr["path"] = get_path(resource)
attr["format_name"] = self.name
attr["format_version"] = self.version
return NeubrexRFSPatchAttrs(**attr)
Expand Down Expand Up @@ -106,7 +106,7 @@ class NeubrexDASV1(FiberIO):
def _get_attr(self, resource) -> NeubrexDASPatchAttrs:
"""Get the attrs for from the file."""
attr = das_utils._get_attr_dict(resource["Acoustic"])
attr["path"] = get_uri(resource)
attr["path"] = get_path(resource)
attr["format_name"] = self.name
attr["format_version"] = self.version
return NeubrexDASPatchAttrs(**attr)
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/pickle/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import dascore as dc
from dascore.io import BinaryReader, BinaryWriter, FiberIO
from dascore.utils.fs import get_uri
from dascore.utils.fs import get_path


class PickleIO(FiberIO):
Expand Down Expand Up @@ -53,7 +53,7 @@ def read(self, resource: BinaryReader, **kwargs):
patch: dc.Patch = pickle.load(resource)
# Add the relevant file info.
out = patch.update_attrs(
path=get_uri(resource),
path=get_path(resource),
format_name=self.name,
format_version=self.version,
)
Expand Down
Loading

0 comments on commit f12b0d4

Please sign in to comment.