Skip to content

Commit

Permalink
work on fsspec
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers committed Jan 11, 2025
1 parent 84f1b2a commit be044b6
Show file tree
Hide file tree
Showing 25 changed files with 484 additions and 255 deletions.
5 changes: 3 additions & 2 deletions dascore/io/ap_sensing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import dascore as dc
from dascore.core import get_coord, get_coord_manager
from dascore.utils.misc import _maybe_unpack, get_path, unbyte
from dascore.utils.misc import _maybe_unpack, unbyte
from dascore.utils.fs import get_uri


def _get_version_string(resource):
Expand Down Expand Up @@ -83,7 +84,7 @@ def _get_attrs_dict(resource, format_name):
instrumet_id=unbyte(_maybe_unpack(daq["SerialNumber"])),
gauge_length=_maybe_unpack(pserver["GaugeLength"]),
radians_to_nano_strain=_maybe_unpack(pserver["RadiansToNanoStrain"]),
path=get_path(resource),
path=get_uri(resource),
format_name=format_name,
format_version=version,
)
Expand Down
6 changes: 4 additions & 2 deletions dascore/io/asn/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import dascore.core
from dascore.core.coords import get_coord
from dascore.utils.hdf5 import unpack_scalar_h5_dataset
from dascore.utils.misc import get_path, unbyte
from dascore.utils.misc import unbyte
from dascore.utils.fs import get_uri


# --- Getting format/version

Expand Down Expand Up @@ -78,7 +80,7 @@ def _get_attr_dict(header, path, format_name, format_version):
def _get_opto_das_coords_attrs(fi, format_name) -> tuple[dc.CoordManager, dict]:
"""Scan an OptoDAS file, return metadata."""
cm = _get_coord_manager(fi)
path = get_path(fi)
path = get_uri(fi)
version = _get_opto_das_version_str(fi)
attrs = _get_attr_dict(fi["header"], path, format_name, version)
return cm, attrs
Expand Down
3 changes: 2 additions & 1 deletion dascore/io/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
)
from dascore.utils.io import IOResourceManager, get_handle_from_resource
from dascore.utils.mapping import FrozenDict
from dascore.utils.misc import _iter_filesystem, cached_method, iterate, warn_or_raise
from dascore.utils.misc import cached_method, iterate, warn_or_raise
from dascore.utils.fs import _iter_filesystem
from dascore.utils.models import (
CommaSeparatedStr,
DascoreBaseModel,
Expand Down
5 changes: 3 additions & 2 deletions dascore/io/dasdae/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
H5Reader,
H5Writer,
)
from dascore.utils.misc import get_path, unbyte
from dascore.utils.misc import unbyte
from ...utils.fs import get_uri
from dascore.utils.patch import get_patch_names

from .utils import (
Expand Down Expand Up @@ -88,7 +89,7 @@ def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
def read(self, resource: H5Reader, **kwargs) -> SpoolType:
"""Read a DASDAE file."""
patches = []
path = get_path(resource)
path = get_uri(resource)
format_version = unbyte(resource.attrs["__DASDAE_version__"])
format_name = self.name
try:
Expand Down
5 changes: 3 additions & 2 deletions dascore/io/dasdae/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from dascore.core.coordmanager import get_coord_manager
from dascore.core.coords import get_coord
from dascore.utils.hdf5 import Empty
from dascore.utils.misc import get_path, suppress_warnings, unbyte
from dascore.utils.misc import suppress_warnings, unbyte
from dascore.utils.fs import get_uri
from dascore.utils.time import to_int

# --- Functions for writing DASDAE format
Expand Down Expand Up @@ -189,7 +190,7 @@ def _read_patch(patch_group, path, format_name, format_version, **kwargs):

def _get_summary_from_patch_groups(h5, format_name="DASDAE"):
"""Get the contents from each patch group."""
path = get_path(h5)
path = get_uri(h5)
format_version = h5.attrs["__DASDAE_version__"]
out = []
for name, group in h5[("/waveforms")].items():
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/dashdf5/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.misc import get_path
from ...utils.fs import get_uri

from .utils import _get_cf_attrs, _get_cf_coords, _get_cf_version_str

Expand All @@ -21,7 +21,7 @@ class DASHDF5(FiberIO):
def _get_attr(self, resource: H5Reader):
"""Get the attrs dict with path and such populated."""
attrs = _get_cf_attrs(resource)
attrs["path"] = get_path(resource)
attrs["path"] = get_uri(resource)
attrs["format_name"] = self.name
attrs["format_version"] = self.version
return dc.PatchAttrs.model_validate(attrs)
Expand Down
6 changes: 3 additions & 3 deletions dascore/io/febus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.misc import get_path
from ...utils.fs import get_uri
from dascore.utils.models import UTF8Str

from .utils import (
Expand Down Expand Up @@ -71,7 +71,7 @@ def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchSummary]:
"""Scan a febus file, return summary information about the file's contents."""
return _scan_febus(
resource,
path=get_path(resource),
path=get_uri(resource),
format_name=self.name,
format_version=self.version,
attr_cls=FebusPatchAttrs,
Expand All @@ -87,7 +87,7 @@ def read(
"""Read a febus spool of patches."""
patches = _read_febus(
resource,
path=get_path(resource),
path=get_uri(resource),
format_name=self.name,
format_version=self.version,
time=time,
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/gdr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
_maybe_trim_data,
)
from dascore.utils.hdf5 import H5Reader
from dascore.utils.misc import get_path
from dascore.utils.fs import get_uri


class GDRPatchAttrs(dc.PatchAttrs):
Expand All @@ -40,7 +40,7 @@ class GDR_V1(FiberIO): # noqa
def _get_attr_coord_data(self, resource, snap=True):
"""Get the attributes, coordinates, and h5 dataset."""
attr_dict, cm, data = _get_attrs_coords_and_data(resource, snap=snap)
attr_dict["path"] = get_path(resource)
attr_dict["path"] = get_uri(resource)
attr_dict["format_name"] = self.name
attr_dict["version"] = self.version
attr = GDRPatchAttrs(**attr_dict)
Expand Down
1 change: 0 additions & 1 deletion dascore/io/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import dascore as dc
from dascore.constants import ONE_SECOND_IN_NS, PROGRESS_LEVELS
from dascore.exceptions import InvalidIndexVersionError
from dascore.utils.hdf5 import HDFPatchIndexManager
from dascore.utils.misc import iterate
from dascore.utils.pd import filter_df
from dascore.utils.time import get_max_min_times, to_timedelta64
Expand Down
6 changes: 3 additions & 3 deletions dascore/io/neubrex/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dascore.constants import SpoolType
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.misc import get_path
from dascore.utils.fs import get_uri


class NeubrexRFSPatchAttrs(dc.PatchAttrs):
Expand Down Expand Up @@ -53,7 +53,7 @@ class NeubrexRFSV1(FiberIO):
def _get_attrs(self, resource) -> NeubrexRFSPatchAttrs:
"""Get the patch attributes."""
attr = rfs_utils._get_attr_dict(resource)
attr["path"] = get_path(resource)
attr["path"] = get_uri(resource)
attr["format_name"] = self.name
attr["format_version"] = self.version
return NeubrexRFSPatchAttrs(**attr)
Expand Down Expand Up @@ -106,7 +106,7 @@ class NeubrexDASV1(FiberIO):
def _get_attr(self, resource) -> NeubrexDASPatchAttrs:
"""Get the attrs from the file."""
attr = das_utils._get_attr_dict(resource["Acoustic"])
attr["path"] = get_path(resource)
attr["path"] = get_uri(resource)
attr["format_name"] = self.name
attr["format_version"] = self.version
return NeubrexDASPatchAttrs(**attr)
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/pickle/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import dascore as dc
from dascore.io import BinaryReader, BinaryWriter, FiberIO
from dascore.utils.misc import get_path
from dascore.utils.fs import get_uri


class PickleIO(FiberIO):
Expand Down Expand Up @@ -53,7 +53,7 @@ def read(self, resource: BinaryReader, **kwargs):
patch: dc.Patch = pickle.load(resource)
# Add the relevant file info.
out = patch.update_attrs(
path=get_path(resource),
path=get_uri(resource),
format_name=self.name,
format_version=self.version,
)
Expand Down
5 changes: 3 additions & 2 deletions dascore/io/prodml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from dascore.constants import VALID_DATA_TYPES
from dascore.core.coordmanager import get_coord_manager
from dascore.core.coords import get_coord
from dascore.utils.misc import get_path, iterate, maybe_get_items, unbyte
from dascore.utils.misc import iterate, maybe_get_items, unbyte
from dascore.utils.fs import get_uri

# --- Getting format/version

Expand Down Expand Up @@ -100,7 +101,7 @@ def _get_prodml_attrs(fi, format_name, format_version) -> list[dict]:
acq = fi["Acquisition"]
base_info = maybe_get_items(acq.attrs, _root_attrs)
raw_nodes = _get_raw_node_dict(acq)
path = get_path(fi)
path = get_uri(fi)

# Iterate each raw data node. I have only ever seen 1 in a file but since
# it is indexed like Raw[0] there might be more.
Expand Down
5 changes: 3 additions & 2 deletions dascore/io/segy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import dascore as dc
from dascore.io.core import FiberIO
from dascore.utils.io import BinaryReader
from dascore.utils.misc import get_path, optional_import
from dascore.utils.misc import optional_import
from ...utils.fs import get_uri

from .utils import (
_get_coords,
Expand All @@ -31,7 +32,7 @@ class SegyV1_0(FiberIO): # noqa
def _get_attrs(self, resource):
"""Get the basic attributes for a segy file."""
info = dict(
path=get_path(resource),
path=get_uri(resource),
format_name=self.name,
format_version=self.version,
)
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/sentek/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import dascore as dc
from dascore.io import BinaryReader
from dascore.io.core import FiberIO
from dascore.utils.misc import get_path
from ...utils.fs import get_uri
from dascore.utils.models import ArraySummary

from .utils import _get_patch_attrs, _get_version
Expand All @@ -24,7 +24,7 @@ def _get_attrs_coords_offsets(self, resource):
"""Get attributes, coordinates, and data offsets from file."""
attrs_dict, coords, offsets = _get_patch_attrs(
resource,
path=get_path(resource),
path=get_uri(resource),
format_name=self.name,
format_version=self.version,
)
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/sentek/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@

import dascore as dc
from dascore.core import get_coord, get_coord_manager
from dascore.utils.misc import get_path
from dascore.utils.fs import get_uri


def _get_version(fid):
"""Determine if Sentek file."""
path = get_path(fid)
path = get_uri(fid)
# Sentek files cannot change the extension, or file name.
sw_data = path.endswith(".das")
fid.seek(0)
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/silixah5/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dascore.constants import opt_timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.misc import get_path
from dascore.utils.fs import get_uri


class SilixaPatchAttrs(dc.PatchAttrs):
Expand All @@ -33,7 +33,7 @@ class SilixaH5V1(FiberIO):
def _get_attr_coords(self, resource):
"""Get attributes and coordinates of patch in file."""
info, coords = util._get_attr_dict(resource)
info["path"] = get_path(resource)
info["path"] = get_uri(resource)
info["format_name"] = self.name
info["format_version"] = self.version
return SilixaPatchAttrs(**info), coords
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/tdms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dascore.constants import timeable_types
from dascore.core import Patch
from dascore.io import BinaryReader, FiberIO
from dascore.utils.misc import get_path
from ...utils.fs import get_uri

from .utils import _get_attrs_coords, _get_data, _get_version_str

Expand All @@ -22,7 +22,7 @@ class TDMSFormatterV4713(FiberIO):
def _get_attr_coords(self, resource):
"""Get a PatchAttrs for the file."""
out, coords, _ = _get_attrs_coords(resource)
out["path"] = get_path(resource)
out["path"] = get_uri(resource)
out["file_format"] = self.name
out["file_version"] = self.version
return dc.PatchAttrs(**out), coords
Expand Down
4 changes: 2 additions & 2 deletions dascore/io/terra15/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dascore.constants import timeable_types
from dascore.io import FiberIO
from dascore.utils.hdf5 import H5Reader
from dascore.utils.misc import get_path
from ...utils.fs import get_uri

from .utils import (
_get_default_attrs,
Expand Down Expand Up @@ -39,7 +39,7 @@ def _get_attrs_coords_data_node(self, resource):
"""Get attributes, coords, and datanode for this file."""
version, data_node = _get_version_data_node(resource)
attrs = _get_default_attrs(resource)
attrs["path"] = get_path(resource)
attrs["path"] = get_uri(resource)
attrs["format_name"] = self.name
attrs["format_version"] = version
coords_dict = {
Expand Down
5 changes: 3 additions & 2 deletions dascore/io/xml_binary/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import dascore as dc
from dascore.io import FiberIO
from dascore.utils.models import UTF8Str
from ...utils.fs import get_uri

from .utils import _load_patches, _paths_to_attrs, _read_xml_metadata

Expand All @@ -36,9 +37,9 @@ class XMLBinaryV1(FiberIO):
# File extension for data files.
_data_extension = ".raw"

def scan(self, resource, timestamp=None, **kwargs) -> list[dc.PatchAttrs]:
def scan(self, resource, timestamp=None, **kwargs) -> list[dc.PatchSummary]:
"""Scan the contents of the directory."""
path = Path(resource)
path = get_uri(resource)
metadata = _read_xml_metadata(path / self._metadata_name)
data_files = list(path.glob(f"*{self._data_extension}"))
extra_attrs = {
Expand Down
Loading

0 comments on commit be044b6

Please sign in to comment.