Skip to content

Commit

Permalink
Optional segyio and segy write (#469)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers authored Dec 24, 2024
1 parent 884c41f commit df47d0e
Show file tree
Hide file tree
Showing 11 changed files with 499 additions and 42 deletions.
1 change: 1 addition & 0 deletions dascore/data_registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ neubrex_dts_forge.h5 940f7bea6dd4c8a1340b4936b8eb7f9edc577cbcaf77c1f5ac295890f88
decimated_optodas.hdf5 48ce9c2ab4916d5536faeef0bd789f326ec4afc232729d32014d4d835a9fb74e https://github.com/dasdae/test_data/raw/master/das/decimated_optodas.hdf5
neubrex_das_1.h5 48a97e27c56e66cc2954ba4eaaadd2169919bb8f897d78e95ef6ab50abb5027b https://github.com/dasdae/test_data/raw/master/das/neubrex_das_1.h5
UoU_lf_urban.hdf5 d3f8fa6ff3d8ae993484b3fbf9b39505e2cf15cb3b39925a3519b27d5fbe7b5b https://github.com/dasdae/test_data/raw/master/das/UoU_lf_urban.hdf5
small_channel_patch.sgy 31e551aadb361189c1c9325d504c883114ba9a7bb75fe4791e5089fabccef704 https://github.com/dasdae/test_data/raw/master/das/small_channel_patch.sgy
37 changes: 35 additions & 2 deletions dascore/io/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,14 @@
timeable_types,
)
from dascore.core.attrs import str_validator
from dascore.exceptions import InvalidFiberIOError, UnknownFiberFormatError
from dascore.exceptions import (
InvalidFiberIOError,
MissingOptionalDependencyError,
UnknownFiberFormatError,
)
from dascore.utils.io import IOResourceManager, get_handle_from_resource
from dascore.utils.mapping import FrozenDict
from dascore.utils.misc import _iter_filesystem, cached_method, iterate
from dascore.utils.misc import _iter_filesystem, cached_method, iterate, warn_or_raise
from dascore.utils.models import (
CommaSeparatedStr,
DascoreBaseModel,
Expand Down Expand Up @@ -748,6 +752,27 @@ def _count_generator(generator):
return entity_count


def _handle_missing_optionals(outputs, optional_dep_dict):
    """
    Report files that were skipped because an optional package is missing.

    Parameters
    ----------
    outputs
        The results gathered from the files which could be read.
    optional_dep_dict
        A mapping of needed package name to the number of files found
        which require it.

    Notes
    -----
    When `outputs` is not empty a UserWarning is requested; when it is
    empty a MissingOptionalDependencyError is requested instead.
    """
    needed_packages = dict(optional_dep_dict)
    msg = (
        "DASCore found files that can be read if additional packages are "
        "installed. The needed packages and the found number of files are: "
        f"{needed_packages}"
    )
    # Only escalate to an exception when nothing at all could be read.
    if len(outputs) > 0:
        behavior = "warn"
    else:
        behavior = "raise"
    warn_or_raise(
        msg,
        exception=MissingOptionalDependencyError,
        warning=UserWarning,
        behavior=behavior,
    )


def scan(
path: Path | str | PatchType | SpoolType | IOResourceManager,
file_format: str | None = None,
Expand Down Expand Up @@ -796,6 +821,8 @@ def scan(
"""
out = []
fiber_io_hint: dict[str, FiberIO] = {}
# A dict for keeping track of missing optional dependencies.
missing_optional_deps = defaultdict(lambda: 0)
# Unfortunately, we have to iterate the scan candidates twice to get
# an estimate for the progress bar length. Maybe there is a better way...
_generator = _iterate_scan_inputs(
Expand Down Expand Up @@ -826,6 +853,7 @@ def scan(
except UnknownFiberFormatError: # skip bad entities
continue
# Cache this fiber io to give it preferential treatment next iteration.
# This speeds up the common case of many files with the same format.
fiber_io_hint[fiber_io.input_type] = fiber_io
# Special handling of directory FiberIOs.
if fiber_io.input_type == "directory":
Expand All @@ -843,8 +871,13 @@ def scan(
except OSError: # This happens if the file is corrupt see #346.
warnings.warn(f"Failed to scan {resource}", UserWarning)
continue
except MissingOptionalDependencyError as ex:
missing_optional_deps[ex.msg.split(" ")[0]] += 1
continue
for attr in source:
out.append(dc.PatchAttrs.from_dict(attr))
if missing_optional_deps:
_handle_missing_optionals(out, missing_optional_deps)
return out


Expand Down
9 changes: 8 additions & 1 deletion dascore/io/segy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
-----
- Distance information is not found in most SEGY DAS files so returned
dimensions are "channel" and "time" rather than "distance" and "time".
- Segy standards found at: https://library.seg.org/pb-assets/technical-standards
segy v1 spec: seg_y_rev1-1686080991247.pdf
segy v2 spec: seg_y_rev2_0-mar2017-1686080998003.pdf
segy v2.1 spec: seg_y_rev2_1-oct2023-1701361639333.pdf
Examples
--------
Expand All @@ -17,4 +24,4 @@
segy_patch = dc.spool(path)[0]
"""

from .core import SegyV2
from .core import SegyV1_0, SegyV2_0, SegyV2_1
62 changes: 50 additions & 12 deletions dascore/io/segy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,35 @@

from __future__ import annotations

import segyio

import dascore as dc
from dascore.io.core import FiberIO
from dascore.utils.io import BinaryReader
from dascore.utils.misc import optional_import

from .utils import _get_attrs, _get_coords, _get_filtered_data_and_coords
from .utils import (
_get_attrs,
_get_coords,
_get_filtered_data_and_coords,
_get_segy_version,
_write_segy,
)


class SegyV2(FiberIO):
"""An IO class supporting version 2 of the SEGY format."""
class SegyV1_0(FiberIO): # noqa
"""An IO class supporting version 1.0 of the SEGY format."""

name = "segy"
preferred_extensions = ("segy", "sgy")
# Also specify a version; each supported version of the format gets its
# own class in this module (e.g. SegyV2_0 and SegyV2_1).
version = "2"
version = "1.0"
# The name of the package to import. This is here so the class can be
# subclassed and this changed for debugging reasons.
_package_name = "segyio"

def get_format(self, path, **kwargs) -> tuple[str, str] | bool:
def get_format(self, fp: BinaryReader, **kwargs) -> tuple[str, str] | bool:
"""Make sure input is segy."""
try:
with segyio.open(path, ignore_geometry=True):
return self.name, self.version
except Exception:
return False
return _get_segy_version(fp)

def read(self, path, time=None, channel=None, **kwargs):
"""
Expand All @@ -35,6 +40,7 @@ def read(self, path, time=None, channel=None, **kwargs):
accept kwargs. If the format supports partial reads, these should
be implemented as well.
"""
segyio = optional_import(self._package_name)
with segyio.open(path, ignore_geometry=True) as fi:
coords = _get_coords(fi)
attrs = _get_attrs(fi, coords, path, self)
Expand All @@ -55,7 +61,39 @@ def scan(self, path, **kwargs) -> list[dc.PatchAttrs]:
from the [dascore.core.attrs](`dascore.core.attrs`) module, or a
format-specific subclass.
"""
segyio = optional_import(self._package_name)
with segyio.open(path, ignore_geometry=True) as fi:
coords = _get_coords(fi)
attrs = _get_attrs(fi, coords, path, self)
return [attrs]

def write(self, spool: dc.Patch | dc.BaseSpool, resource, **kwargs):
"""
Create a segy file from length 1 spool or patch.
Parameters
----------
spool
The patch or length 1 spool to write.
resource
The target for writing patch.
Notes
-----
Based on the example from segyio:
https://github.com/equinor/segyio/blob/master/python/examples/make-file.py
"""
segyio = optional_import(self._package_name)
_write_segy(spool, resource, self.version, segyio)


class SegyV2_0(SegyV1_0):  # noqa
    """
    FiberIO for version 2.0 of the SEGY format.

    Only the version string differs from SegyV1_0; everything else is
    inherited from that class.
    """

    version = "2.0"


class SegyV2_1(SegyV1_0):  # noqa
    """
    FiberIO for version 2.1 of the SEGY format.

    Only the version string differs from SegyV1_0; everything else is
    inherited from that class.
    """

    version = "2.1"
Loading

0 comments on commit df47d0e

Please sign in to comment.