From 8dbbe294b9a8071b6d0672db826bc7dec585b939 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Fri, 6 Dec 2024 17:34:24 -0800 Subject: [PATCH] make segyio optional --- dascore/io/segy/__init__.py | 7 +++++ dascore/io/segy/core.py | 14 +++++---- dascore/io/segy/utils.py | 58 ++++++++++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/dascore/io/segy/__init__.py b/dascore/io/segy/__init__.py index 2a421dd7..e6f8a3f9 100644 --- a/dascore/io/segy/__init__.py +++ b/dascore/io/segy/__init__.py @@ -5,6 +5,13 @@ ----- - Distance information is not found in most SEGY DAS files so returned dimensions are "channel" and "time" rather than "distance" and "time". +- Segy standards found at: https://library.seg.org/pb-assets/technical-standards + +segy v1 spec: seg_y_rev1-1686080991247.pdf + +segy v2 spec: seg_y_rev2_0-mar2017-1686080998003.pdf + +segy v2.1 spec: seg_y_rev2_1-oct2023-1701361639333.pdf Examples -------- diff --git a/dascore/io/segy/core.py b/dascore/io/segy/core.py index 5e82c5bf..b892e53b 100644 --- a/dascore/io/segy/core.py +++ b/dascore/io/segy/core.py @@ -7,7 +7,7 @@ import dascore as dc from dascore.io.core import FiberIO -from .utils import _get_attrs, _get_coords, _get_filtered_data_and_coords +from .utils import _get_attrs, _get_coords, _get_filtered_data_and_coords, _is_segy class SegyV2(FiberIO): @@ -21,11 +21,13 @@ class SegyV2(FiberIO): def get_format(self, path, **kwargs) -> tuple[str, str] | bool: """Make sure input is segy.""" - try: - with segyio.open(path, ignore_geometry=True): - return self.name, self.version - except Exception: - return False + with open(path, "rb") as fp: + return _is_segy(fp) + # try: + # with segyio.open(path, ignore_geometry=True): + # return self.name, self.version + # except Exception: + # return False def read(self, path, time=None, channel=None, **kwargs): """ diff --git a/dascore/io/segy/utils.py b/dascore/io/segy/utils.py index 97f4e75e..89eae603 100644 --- 
a/dascore/io/segy/utils.py +++ b/dascore/io/segy/utils.py @@ -4,13 +4,69 @@ import datetime +# --- Getting format/version import numpy as np from segyio import TraceField import dascore as dc from dascore.core import get_coord_manager -# --- Getting format/version +# Valid data format codes as specified in the SEGY rev1 manual. +VALID_FORMATS = [1, 2, 3, 4, 5, 8] + +# This is the maximum possible interval between two samples due to the nature +# of the SEG Y format. +MAX_INTERVAL_IN_SECONDS = 0.065535 + +# largest number possible with int16 +MAX_NUMBER_OF_SAMPLES = 32767 + + +def twos_comp(bytes_): + """Get twos complement of bytestring.""" + bits = len(bytes_) * 8 + val = int.from_bytes(bytes_, "big") + if (val & (1 << (bits - 1))) != 0: # if sign bit is set e.g., 8bit: 128-255 + val = val - (1 << bits) # compute negative value + return val # return positive value as is + + +def _is_segy(fp): + """ + Return True if file pointer contains segy formatted data. + + Based on ObsPy's implementation written by Lion Krischer. + https://github.com/obspy/obspy/blob/master/obspy/io/segy/core.py + """ + # # Read 400byte header into byte string. + # fp.seek(3200) + # header = fp.read(400) + # data_trace_count = twos_comp(header[12:14]) + # auxiliary_trace_count = twos_comp(header[14:16]) + # sample_interval = twos_comp(header[16:18]) + # samples_per_trace = twos_comp(header[20:22]) + # data_format_code = twos_comp(header[24:26]) + # format_number_major = int.from_bytes(header[300:301]) + # format_number_minor = int.from_bytes(header[301:302]) + # fixed_len_flag = twos_comp(header[302:304]) + # + # + # if _format_number not in (0x0000, 0x0100, 0x0010, 0x0001): + # return False + # + # _fixed_length = unpack(fmt, _fixed_length)[0] + # _extended_number = unpack(fmt, _extended_number)[0] + # # Make some sanity checks and return False if they fail.
+ # if ( + # _sample_interval <= 0 + # or _samples_per_trace <= 0 + # or _number_of_data_traces < 0 + # or _number_of_auxiliary_traces < 0 + # or _fixed_length < 0 + # or _extended_number < 0 + # ): + # return False + return True def _get_filtered_data_and_coords(segy_fi, coords, time=None, channel=None):