Skip to content

Commit

Permalink
make segyio optional
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers committed Dec 7, 2024
1 parent 3001b76 commit 8dbbe29
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 7 deletions.
7 changes: 7 additions & 0 deletions dascore/io/segy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
-----
- Distance information is not found in most SEGY DAS files so returned
dimensions are "channel" and "time" rather than "distance" and "time".
- Segy standards found at: https://library.seg.org/pb-assets/technical-standards
segy v1 spec: seg_y_rev1-1686080991247.pdf
segy v2 spec: seg_y_rev2_0-mar2017-1686080998003.pdf
segy v2.1 spec: seg_y_rev2_1-oct2023-1701361639333.pdf
Examples
--------
Expand Down
14 changes: 8 additions & 6 deletions dascore/io/segy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import dascore as dc
from dascore.io.core import FiberIO

from .utils import _get_attrs, _get_coords, _get_filtered_data_and_coords
from .utils import _get_attrs, _get_coords, _get_filtered_data_and_coords, _is_segy


class SegyV2(FiberIO):
Expand All @@ -21,11 +21,13 @@ class SegyV2(FiberIO):

def get_format(self, path, **kwargs) -> tuple[str, str] | bool:
    """
    Make sure input is segy.

    Parameters
    ----------
    path
        Path to the file to inspect. It is opened in binary mode.
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    ``(self.name, self.version)`` if the file looks like SEG-Y, else False.
    """
    try:
        with open(path, "rb") as fp:
            detected = _is_segy(fp)
    except OSError:
        # A path that cannot be opened or read is treated as "not SEG-Y",
        # so format detection reports False rather than raising.
        return False
    # Report the format name/version pair rather than a bare True.
    return (self.name, self.version) if detected else False

def read(self, path, time=None, channel=None, **kwargs):
"""
Expand Down
58 changes: 57 additions & 1 deletion dascore/io/segy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,69 @@

import datetime

# --- Getting format/version
import numpy as np
from segyio import TraceField

import dascore as dc
from dascore.core import get_coord_manager

# --- Getting format/version
# Valid data sample format codes as specified in the SEGY rev1 manual.
# NOTE(review): later SEG-Y revisions may define additional codes; confirm
# before using this list to validate rev2 files.
VALID_FORMATS = [1, 2, 3, 4, 5, 8]

# Maximum possible interval between two samples, in seconds. The value equals
# 65535 microseconds, the largest unsigned 16-bit integer (presumably the
# width of the SEG Y sample interval header field; verify against the spec).
MAX_INTERVAL_IN_SECONDS = 0.065535

# Largest value representable by a signed 16-bit integer (int16).
MAX_NUMBER_OF_SAMPLES = 32767


def twos_comp(bytes_):
    """
    Decode a big-endian byte string as a signed (two's complement) integer.

    Parameters
    ----------
    bytes_
        The raw bytes to decode, most significant byte first.

    Returns
    -------
    The signed integer value. An empty byte string decodes to 0.
    """
    # int.from_bytes applies the sign bit itself and, unlike manual bit
    # shifting, does not fail with a negative shift count on empty input.
    return int.from_bytes(bytes_, "big", signed=True)


def _is_segy(fp):
"""
Return True if file pointer contains segy formatted data.
Based on ObsPy's implementation writen by Lion Krischer.
https://github.com/obspy/obspy/blob/master/obspy/io/segy/core.py
"""
# # Read 400byte header into byte string.
# fp.seek(3200)
# header = fp.read(400)
# data_trace_count = twos_comp(header[12:14])
# auxiliary_trace_count = twos_comp(header[14:16])
# sample_interval = twos_comp(header[16:18])
# samples_per_trace = twos_comp(header[20:22])
# data_format_code = twos_comp(header[24:26])
# format_number_major = int.from_bytes(header[300:301])
# format_number_minor = int.from_bytes(header[301:302])
# fixed_len_flag = twos_comp(header[302:304])
#
#
# if _format_number not in (0x0000, 0x0100, 0x0010, 0x0001):
# return False
#
# _fixed_length = unpack(fmt, _fixed_length)[0]
# _extended_number = unpack(fmt, _extended_number)[0]
# # Make some sanity checks and return False if they fail.
# if (
# _sample_interval <= 0
# or _samples_per_trace <= 0
# or _number_of_data_traces < 0
# or _number_of_auxiliary_traces < 0
# or _fixed_length < 0
# or _extended_number < 0
# ):
# return False
return True


def _get_filtered_data_and_coords(segy_fi, coords, time=None, channel=None):
Expand Down

0 comments on commit 8dbbe29

Please sign in to comment.