diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea77979..ae350dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -69,6 +69,7 @@
 - Adds version release documentation
 - Slightly modifies pysep-docs conda environment to accommodate converted
   notebooks
+
 ## Version 0.5.0
 - Improves functions 'read_forcesolution' and 'read_source', which now return
   `obspy.core.event.Event` objects, rather than the makeshift Source objects
@@ -109,6 +110,15 @@
 - Removed unused parameters 'legacy_naming' and 'log_level' from
   `Pysep.write_config`
+
 ## Version 0.5.1
 - Bugfix: RecSec stream subsetting, which checks that 'st' and 'st_syn'
   contain the same stations, did not run for streams of the same length. In
   that edge case, equal-length streams could plot out of order because they
   had never been sorted. Removed the length criterion so that stream
   subsetting now runs at all times
+
+
+## Version 0.6.0
+
+- #136: New read function for ASDFDataSets for misfit window plotting
+- #137: More control over RecSec kwargs and better warning messages
+- #138: Improved SAC header creation for SPECFEM synthetics
+- #139: Improved RecSec preprocessing setup, more manual control for the User
diff --git a/docs/conf.py b/docs/conf.py
index d4ecd77..da19641 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,7 +15,7 @@ def setup(app):
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 project = 'PySEP'
-copyright = '2023, adjTomo Dev Team'
+copyright = '2024, adjTomo Dev Team'
 author = 'adjTomo Dev Team'
 release = ''  # Grab version number from 'pyproject.toml'
diff --git a/pyproject.toml b/pyproject.toml
index 6f85bd5..673c2d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pysep-adjtomo"
-version = "0.5.1"
+version = "0.6.0"
 description = "Python Seismogram Extraction and Processing"
 readme = "README.md"
 requires-python = ">=3.8"
diff --git a/pysep/recsec.py b/pysep/recsec.py
index fd9e10c..90147e9 100755
--- a/pysep/recsec.py
+++ b/pysep/recsec.py
@@ -95,6 +95,7 @@
 from glob import glob
 from datetime import datetime
 from matplotlib.ticker import MultipleLocator
+from matplotlib.patches import Rectangle
 from obspy import read, Stream
 from obspy.geodetics import (kilometers2degrees, gps2dist_azimuth)
@@ -125,21 +126,29 @@ class RecordSection:
        2) sorts source-receiver pairs based on User input,
        3) produces record section waveform figures.
     """
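For orientation, the reorganized `__init__` signature in the next hunk can be driven roughly as in the sketch below. The data directory and filter values are illustrative (not taken from this diff), and the call sequence mirrors the tests added later in this patch:

```python
from pysep.recsec import RecordSection

# "./SAC" is a hypothetical directory of PySEP-created SAC files
rs = RecordSection(pysep_path="./SAC", sort_by="distance",
                   preprocess=True, min_period_s=10, max_period_s=100,
                   save="./record_section.png", overwrite=True)
rs.process_st()      # trim/taper/filter per the new processing parameters
rs.get_parameters()  # compute sorting, scaling and axis values
rs.plot()
```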
""" - def __init__(self, pysep_path=None, syn_path=None, - stations=None, source=None, st=None, st_syn=None, - sort_by="default", scale_by=None, time_shift_s=None, - zero_pad_s=None, move_out=None, - min_period_s=None, max_period_s=None, - preprocess=None, max_traces_per_rs=None, integrate=0, - xlim_s=None, components="ZRTNE12", y_label_loc="default", - y_axis_spacing=1, amplitude_scale_factor=1, - azimuth_start_deg=0., distance_units="km", tmarks=None, - geometric_spreading_factor=0.5, geometric_spreading_k_val=None, - geometric_spreading_exclude=None, - geometric_spreading_ymax=None, geometric_spreading_save=None, - figsize=(9, 11), show=True, save="./record_section.png", - overwrite=False, log_level="DEBUG", synsyn=False, srcfmt=None, - wildcard="*", syn_wildcard=None, **kwargs): + def __init__( + # Data input parameters + self, pysep_path=None, syn_path=None, stations=None, source=None, + st=None, st_syn=None, windows=None, synsyn=False, srcfmt=None, + wildcard="*", syn_wildcard=None, + # Data organization parameters + sort_by="default", scale_by=None, time_shift_s=None, + zero_pad_s=None, move_out=None, + # Data processing parameters + preprocess=True, min_period_s=None, max_period_s=None, integrate=0, + trim=True, taper=True, + # Plotting aesthetic + xlim_s=None, components="ZRTNE12", y_label_loc="default", + y_axis_spacing=1, amplitude_scale_factor=1, azimuth_start_deg=0., + distance_units="km", tmarks=None, + # Geometric Spreading + geometric_spreading_factor=0.5, geometric_spreading_k_val=None, + geometric_spreading_exclude=None, + geometric_spreading_ymax=None, geometric_spreading_save=None, + # Figure generation control parameters + figsize=(9, 11), show=True, save="./record_section.png", + overwrite=True, max_traces_per_rs=None, log_level="DEBUG", + **kwargs): """ .. note:: Used for reading in Pysep-generated waveforms @@ -197,6 +206,15 @@ def __init__(self, pysep_path=None, syn_path=None, :type st_syn: obspy.core.stream.Stream :param st_syn: Stream objects containing synthetic time series to be plotted on the record section. Must be same length as `st` + :type windows: dict of lists of tuples + :param windows: EXPERIMENTAL FEATURE -- plot misfit windows collected + by windowing algorithm like Pyflex. Essentially these are provided + as start and end times for each trace in the Stream. The dictionary + should be formated where keys are the Trace IDs of observed + waveforms, and values are lists of tuples, where each tuple + represents a window start and end time in seconds. See + `pysep.utils.io.read_asdfdataset` for code on how windows are + structured .. note:: Waveform plotting organization parameters @@ -295,27 +313,33 @@ def __init__(self, pysep_path=None, syn_path=None, 'km': kilometers on the sphere 'deg': degrees on the sphere 'km_utm': kilometers on flat plane, UTM coordinate system + :type components: str + :param components: a sequence of strings representing acceptable + components from the data. Also determines the order these are shown + EVEN when sorted by other variables. For example, components=='ZR' + would only display Z and R components, and Z components would be + should BEFORE R components for the SAME station. .. 
 
         .. note::
             Data processing parameters
 
         :type preprocess: str
-        :param preprocess: choose which data to preprocess, options are:
+        :param preprocess: choose whether the preprocessing steps listed
+            below are applied to waveform data, and if so, which data are
+            processed:
 
-            - None: do not run preprocessing step, including filter (Default)
-            - 'st': process waveforms in `st`
-            - 'st_syn': process waveforms in `st_syn`. st still must be given
-            - 'both': process waveforms in both `st` and `st_syn`
+            - True: process waveforms in both `st` and `st_syn` (Default)
+            - False: do not run any processing on waveforms
+            - 'st': only process waveforms in `st`
+            - 'st_syn': only process waveforms in `st_syn`. st still required
         :type min_period_s: float
-        :param min_period_s: minimum filter period in seconds
+        :param min_period_s: minimum filter period in seconds; if neither
+            this nor `max_period_s` is given, no filtering is applied
         :type max_period_s: float
-        :param max_period_s: maximum filter period in seconds
-        :type components: str
-        :param components: a sequence of strings representing acceptable
-            components from the data. Also determines the order these are shown
-            EVEN when sorted by other variables. For example, components=='ZR'
-            would only display Z and R components, and Z components would be
-            should BEFORE R components for the SAME station.
+        :param max_period_s: maximum filter period in seconds; if neither
+            this nor `min_period_s` is given, no filtering is applied
+        :type trim: bool
+        :param trim: trim waveforms to the same length, and if any data gaps
+            are present, fill with mean values by default
+        :type taper: bool
+        :param taper: demean and taper waveform ends prior to filtering and
+            zero padding so that edge effects do not contaminate the signal
         :type integrate: int
         :param integrate: apply integration `integrate` times on all traces.
             acceptable values [-inf, inf], where positive values are integration
@@ -479,12 +503,14 @@
         self.time_shift_s = time_shift_s
         self.zero_pad_s = zero_pad_s
 
-        # Filtering parameters
+        # Processing parameters
+        self.preprocess = preprocess
         self.min_period_s = min_period_s
         self.max_period_s = max_period_s
-        self.preprocess = preprocess
         self.max_traces_per_rs = max_traces_per_rs
         self.integrate = int(integrate)
+        self.trim = bool(trim)
+        self.taper = bool(taper)
 
         # Plotting parameters
         self.xlim_s = xlim_s
@@ -519,6 +545,9 @@
         self.xlim_syn = []
         self.sorted_idx = []
 
+        # Experimental feature
+        self.windows = windows
+
     def read_data(self, path, data_type, wildcard="*", source=None,
                   stations=None, srcfmt=None):
         """
@@ -611,7 +640,7 @@
         else:
             raise NotImplementedError("`data_type` must be 'data' or 'syn'")
 
-        return st 
+        return st
 
     def check_parameters(self):
         """
@@ -713,23 +742,30 @@
                 len(self.amplitude_scale_factor) != len(self.st):
             err.amplitude_scale_factor = f"must be list length {len(self.st)}"
 
+        acceptable_preprocess = [True, False, "st", "st_syn"]
+        if self.preprocess not in acceptable_preprocess:
+            err.preprocess = f"must be in {acceptable_preprocess}"
+
+        if self.preprocess == "st_syn":
+            assert(self.st is not None and self.st_syn is not None), (
+                f"`preprocess` choice requires both `st` & `st_syn` to exist. "
+                f"If you only have one or the other, set: `preprocess`=='st'"
+            )
+
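To summarize the four accepted `preprocess` values validated above, a short sketch (the data and synthetic paths are placeholders):

```python
# The new `preprocess` semantics, one call per accepted value
RecordSection(pysep_path="./SAC", preprocess=True)    # st and st_syn (default)
RecordSection(pysep_path="./SAC", preprocess=False)   # no processing at all
RecordSection(pysep_path="./SAC", preprocess="st")    # observed only
RecordSection(pysep_path="./SAC", syn_path="./syn",
              preprocess="st_syn")                    # synthetics only
```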
+ f"If you only have one or the other, set: `preprocess`=='st'" + ) + if self.min_period_s is not None and self.max_period_s is not None: if self.min_period_s >= self.max_period_s: err.min_period_s = "must be less than `max_period_s`" if self.min_period_s is not None or self.max_period_s is not None: - assert(self.preprocess is not None), \ + assert(self.preprocess is not False), \ f"Setting filter bounds requires `preprocess` flag to be set" - - if self.preprocess is not None: - acceptable_preprocess = ["both", "st", "st_syn"] - if self.preprocess not in acceptable_preprocess: - err.preprocess = f"must be in {acceptable_preprocess}" - if self.preprocess in ["st_syn", "both"]: - assert(self.st is not None and self.st_syn is not None), ( - f"`preprocess` choice requires both `st` & `st_syn` to exist." - f"If you only have one or the other, set: `preprocess`=='st'" - ) + + # Do not allow for 0 period filter because that's wrong and will also + # trip up later logic checks + if self.min_period_s: + assert(self.min_period_s != 0) + if self.max_period_s: + assert(self.max_period_s != 0) # Overwrite the max traces per record section, enforce type int if self.max_traces_per_rs is None: @@ -1465,79 +1501,126 @@ def get_x_axis_tick_values(self): return xtick_minor, xtick_major - def process_st(self): + def process_st(self, **kwargs): """ Preprocess the Stream with optional filtering in place. .. note:: + Data in memory will be irretrievably altered by running preprocess. + .. warning:: + + if `trim` is False but `zero_pad_s` is True we may run into the + issue of filling data gaps with zeros + TODO Add feature to allow list-like periods to individually filter seismograms. At the moment we just apply a blanket filter. + + KWARGS + :type max_percentage: float + :param max_percentage: percentage of the trace to taper at both ends + :type zerophase: bool + :param zerophase: whether to apply zero-phase filtering or not, + defaults to True + :type taper_type: str + :param taper_type: type of taper to apply to the waveforms + :type fill_value: str or float + :param fill_value: value to fill gaps in the data after trimming, + defaults to mean of the trace """ - if self.preprocess is None: - logger.info("no preprocessing (including filtering) applied") + max_percentage = float(self.kwargs.get("max_percentage", 0.05)) + zerophase = bool(self.kwargs.get("zerophase", True)) + taper_type = self.kwargs.get("taper_type", "cosine") + fill_value = self.kwargs.get("fill_value", "mean") + + # Determine which traces we are running through preprocessing + if self.preprocess == False: + logger.info("no preprocessing will be applied to waveforms") return + elif self.preprocess == True: + preprocess_list = [self.st] + if self.st_syn is not None: + preprocess_list.append(self.st_syn) + n = sum([len(_) for _ in preprocess_list]) + logger.info(f"preprocessing {n} waveforms") elif self.preprocess == "st": logger.info(f"preprocessing {len(self.st)} `st` waveforms") preprocess_list = [self.st] elif self.preprocess == "st_syn": logger.info(f"preprocessing {len(self.st_syn)} `st_syn` waveforms") preprocess_list = [self.st_syn] - elif self.preprocess == "both": - logger.info(f"preprocessing {len(self.st) + len(self.st_syn)} " - f"`st` and `st_syn` waveforms") - preprocess_list = [self.st, self.st_syn] for st in preprocess_list: - # Fill any data gaps with mean of the data, do it on a trace by - # trace basis to get individual mean values - for tr in st: - tr.trim(starttime=tr.stats.starttime, endtime=tr.stats.endtime, - pad=True, 
-            st.detrend("demean")
-            st.taper(max_percentage=0.05, type="cosine")
+            if self.trim:
+                logger.debug("trimming start and end times and filling any "
+                             f"gaps with {fill_value}")
+                for tr in st:
+                    # Use a per-trace value so that 'mean' is not overwritten
+                    # by the first trace's mean for all subsequent traces
+                    if fill_value == "mean":
+                        _fill_value = tr.data.mean()
+                    else:
+                        _fill_value = fill_value
+                    tr.trim(starttime=tr.stats.starttime,
+                            endtime=tr.stats.endtime, pad=True,
+                            fill_value=_fill_value)
+
+            # Taper prior to zero pad so that the taper actually hits signal
+            if self.taper:
+                # Demean first so the taper does not ramp to a static offset
+                logger.debug("demean waveform in preparation for tapering")
+                st.detrend("demean")
+
+                logger.debug(f"tapering waveforms with {max_percentage} taper")
+                st.taper(max_percentage=max_percentage, type=taper_type)
 
             # Zero pad start and end of data if requested by user
             if self.zero_pad_s:
+                if not self.taper:
+                    # If we don't demean, zero pad may introduce static offset
+                    logger.debug("demean waveform in preparation for zero pad")
+                    st.detrend("demean")
+
                 _start, _end = self.zero_pad_s
-                logger.info(f"padding zeros to traces with {_start}s before "
+                logger.debug(f"padding zeros to traces with {_start}s before "
                             f"and {_end}s after")
-                for idx, tr in enumerate(st):
+                for tr in st:
                     tr.trim(starttime=tr.stats.starttime - _start,
                             endtime=tr.stats.endtime + _end,
                             pad=True, fill_value=0)
 
-            # Allow multiple filter options based on user input
-            # Max period only == high-pass
-            if self.max_period_s is not None and self.min_period_s is None:
-                logger.info(f"apply highpass filter w/ cutoff "
-                            f"{1/self.max_period_s}")
-                st.filter("highpass", freq=1/self.max_period_s, zerophase=True)
-            # Min period only == low-pass
-            elif self.min_period_s is not None and self.max_period_s is None:
-                logger.info(f"apply lowpass filter w/ cutoff "
-                            f"{1/self.min_period_s}")
-                st.filter("lowpass", freq=1/self.min_period_s, zerophase=True)
-            # Both min and max period == band-pass
-            elif self.min_period_s is not None and \
-                    self.max_period_s is not None:
-                logger.info(f"applying bandpass filter w/ "
-                            f"[{1/self.max_period_s}, {self.min_period_s}]")
-                st.filter("bandpass", freqmin=1/self.max_period_s,
-                          freqmax=1/self.min_period_s, zerophase=True)
-            else:
-                logger.info("no filtering applied")
+            # Apply filtering
+            if self.min_period_s or self.max_period_s:
+                # Max period only == high-pass filter
+                if self.max_period_s and self.min_period_s is None:
+                    logger.debug(f"apply highpass filter w/ cutoff "
+                                 f"{1/self.max_period_s}")
+                    st.filter("highpass", freq=1/self.max_period_s,
+                              zerophase=zerophase)
+                # Min period only == low-pass filter
+                elif self.min_period_s and self.max_period_s is None:
+                    logger.debug(f"apply lowpass filter w/ cutoff "
+                                 f"{1/self.min_period_s}")
+                    st.filter("lowpass", freq=1/self.min_period_s,
+                              zerophase=zerophase)
+                # Both min and max period == band-pass filter
+                elif self.min_period_s and self.max_period_s:
+                    logger.debug(
+                        f"applying bandpass filter w/ corner frequencies "
+                        f"[{1/self.max_period_s}, {1/self.min_period_s}]"
+                    )
+                    st.filter("bandpass", freqmin=1/self.max_period_s,
+                              freqmax=1/self.min_period_s, zerophase=zerophase)
 
-            # Integrate and differentiate N number of times specified by user
-            st.detrend("simple")
+            # Integrate or differentiate N number of times specified by user
             if self.integrate != 0:
+                st.detrend("simple")
                 if self.integrate < 0:
                     func = "differentiate"
                 elif self.integrate > 0:
                     func = "integrate"
-            for i in range(np.abs(self.integrate)):
-                logger.info(f"{func} all waveform data x{abs(self.integrate)}")
+            for _ in range(np.abs(self.integrate)):
+                logger.info(f"{func} all waveform data "
+                            f"x{abs(self.integrate)}")
                 getattr(st, func)()
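The KWARGS documented on `process_st` appear to flow in through `**kwargs` on `__init__` and are read back via `self.kwargs.get()`; a sketch with the defaults written out explicitly (the data path is hypothetical):

```python
rs = RecordSection(pysep_path="./SAC",     # hypothetical data directory
                   min_period_s=10,        # together with max_period_s this
                   max_period_s=100,       # selects a bandpass filter
                   trim=True, taper=True,  # new processing toggles
                   # read back via self.kwargs.get() inside process_st():
                   max_percentage=0.05, taper_type="cosine",
                   zerophase=True, fill_value="mean")
rs.process_st()
```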
waveform data " + f"x{abs(self.integrate)}") getattr(st, func)() def plot(self, subset=None, page_num=None, **kwargs): @@ -1683,6 +1766,10 @@ def _plot_trace(self, idx, y_index, choice="st"): to plot the observed or synthetic waveforms """ linewidth = self.kwargs.get("linewidth", .25) + window_alpha = self.kwargs.get("window_alpha", .1) + window_color = self.kwargs.get("window_color", "orange") + obs_color = self.kwargs.get("obs_color", "k") + syn_color = self.kwargs.get("syn_color", "r") # Used to differentiate the two types of streams for plotting diffs choices = ["st", "st_syn"] @@ -1715,7 +1802,7 @@ def _plot_trace(self, idx, y_index, choice="st"): else: sign = 1 - # Ampplitude scaling may change between observed and synthetic if e.g., + # Amplitude scaling may change between observed and synthetic if e.g., # we are doing trace-wise normalization if choice == "st": amplitude_scaling = self.amplitude_scaling @@ -1724,8 +1811,24 @@ def _plot_trace(self, idx, y_index, choice="st"): amplitude_scaling = self.amplitude_scaling_syn max_amplitudes = self.max_amplitudes_syn - y = sign * tr.data / amplitude_scaling[idx] + self.y_axis[y_index] - + # Avoid ZeroDivisionError if the amplitude scaling value is 0 (#131) + scale = amplitude_scaling[idx] + if scale == 0: + logger.warning("amplitude scaling value is 0, setting to 1, " + "amplitude scaling may not behave as expected") + scale = 1 + + y = sign * tr.data / scale + self.y_axis[y_index] + + # Experimental: Plot windows over the record section if provided by User + if self.windows and tr.id in self.windows: + for window in self.windows[tr.id]: + rc = Rectangle(xy=(window[0] + tshift, y.min()), + width=window[1] - window[0], + height=y.max() - y.min(), alpha=window_alpha, + color=window_color, zorder=2) + self.ax.add_patch(rc) + # Truncate waveforms to get figure scaling correct. Make the distinction # between data and synthetics which may have different start and end # times natively @@ -1736,8 +1839,9 @@ def _plot_trace(self, idx, y_index, choice="st"): x = x[start:stop] y = y[start:stop] - self.ax.plot(x, y, c=["k", "r"][c], linewidth=linewidth, zorder=10) - + self.ax.plot(x, y, c=[obs_color, syn_color][c], linewidth=linewidth, + zorder=10) + # Sanity check print station information to check against plot log_str = (f"{idx}" f"\t{int(self.y_axis[y_index])}" @@ -2334,7 +2438,7 @@ def parse_args(): parser.add_argument("--save", default="./record_section.png", type=str, nargs="?", help="Path to save the resulting record section fig") - parser.add_argument("-o", "--overwrite", default=False, action="store_true", + parser.add_argument("-o", "--overwrite", default=True, action="store_true", help="overwrite existing figure if path exists") parser.add_argument("--synsyn", default=False, action="store_true", help="Let RecSec know that both `pysep_path` and " diff --git a/pysep/tests/test_recsec.py b/pysep/tests/test_recsec.py index 252cf10..ef289eb 100644 --- a/pysep/tests/test_recsec.py +++ b/pysep/tests/test_recsec.py @@ -137,3 +137,24 @@ def test_recsec_calc_time_offset(recsec_w_synthetics): recsec_w_synthetics.get_parameters() for tr in recsec_w_synthetics.st: assert(tr.stats.time_offset == -100) + +def test_recsec_zero_amplitude(recsec): + """ + waveforms that have zero amplitude and are normalized should be able + to bypass normalizations which lead to weird plotting (see #131). + + .. 
+
+    .. note::
+
+        This does not really test that the method works correctly, because
+        dividing a NumPy array by zero produces NaNs, which simply will not
+        plot. This is more of a visual check that zero-amplitude traces are
+        plotted correctly; look for green lines
+    """
+    recsec.kwargs.scale_by = "normalize"
+    recsec.kwargs.obs_color = "green"
+    recsec.linewidth = 30
+    for tr in recsec.st:
+        tr.data *= 0
+    recsec.process_st()
+    recsec.get_parameters()
+    recsec.plot()
diff --git a/pysep/tests/test_utils.py b/pysep/tests/test_utils.py
index d60c37a..0b4221b 100644
--- a/pysep/tests/test_utils.py
+++ b/pysep/tests/test_utils.py
@@ -59,6 +59,16 @@ def test_append_sac_headers(test_st, test_inv, test_event):
     assert(st[0].stats.sac["evla"] == test_event.preferred_origin().latitude)
 
 
+def test_append_sac_headers_cartesian(test_st, test_inv, test_event):
+    """
+    Make sure we can write SAC headers correctly
+    """
+    st = append_sac_headers(st=test_st, inv=test_inv, event=test_event)
+    assert(not hasattr(test_st[0].stats, "sac"))
+    assert(hasattr(st[0].stats, "sac"))
+    assert(st[0].stats.sac["evla"] == test_event.preferred_origin().latitude)
+
+
 def test_event_tag_and_event_tag_legacy(test_event):
     """
     Check that event tagging works as expected
@@ -166,6 +176,14 @@ def test_read_sem():
         st += read_sem(fid=test_synthetic, source=test_cmtsolution,
                        stations=test_stations)
     assert(st)
+
+    expected_headers = ["iztype", "b", "e", "evla", "evlo", "stla", "stlo",
+                        "stel", "kevnm", "nzyear", "nzjday", "nzhour", "nzmin",
+                        "nzsec", "nzmsec", "dist", "az", "baz", "gcarc",
+                        "lpspol", "lcalda", "evdp", "mag"]
+    for expected_header in expected_headers:
+        assert(expected_header in st[0].stats.sac)
+    assert(st[0].stats.sac.evla == -40.5405)
@@ -182,7 +200,17 @@ def test_read_sem_cartesian():
         st += read_sem(fid=test_synthetic, source=test_cmtsolution,
                        stations=test_stations)
     assert(st)
+
+    expected_headers = ["iztype", "b", "e", "evla", "evlo", "stla", "stlo",
+                        "kevnm", "nzyear", "nzjday", "nzhour", "nzmin",
+                        "nzsec", "nzmsec", "dist", "az", "baz", "gcarc",
+                        "lpspol", "lcalda", "evdp"]
+    for expected_header in expected_headers:
+        assert(expected_header in st[0].stats.sac)
+    assert(st[0].stats.sac.stla == 67000.0)
+    assert(st[0].stats.sac.evdp == 30.)
+    assert(st[0].stats.sac.b == -10.)
 
 def test_estimate_prefilter_corners(test_st):
     """
diff --git a/pysep/utils/cap_sac.py b/pysep/utils/cap_sac.py
index 9be09e2..e8994b2 100644
--- a/pysep/utils/cap_sac.py
+++ b/pysep/utils/cap_sac.py
@@ -155,7 +155,7 @@ def _append_sac_headers_trace(tr, event, inv):
     We explicitly set 'iztype', 'b' and 'e' in the SAC header to tell ObsPy
     that the trace start is NOT the origin time. Otherwise all the relative
-    timing (e.g., picks) will be wrong.
+    timing (e.g., picks) in SAC will be wrong.
 
     :type tr: obspy.core.trace.Trace
     :param tr: Trace to append SAC header to
@@ -206,6 +206,7 @@
         "lpspol": 0,  # 1 if left-hand polarity (usually no in passive seis)
         "lcalda": 1,  # 1 if DIST, AZ, BAZ, GCARC to be calc'd from metadata
     }
+    # Some Inventory objects will not go all the way to channel, only station
     try:
         cha = sta[0]
@@ -227,6 +228,13 @@
     except Exception:  # NOQA
         pass
 
+    # Warn User about any SAC headers that could not be found
+    _warn_about = []
+    for key in ["cmpinc", "cmpaz", "evdp", "mag"]:
+        if key not in sac_header:
+            _warn_about.append(key)
+    if _warn_about:
+        logger.warning(f"no SAC header values found for: {_warn_about}")
+
     # Append SAC header and include back azimuth for rotation
     tr.stats.sac = sac_header
     tr.stats.back_azimuth = baz
@@ -297,24 +305,29 @@ def _append_sac_headers_cartesian_trace(tr, event, rcv_x, rcv_y):
     :rtype: obspy.core.trace.Trace
     :return: Trace with appended SAC header
     """
-    net_sta = f"{tr.stats.network}.{tr.stats.station}"
-
     src_y = event.preferred_origin().latitude
     src_x = event.preferred_origin().longitude
+    otime = event.preferred_origin().time
+    evdepth_km = event.preferred_origin().depth / 1E3  # units: m -> km
 
     # Calculate Cartesian distance and azimuth/backazimuth
    dist_m = np.sqrt(((rcv_x - src_x) ** 2) + ((rcv_y - src_y) ** 2))
+    dist_km = dist_m * 1E-3  # units: m -> km
+    dist_deg = kilometer2degrees(dist_km)  # spherical Earth approximation
     azimuth = np.degrees(np.arctan2((rcv_x - src_x), (rcv_y - src_y))) % 360
     backazimuth = (azimuth - 180) % 360
-    otime = event.preferred_origin().time
-
-    # Barebones SAC header, we only append values required by RecSec
+
     sac_header = {
+        "iztype": 9,  # reference time equivalence, IB (9): begin time
+        "b": tr.stats.starttime - otime,  # begin time
+        "e": tr.stats.npts * tr.stats.delta,  # end time
         "stla": rcv_y,
         "stlo": rcv_x,
         "evla": src_y,
         "evlo": src_x,
-        "dist": dist_m * 1E-3,  # units: km
+        "evdp": evdepth_km,
+        "dist": dist_km,
+        "gcarc": dist_deg,  # degrees
         "az": azimuth,
         "baz": backazimuth,
         "kevnm": format_event_tag_legacy(event),  # only take date code
         "nzyear": otime.year,
         "nzjday": otime.julday,
         "nzhour": otime.hour,
         "nzmin": otime.minute,
         "nzsec": otime.second,
         "nzmsec": otime.microsecond,
-    }
+        "lpspol": 0,  # 1 if left-hand polarity (usually no in passive seis)
+        "lcalda": 1,  # 1 if DIST, AZ, BAZ, GCARC to be calc'd from metadata
+    }
 
     tr.stats.sac = sac_header
diff --git a/pysep/utils/io.py b/pysep/utils/io.py
index dc269d5..4bdfeb6 100644
--- a/pysep/utils/io.py
+++ b/pysep/utils/io.py
@@ -14,7 +14,7 @@
 from pysep import logger
 from pysep.utils.mt import moment_magnitude, seismic_moment
-from pysep.utils.fmt import format_event_tag_legacy, channel_code
+from pysep.utils.fmt import channel_code
 from pysep.utils.cap_sac import append_sac_headers, append_sac_headers_cartesian
@@ -498,6 +498,67 @@ def read_event_file(fid):
     return list_out
 
 
+def read_asdfdataset(path, evaluation):
+    """
+    Read an ASDFDataSet created by a SeisFlows Pyaflowa inversion run.
+    The dataset should contain observed and synthetic waveforms, and
+    optionally misfit windows. Returns Streams with SAC headers appended.
+
+    .. note::
+
+        This function makes assumptions about the PyASDF data structure,
+        which is defined by the external package Pyatoa. If Pyatoa changes
+        that structure, this function will break.
+
+    :type path: str
+    :param path: Path to the ASDF dataset to be read
+    :type evaluation: str
+    :param evaluation: evaluation to take synthetics from. These are saved
+        following a format specified by Pyatoa, but usually follow the form
+        iteration/step_count, e.g., i01s00 gives iteration 1, step count 0.
+        Take a look at the waveform tags in `ASDFDataSet.waveforms[]`
+        for tags following the 'synthetic_' prefix
+    """
+    # PySEP, by default, does not require PyASDF to be installed
+    try:
+        from pyasdf import ASDFDataSet  # NOQA
+    except ImportError:
+        logger.critical("pyasdf is not installed. Please install pyasdf "
+                        "to read ASDF datasets")
+        return None, None, None
+
+    with ASDFDataSet(path) as ds:
+        event = ds.events[0]
+        st_out = Stream()
+        st_syn_out = Stream()
+        for station in ds.waveforms.list():
+            inv = ds.waveforms[station].StationXML
+            st = ds.waveforms[station].observed
+            st_syn = ds.waveforms[station][f"synthetic_{evaluation}"]
+
+            st_out += append_sac_headers(st, event, inv)
+            st_syn_out += append_sac_headers(st_syn, event, inv)
+
+        # Read misfit windows from the dataset
+        windows = {}
+        if hasattr(ds.auxiliary_data, "MisfitWindows"):
+            iter_ = evaluation[:3]  # 'i01s00' -> 'i01'
+            step = evaluation[3:]   # 'i01s00' -> 's00'
+            for station in ds.auxiliary_data.MisfitWindows[iter_][step].list():
+                parameters = ds.auxiliary_data.MisfitWindows[iter_][step][
+                    station].parameters
+                trace_id = parameters["channel_id"]
+                starttime = parameters["relative_starttime"]
+                endtime = parameters["relative_endtime"]
+                # Initialize an empty window list for new trace IDs
+                if trace_id not in windows:
+                    windows[trace_id] = []
+
+                windows[trace_id].append((starttime, endtime))
+
+    return st_out, st_syn_out, windows
+
+
 def write_sem(st, unit, path="./", time_offset=0):
     """
     Write an ObsPy Stream in the two-column ASCII format that Specfem uses
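End to end, the new reader pairs with the `windows` parameter of `RecordSection`; a sketch in which the file name and evaluation tag are placeholders:

```python
from pysep.recsec import RecordSection
from pysep.utils.io import read_asdfdataset

# "dataset.h5" and "i01s00" (iteration 1, step count 0) are placeholders
st, st_syn, windows = read_asdfdataset(path="dataset.h5", evaluation="i01s00")

rs = RecordSection(st=st, st_syn=st_syn, windows=windows, sort_by="distance",
                   preprocess=False)
rs.process_st()      # no-op here because preprocess=False
rs.get_parameters()
rs.plot()            # misfit windows drawn as Rectangle patches
```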