From 3d635c1333a106ec5b48a65a7f463e8424cbf190 Mon Sep 17 00:00:00 2001
From: Derrick Chambers
Date: Tue, 8 Oct 2024 07:22:39 -0700
Subject: [PATCH 1/4] try to fix doc-build env (#446)

---
 .../workflows/build_deploy_master_docs.yaml | 23 ++++++++++++-------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/build_deploy_master_docs.yaml b/.github/workflows/build_deploy_master_docs.yaml
index a9b91f28..adf86cca 100644
--- a/.github/workflows/build_deploy_master_docs.yaml
+++ b/.github/workflows/build_deploy_master_docs.yaml
@@ -40,16 +40,23 @@ jobs:
         run: |
           git fetch --tags --force # Retrieve annotated tags.
 
-      - name: Setup conda
-        uses: conda-incubator/setup-miniconda@v3
+      - uses: mamba-org/setup-micromamba@v1
         with:
-          mamba-version: "*"
-          channels: conda-forge,defaults
-          channel-priority: true
-          python-version: "3.10"
-          activate-environment: dascore
+          micromamba-version: '1.5.8-0' # versions: https://github.com/mamba-org/micromamba-releases
           environment-file: environment.yml
-          condarc-file: .github/test_condarc.yml
+          init-shell: >-
+            bash
+            powershell
+          cache-environment: true
+          post-cleanup: 'all'
+
+      # Not sure why this is needed, but it appears to be.
+      - name: fix env
+        shell: bash -l {0}
+        run: |
+          micromamba shell init --shell bash --root-prefix=~/micromamba
+          eval "$(micromamba shell hook --shell bash)"
+          micromamba activate dascore
 
       - name: install dascore with docbuild reqs
         shell: bash -l {0}

From e6ddf306750846a8775ff2642fc969280cecf341 Mon Sep 17 00:00:00 2001
From: Derrick Chambers
Date: Fri, 18 Oct 2024 20:25:23 -0700
Subject: [PATCH 2/4] add uv for linting (#450)

---
 .github/workflows/lint.yml | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 36c06f35..01807ae1 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -12,8 +12,19 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: "get tags"
+        run: |
+          git fetch --tags --force # Retrieve annotated tags.
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
       - name: install linting packages
-        run: pip install pre-commit --break-system-packages
+        run: uv tool install pre-commit
 
       - name: run all precommits
-        run: pre-commit run --files dascore/**/*
+        run: uv run pre-commit run --all-files

From 7649ac4965b36b7a4ef73d5bb1943940f2945a50 Mon Sep 17 00:00:00 2001
From: Ahmad Tourei <92008628+ahmadtourei@users.noreply.github.com>
Date: Mon, 28 Oct 2024 18:03:02 -0500
Subject: [PATCH 3/4] notch for Patch (#448)

* notch for Patch

* fix examples

* fix examples with units

* address review, add unit capability

* uncomment examples

* fix examples and tests

* fix distance examples

---------

Co-authored-by: Derrick Chambers
---
 dascore/core/patch.py          |   1 +
 dascore/proc/__init__.py       |   2 +-
 dascore/proc/filter.py         | 109 +++++++++++++++++++++++++--------
 dascore/units.py               |  45 +++++++-------
 tests/test_proc/test_filter.py |  52 +++++++++++++++-
 5 files changed, 159 insertions(+), 50 deletions(-)

diff --git a/dascore/core/patch.py b/dascore/core/patch.py
index 53969cdf..969f592c 100644
--- a/dascore/core/patch.py
+++ b/dascore/core/patch.py
@@ -291,6 +291,7 @@ def iselect(self, *args, **kwargs):
     pass_filter = dascore.proc.pass_filter
     sobel_filter = dascore.proc.sobel_filter
     median_filter = dascore.proc.median_filter
+    notch_filter = dascore.proc.notch_filter
    savgol_filter = dascore.proc.savgol_filter
     gaussian_filter = dascore.proc.gaussian_filter
     slope_filter = dascore.proc.slope_filter

diff --git a/dascore/proc/__init__.py b/dascore/proc/__init__.py
index 471eceb4..e3f31ae0 100644
--- a/dascore/proc/__init__.py
+++ b/dascore/proc/__init__.py
@@ -8,7 +8,7 @@
 from .coords import *  # noqa
 from .correlate import correlate, correlate_shift
 from .detrend import detrend
-from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter, slope_filter
+from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter, slope_filter, notch_filter
 from .resample import decimate, interpolate, resample
 from .rolling import rolling
 from .taper import taper

diff --git a/dascore/proc/filter.py b/dascore/proc/filter.py
index 78c0d994..e0f61d79 100644
--- a/dascore/proc/filter.py
+++ b/dascore/proc/filter.py
@@ -15,13 +15,18 @@
 from scipy import ndimage
 from scipy.ndimage import gaussian_filter as np_gauss
 from scipy.ndimage import median_filter as nd_median_filter
-from scipy.signal import iirfilter, sosfilt, sosfiltfilt, zpk2sos
+from scipy.signal import filtfilt, iirfilter, iirnotch, sosfilt, sosfiltfilt, zpk2sos
 from scipy.signal import savgol_filter as np_savgol_filter
 
 import dascore as dc
 from dascore.constants import PatchType, samples_arg_description
 from dascore.exceptions import FilterValueError, ParameterError, UnitError
-from dascore.units import convert_units, get_filter_units, invert_quantity
+from dascore.units import (
+    convert_units,
+    get_filter_units,
+    get_inverted_quant,
+    invert_quantity,
+)
 from dascore.utils.docs import compose_docstring
 from dascore.utils.misc import (
     broadcast_for_index,
@@ -33,6 +38,7 @@
     get_multiple_dim_value_from_kwargs,
     patch_function,
 )
+from dascore.utils.time import to_float
 
 
 def _check_sobel_args(dim, mode, cval):
@@ -94,7 +100,7 @@ def pass_filter(patch: PatchType, corners=4, zerophase=True, **kwargs) -> PatchT
     Parameters
     ----------
     corners
-        The number of corners for the filter.
+        The number of corners for the filter. Default is 4.
     zerophase
         If True, apply the filter twice.
     **kwargs
         Used to specify the dimension and frequency, wavenumber, or equivalent
         limits.
@@ -172,28 +178,6 @@ def sobel_filter(patch: PatchType, dim: str, mode="reflect", cval=0.0) -> PatchT
     return dc.Patch(data=out, coords=patch.coords, attrs=patch.attrs, dims=patch.dims)
 
 
-#
-# @patch_function()
-# def stop_filter(patch: PatchType, corners=4, zerophase=True, **kwargs) -> PatchType:
-#     """
-#     Apply a Butterworth band stop filter or (highpass, or lowpass).
-#
-#     Parameters
-#     ----------
-#     corners
-#         The number of corners for the filter.
-#     zerophase
-#         If True, apply the filter twice.
-#     **kwargs
-#         Used to specify the dimension and frequency, wavenumber, or equivalent
-#         limits.
-#
-#
-#     """
-#     # get nyquist and low/high in terms of nyquist
-#     if zerophase:
-
-
 def _create_size_and_axes(patch, kwargs, samples):
     """
     Return a tuple of (size) and (axes).
@@ -265,6 +249,76 @@ def median_filter(
     return patch.update(data=new_data)
 
 
+@patch_function()
+def notch_filter(patch: PatchType, q, **kwargs) -> PatchType:
+    """
+    Apply a second-order IIR notch digital filter on the patch's data.
+
+    A notch filter is a band-stop filter with a narrow bandwidth (high quality factor).
+    It rejects a narrow frequency band and leaves the rest of the spectrum
+    little changed.
+
+    Parameters
+    ----------
+    patch
+        The patch to filter.
+    q
+        Quality factor (float). A higher Q value means a narrower notch,
+        which targets the specific frequency more precisely.
+        A lower Q results in a wider notch, meaning a broader range of
+        frequencies around the specific frequency will be attenuated.
+    **kwargs
+        Used to specify the dimension(s) and associated frequency and/or wavelength
+        (or equivalent values) for the filter.
+
+    Notes
+    -----
+    See [scipy.signal.iirnotch]
+    (https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirnotch.html)
+    for more information.
+
+    Examples
+    --------
+    >>> import dascore
+    >>> pa = dascore.get_example_patch()
+
+    >>> # Apply a notch filter along time axis to remove 60 Hz
+    >>> filtered = pa.notch_filter(time=60, q=30)
+
+    >>> # Apply a notch filter along distance axis to remove a 5 m wavelength (0.2 1/m)
+    >>> filtered = pa.notch_filter(distance=0.2, q=10)
+
+    >>> # Apply a notch filter along both time and distance axes
+    >>> filtered = pa.notch_filter(time=60, distance=0.2, q=40)
+
+    >>> # Optionally, units can be specified for a more expressive API.
+    >>> from dascore.units import m, ft, s, Hz
+    >>> # Apply a notch filter along time axis to remove 60 Hz
+    >>> filtered = pa.notch_filter(time=60 * Hz, q=30)
+    >>> # Apply a notch filter along distance axis to remove 5 m wavelength
+    >>> filtered = pa.notch_filter(distance=5 * m, q=30)
+    """
+    data = patch.data
+    for coord_name, info in get_multiple_dim_value_from_kwargs(patch, kwargs).items():
+        dim, ax, value = info["dim"], info["axis"], info["value"]
+        coord = patch.get_coord(coord_name)
+
+        # Invert units if needed
+        if isinstance(value, dc.units.Quantity) and coord.units is not None:
+            value, _ = get_inverted_quant(value, coord.units)
+
+        # Check valid parameters
+        w0 = to_float(value)
+        sr = get_dim_sampling_rate(patch, dim)
+        nyquist = 0.5 * sr
+        if w0 > nyquist:
+            msg = f"possible filter values are in [0, {nyquist}] you passed {w0}"
+            raise FilterValueError(msg)
+        b, a = iirnotch(w0, Q=q, fs=sr)
+        data = filtfilt(b, a, data, axis=ax)
+    return dc.Patch(data=data, coords=patch.coords, attrs=patch.attrs, dims=patch.dims)
+
+
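The core of notch_filter above is scipy's iirnotch design applied zero-phase with filtfilt, much like the zerophase option of pass_filter. The following minimal, self-contained sketch shows the same technique on a synthetic trace; the sampling rate, tone amplitudes, and attenuation threshold are illustrative assumptions, not values taken from the patch.

import numpy as np
from scipy.signal import filtfilt, iirnotch

fs = 1000.0  # assumed sampling rate, in Hz
t = np.arange(0, 1.0, 1 / fs)
# A 10 Hz signal contaminated by a 60 Hz tone.
sig = np.sin(2 * np.pi * 10 * t) + 0.5 * np.sin(2 * np.pi * 60 * t)

w0, q = 60.0, 30.0  # notch frequency and quality factor
assert w0 <= 0.5 * fs, "notch frequency must not exceed Nyquist"
b, a = iirnotch(w0, Q=q, fs=fs)
filtered = filtfilt(b, a, sig)  # forward-backward filtering -> zero phase

# The 60 Hz line is strongly attenuated while the 10 Hz signal survives.
spec = np.abs(np.fft.rfft(filtered))
freqs = np.fft.rfftfreq(filtered.size, 1 / fs)
print(spec[np.abs(freqs - 60).argmin()] < 0.05 * spec[np.abs(freqs - 10).argmin()])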
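The get_inverted_quant helper this function relies on (promoted to module scope in dascore/units.py below) is what lets a caller pass either a wavelength (5 * m) or a wavenumber (0.2 * 1 / m) and get the same notch. A rough sketch of just the inversion logic, written against pint directly; the function name and values here are illustrative, and the real helper additionally returns an inversion flag and raises UnitError on dimensionless input.

import pint

ureg = pint.UnitRegistry()
m = ureg.meter

def inverted_magnitude(quant, data_units):
    """Express quant in 1 / data_units, inverting it first if needed."""
    inverted_units = (1 / data_units).units
    if quant.dimensionality == data_units.dimensionality:
        quant = 1 / quant  # e.g. a 5 m wavelength -> a 0.2 1/m wavenumber
    return quant.to(inverted_units).magnitude

print(inverted_magnitude(5 * m, m))    # 0.2 (wavelength was inverted)
print(inverted_magnitude(0.2 / m, m))  # 0.2 (already a wavenumber)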
 @patch_function()
 @compose_docstring(sample_explination=samples_arg_description)
 def savgol_filter(
@@ -308,7 +362,7 @@ def savgol_filter(
     >>>
     >>> # Apply Savgol filter over distance dimension using a 5 sample
     >>> # distance window.
-    >>> filtered_pa_2 = pa.median_filter(distance=5, samples=True,polyorder=2)
+    >>> filtered_pa_2 = pa.savgol_filter(distance=5, samples=True, polyorder=2)
     >>>
     >>> # Combine distance and time filter
     >>> filtered_pa_3 = pa.savgol_filter(distance=10, time=0.1, polyorder=4)
@@ -430,8 +484,11 @@ def slope_filter(
     >>> # Example 1: Compare slope filtered patch to Non-filtered.
     >>> import matplotlib.pyplot as plt
     >>> import numpy as np
+    >>>
     >>> import dascore as dc
     >>> from dascore.units import Hz
+    >>>
+    >>>
     >>> # Apply taper function and bandpass filter along time axis from 1 to 500 Hz
     >>> patch = (
     ...     dc.get_example_patch('example_event_1')

diff --git a/dascore/units.py b/dascore/units.py
index c517a419..4b64c4fe 100644
--- a/dascore/units.py
+++ b/dascore/units.py
@@ -208,6 +208,29 @@ def get_quantity_str(quant_value: str | Quantity | None) -> str | None:
     return str(quant_value)
 
 
+def get_inverted_quant(quant, data_units):
+    """Convert to inverted units."""
+    if quant is None:
+        return quant, True
+    if quant.units == get_unit("dimensionless"):
+        msg = (
+            "Both inputs must be quantities to get filter parameters. "
+            f"You passed ({quant}, {data_units})"
+        )
+        raise UnitError(msg)
+    data_units = get_unit(data_units)
+    inverted_units = (1 / data_units).units
+    units_inversed = True
+    if data_units.dimensionality == quant.units.dimensionality:
+        quant, units_inversed = 1 / quant, False
+    # try to get inverted units, otherwise raise.
+    try:
+        mag = quant.to(inverted_units).magnitude
+    except DimensionalityError as e:
+        raise UnitError(str(e))
+    return mag, units_inversed
+
+
 def get_filter_units(
     arg1: Quantity | float,
     arg2: Quantity | float,
@@ -250,28 +273,6 @@ def get_filter_units(
     def _ensure_same_units(quant1, quant2):
         """Ensure units are equal or raise UnitError."""
         msg = f"Units must match, {quant1} and {quant2} were provided."
         raise UnitError(msg)
 
-    def get_inverted_quant(quant, data_units):
-        """Convert to inverted units."""
-        if quant is None:
-            return quant, True
-        if quant.units == get_unit("dimensionless"):
-            msg = (
-                "Both inputs must be quantities to get filter parameters. 
" - f"You passed ({arg1}, {arg2})" - ) - raise UnitError(msg) - data_units = get_unit(data_units) - inverted_units = (1 / data_units).units - units_inversed = True - if data_units.dimensionality == quant.units.dimensionality: - quant, units_inversed = 1 / quant, False - # try to get invert units, otherwise raise. - try: - mag = quant.to(inverted_units).magnitude - except DimensionalityError as e: - raise UnitError(str(e)) - return mag, units_inversed - def _check_to_units(to_unit, dim): """Ensure to units are valid.""" if to_unit is None: diff --git a/tests/test_proc/test_filter.py b/tests/test_proc/test_filter.py index 3fb6f1e1..79986616 100644 --- a/tests/test_proc/test_filter.py +++ b/tests/test_proc/test_filter.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re import sys import numpy as np @@ -16,7 +17,7 @@ PatchCoordinateError, UnitError, ) -from dascore.units import convert_units, get_unit +from dascore.units import Hz, convert_units, get_unit, m from dascore.utils.misc import broadcast_for_index @@ -226,6 +227,55 @@ def test_median_filter_ones(self, random_patch): assert out == random_patch +class TestNotchFilter: + """Tests for the notch filter.""" + + def test_notch_no_kwargs_raises(self, random_patch): + """Test that no dimension raises an appropriate error.""" + msg = "You must specify one or more" + with pytest.raises(PatchCoordinateError, match=msg): + random_patch.notch_filter(q=30) + + def test_notch_filter_time(self, random_patch): + """Test the notch filter along the time dimension.""" + filtered_patch = random_patch.notch_filter(time=60, q=30) + assert isinstance(filtered_patch, dc.Patch) + assert not np.any(np.isnan(filtered_patch.data)) + + def test_notch_filter_distance(self, random_patch): + """Test the notch filter along the distance dimension.""" + filtered_patch = random_patch.notch_filter(distance=0.2, q=20) + assert isinstance(filtered_patch, dc.Patch) + assert not np.any(np.isnan(filtered_patch.data)) + + def test_notch_filter_time_distance(self, random_patch): + """Test the notch filter along the time and distance dimension.""" + filtered_patch = random_patch.notch_filter(distance=0.25, time=12, q=40) + assert isinstance(filtered_patch, dc.Patch) + assert not np.any(np.isnan(filtered_patch.data)) + + def test_notch_filter_high_frequency_error(self, random_patch): + """Test notch filter raises error for frequency beyond Nyquist.""" + sr = dc.utils.patch.get_dim_sampling_rate(random_patch, "time") + nyquist = 0.5 * sr + too_high_freq = nyquist + 1 + msg = f"possible filter values are in [0, {nyquist}] you passed {too_high_freq}" + with pytest.raises(FilterValueError, match=re.escape(msg)): + random_patch.notch_filter(time=too_high_freq, q=30) + + def test_notch_filter_time_units(self, random_patch): + """Test notch filter with time dimension and frequency in Hz.""" + filtered_patch = random_patch.notch_filter(time=60 * Hz, q=40) + assert isinstance(filtered_patch, dc.Patch) + assert not np.any(np.isnan(filtered_patch.data)) + + def test_notch_filter_distance_units(self, random_patch): + """Test notch filter with distance dimension in meters.""" + filtered_patch = random_patch.notch_filter(distance=0.4 * 1 / m, q=25) + assert isinstance(filtered_patch, dc.Patch) + assert not np.any(np.isnan(filtered_patch.data)) + + class TestSavgolFilter: """Simple tests on Savgol filter.""" From b2551d2654f9789acd3e7ca1366b8b973dd10b9a Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 29 Oct 2024 12:34:36 -0700 Subject: [PATCH 4/4] fix febus overlap issue 
(#449)

* fix febus overlap issue

* fix start/end date

* use uv for pre-commit

* add absolute time test

---------

Co-authored-by: Derrick Chambers
---
 dascore/io/febus/utils.py              | 55 +++++++++++++++++---------
 tests/test_io/test_febus/test_febus.py | 43 ++++++++++++++++++++
 2 files changed, 80 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_io/test_febus/test_febus.py

diff --git a/dascore/io/febus/utils.py b/dascore/io/febus/utils.py
index 40b00414..4d608800 100644
--- a/dascore/io/febus/utils.py
+++ b/dascore/io/febus/utils.py
@@ -97,37 +97,57 @@ def _get_febus_attrs(feb: _FebusSlice) -> dict:
     return out
 
 
+def _get_time_overlap_samples(feb, data_shape):
+    """Determine the number of redundant samples in the time dimension."""
+    time_step = feb.zone.attrs["Spacing"][1] / 1_000  # value in ms, convert to s.
+    block_time = _maybe_unpack(1 / (feb.zone.attrs["BlockRate"] / 1_000))
+    # Since the data have overlaps in each block's time dimension, we need to
+    # trim the overlap off the time dimension to avoid having to merge blocks later.
+    # However, sometimes the "BlockOverlap" is wrong, so we calculate it
+    # manually here.
+    expected_samples = int(np.round(block_time / time_step))
+    excess_rows = data_shape[1] - expected_samples
+    assert (
+        excess_rows % 2 == 0
+    ), "excess rows must be symmetric to distribute on both ends"
+    return excess_rows
+
+
 def _get_time_coord(feb):
     """Get the time coordinate contained in the febus slice."""
     time = feb.source["time"]
     # In older versions the time shape is different, always grab the first element.
     first_slice = tuple(0 for _ in time.shape)
     t_0 = time[first_slice]
     # Data dimensions are block_index, time, distance
     data_shape = feb.zone[feb.data_name].shape
     n_blocks = data_shape[0]
-    # Since the data have overlaps in each block's time dimension, we need to
-    # trim the overlap off the time dimension to avoid having to merge blocks later.
-    overlap_percentage = _maybe_unpack(feb.zone.attrs.get("BlockOverlap", 0))
-    rows_to_remove = int(np.round(data_shape[1] * overlap_percentage / 100))
-    total_time_rows = (data_shape[1] - 2 * rows_to_remove) * n_blocks
-    # Get spacing between time samples (in s)
+    # Get spacing between time samples (in s) and the total time of each block.
     time_step = feb.zone.attrs["Spacing"][1] / 1_000  # value in ms, convert to s.
-    # Get origin info, these are offsets from time for start of block,
-    time_origin = feb.zone.attrs["Origin"][1] / 1_000  # also convert to s
-    # Get the start/stop indicies for the zone
+    excess_rows = _get_time_overlap_samples(feb, data_shape)
+    total_time_rows = (data_shape[1] - excess_rows) * n_blocks
+    # Get origin info, these are offsets from time to get to the first sample
+    # of the block. These should always be non-positive.
+    time_offset = feb.zone.attrs["Origin"][1] / 1_000  # also convert to s
+    assert time_offset <= 0, "time offset must be non-positive"
+    # Get the start/stop indices for the zone. We assume zones never sub-slice
+    # time (only distance) but assert that here.
     extent = feb.zone.attrs["Extent"]
-    time_ids = (extent[2], extent[3])
+    assert (extent[3] - extent[2] + 1) == data_shape[1], "Can't handle sub time zones"
     # Create time coord
     # Need to account for removing overlap times.
- total_start = time_origin - rows_to_remove * time_step + time_ids[0] * time_step + total_start = t_0 + time_offset + (excess_rows // 2) * time_step total_end = total_start + total_time_rows * time_step time_coord = get_coord( - start=dc.to_datetime64(t_0 + total_start), - stop=dc.to_datetime64(t_0 + total_end), + start=dc.to_datetime64(total_start), + stop=dc.to_datetime64(total_end), step=dc.to_timedelta64(time_step), ) - return time_coord.change_length(total_time_rows) + # Note: we have found some files in which the sampling rate is 1/3e-4 + # because we use datetime64 we lose some precision which has caused + # slight differences in shape of the patch. + out = time_coord.change_length(total_time_rows) + return out def _get_distance_coord(feb): @@ -221,9 +241,8 @@ def _get_time_filtered_data(data, t_start_end, time, total_slice, time_coord): dist_coord, time_coord = cm.coord_map["distance"], cm.coord_map["time"] data = febus.zone[febus.data_name] data_shape = data.shape - overlap_percentage = _maybe_unpack(febus.zone.attrs.get("BlockOverlap", 0)) - skip_rows = int(np.round(overlap_percentage / 100 * data_shape[1])) - # Need to handle case where skip_rows == 0 + skip_rows = _get_time_overlap_samples(febus, data_shape) // 2 + # Need to handle case where excess_rows == 0 data_slice = slice(skip_rows, -skip_rows if skip_rows else None) total_slice = list(broadcast_for_index(3, 1, data_slice)) total_time_rows = data_shape[1] - 2 * skip_rows diff --git a/tests/test_io/test_febus/test_febus.py b/tests/test_io/test_febus/test_febus.py new file mode 100644 index 00000000..40a24511 --- /dev/null +++ b/tests/test_io/test_febus/test_febus.py @@ -0,0 +1,43 @@ +""" +Febus specific tests. +""" + +import h5py +import numpy as np +import pytest + +from dascore.io.febus import Febus2 +from dascore.io.febus.utils import _flatten_febus_info +from dascore.utils.downloader import fetch +from dascore.utils.time import to_float + + +class TestFebus: + """Special test cases for febus.""" + + @pytest.fixture(scope="class") + def febus_path(self): + """Return the path to a test febus file.""" + return fetch("febus_1.h5") + + def test_time_coords_consistent_with_metadata(self, febus_path): + """ + Ensure the time coords returned have the same length as + metadata indicates. + """ + patch = Febus2().read(febus_path)[0] + coords = patch.coords + time = coords.get_coord("time") + time_span = to_float((time.max() - time.min()) + time.step) + + with h5py.File(febus_path, "r") as f: + feb = _flatten_febus_info(f)[0] + # First check total time extent + n_blocks = feb.zone[feb.data_name].shape[0] + block_time = 1 / (feb.zone.attrs["BlockRate"] / 1_000) + expected_time_span = block_time * n_blocks + assert np.isclose(expected_time_span, time_span) + # Then check absolute time + time_offset = feb.zone.attrs["Origin"][1] / 1_000 + time_start = feb.source["time"][0] + time_offset + assert np.isclose(to_float(time.min()), time_start)