
Commit

merge master
d-chambers committed Oct 31, 2024
2 parents c9460e3 + b2551d2 commit 301cd42
Showing 9 changed files with 267 additions and 80 deletions.
23 changes: 15 additions & 8 deletions .github/workflows/build_deploy_master_docs.yaml
@@ -40,16 +40,23 @@ jobs:
run: |
git fetch --tags --force # Retrieve annotated tags.
- name: Setup conda
uses: conda-incubator/setup-miniconda@v3
- uses: mamba-org/setup-micromamba@v1
with:
mamba-version: "*"
channels: conda-forge,defaults
channel-priority: true
python-version: "3.10"
activate-environment: dascore
micromamba-version: '1.5.8-0' # versions: https://github.com/mamba-org/micromamba-releases
environment-file: environment.yml
condarc-file: .github/test_condarc.yml
init-shell: >-
bash
powershell
cache-environment: true
post-cleanup: 'all'

# Not sure why this is needed but it appears to be the case
- name: fix env
shell: bash -l {0}
run: |
micromamba shell init --shell bash --root-prefix=~/micromamba
eval "$(micromamba shell hook --shell bash)"
micromamba activate dascore
- name: install dascore with docbuild reqs
shell: bash -l {0}
17 changes: 13 additions & 4 deletions .github/workflows/lint.yml
@@ -12,10 +12,19 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: "get tags"
run: |
git fetch --tags --force # Retrieve annotated tags.
- name: Install uv
uses: astral-sh/setup-uv@v3

- uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: install linting packages
# I added --break-system-packages due to pep 668. No need to protect
# system packages in a github container.
run: pip install pre-commit --break-system-packages
run: uv tool install pre-commit

- name: run all precommits
run: pre-commit run --files dascore/**/*
run: uv run pre-commit run --all
1 change: 1 addition & 0 deletions dascore/core/patch.py
@@ -291,6 +291,7 @@ def iselect(self, *args, **kwargs):
pass_filter = dascore.proc.pass_filter
sobel_filter = dascore.proc.sobel_filter
median_filter = dascore.proc.median_filter
notch_filter = dascore.proc.notch_filter
savgol_filter = dascore.proc.savgol_filter
gaussian_filter = dascore.proc.gaussian_filter
slope_filter = dascore.proc.slope_filter
55 changes: 37 additions & 18 deletions dascore/io/febus/utils.py
@@ -97,37 +97,57 @@ def _get_febus_attrs(feb: _FebusSlice) -> dict:
return out


def _get_time_overlap_samples(feb, data_shape):
"""Determine the number of redundant samples in the time dimension."""
time_step = feb.zone.attrs["Spacing"][1] / 1_000 # value in ms, convert to s.
block_time = _maybe_unpack(1 / (feb.zone.attrs["BlockRate"] / 1_000))
# Since the data have overlaps in each block's time dimension, we need to
# trim the overlap off the time dimension to avoid having to merge blocks later.
# However, sometimes the "BlockOverlap" is wrong, so we calculate it
# manually here.
expected_samples = int(np.round(block_time / time_step))
excess_rows = data_shape[1] - expected_samples
assert (
excess_rows % 2 == 0
), "excess rows must be symmetric to distribute on both ends"
return excess_rows
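
A minimal standalone sketch of the calculation above, assuming hypothetical attribute values ("Spacing"[1] in ms, "BlockRate" in mHz) and a made-up stored block size:

import numpy as np

time_step = 2.0 / 1_000           # hypothetical "Spacing"[1] of 2 ms -> 0.002 s
block_time = 1 / (1_000 / 1_000)  # hypothetical "BlockRate" of 1000 mHz -> 1 s per block
expected_samples = int(np.round(block_time / time_step))  # 500 samples expected per block
stored_rows = 600                 # made-up number of rows actually stored per block
excess_rows = stored_rows - expected_samples               # 100 redundant rows
assert excess_rows % 2 == 0       # half is trimmed from each end of the block
print(excess_rows // 2)           # 50 rows dropped from the start and from the end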


def _get_time_coord(feb):
"""Get the time coordinate contained in the febus slice."""
time = feb.source["time"]
# In older version time shape is different, always grap first eleemnt.
# In older version time shape is different, always grab first element.
first_slice = tuple(0 for _ in time.shape)
t_0 = time[first_slice]
# Data dimensions are block_index, time, distance
data_shape = feb.zone[feb.data_name].shape
n_blocks = data_shape[0]
# Since the data have overlaps in each block's time dimension, we need to
# trim the overlap off the time dimension to avoid having to merge blocks later.
overlap_percentage = _maybe_unpack(feb.zone.attrs.get("BlockOverlap", 0))
rows_to_remove = int(np.round(data_shape[1] * overlap_percentage / 100))
total_time_rows = (data_shape[1] - 2 * rows_to_remove) * n_blocks
# Get spacing between time samples (in s)
# Get spacing between time samples (in s) and the total time of each block.
time_step = feb.zone.attrs["Spacing"][1] / 1_000 # value in ms, convert to s.
# Get origin info, these are offsets from time for start of block,
time_origin = feb.zone.attrs["Origin"][1] / 1_000 # also convert to s
# Get the start/stop indicies for the zone
excess_rows = _get_time_overlap_samples(feb, data_shape)
total_time_rows = (data_shape[1] - excess_rows) * n_blocks
# Get origin info; these are offsets from time to get to the first sample
# of the block. These should always be non-positive.
time_offset = feb.zone.attrs["Origin"][1] / 1_000  # also convert to s
assert time_offset <= 0, "time offset must be non-positive"
# Get the start/stop indices for the zone. We assume zones never sub-slice
# time (only distance) but assert that here.
extent = feb.zone.attrs["Extent"]
time_ids = (extent[2], extent[3])
assert (extent[3] - extent[2] + 1) == data_shape[1], "Can't handle sub time zones"
# Create time coord
# Need to account for removing overlap times.
total_start = time_origin - rows_to_remove * time_step + time_ids[0] * time_step
total_start = t_0 + time_offset + (excess_rows // 2) * time_step
total_end = total_start + total_time_rows * time_step
time_coord = get_coord(
start=dc.to_datetime64(t_0 + total_start),
stop=dc.to_datetime64(t_0 + total_end),
start=dc.to_datetime64(total_start),
stop=dc.to_datetime64(total_end),
step=dc.to_timedelta64(time_step),
)
return time_coord.change_length(total_time_rows)
# Note: we have found some files in which the sampling rate is 1/3e-4.
# Because we use datetime64 we lose some precision, which has caused
# slight differences in the shape of the patch.
out = time_coord.change_length(total_time_rows)
return out
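
As a rough illustration of the arithmetic above (made-up values, not a real file): the trimmed time coordinate starts at the block start time plus the non-positive origin offset plus half the excess rows, and the end point follows from the rows that are kept.

t_0 = 1_000.0        # block start time in seconds (made-up epoch value)
time_offset = -0.05  # "Origin"[1] converted to seconds, always <= 0
time_step = 0.002    # seconds between samples
excess_rows = 100    # redundant rows per block (see the sketch above)
rows_per_block, n_blocks = 600, 10

total_time_rows = (rows_per_block - excess_rows) * n_blocks
total_start = t_0 + time_offset + (excess_rows // 2) * time_step
total_end = total_start + total_time_rows * time_step
print(total_start, total_end, total_time_rows)  # ~1000.05, ~1010.05, 5000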


def _get_distance_coord(feb):
@@ -221,9 +241,8 @@ def _get_time_filtered_data(data, t_start_end, time, total_slice, time_coord):
dist_coord, time_coord = cm.coord_map["distance"], cm.coord_map["time"]
data = febus.zone[febus.data_name]
data_shape = data.shape
overlap_percentage = _maybe_unpack(febus.zone.attrs.get("BlockOverlap", 0))
skip_rows = int(np.round(overlap_percentage / 100 * data_shape[1]))
# Need to handle case where skip_rows == 0
skip_rows = _get_time_overlap_samples(febus, data_shape) // 2
# Need to handle case where excess_rows == 0
data_slice = slice(skip_rows, -skip_rows if skip_rows else None)
total_slice = list(broadcast_for_index(3, 1, data_slice))
total_time_rows = data_shape[1] - 2 * skip_rows
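
A small standalone sketch of the trimming logic above, on a toy array: drop skip_rows samples from each end of the time axis of a (block, time, distance) array, guarding against skip_rows == 0, where -0 would slice away the whole axis.

import numpy as np

data = np.arange(3 * 10 * 4).reshape(3, 10, 4)  # toy (block, time, distance) array
skip_rows = 2
data_slice = slice(skip_rows, -skip_rows if skip_rows else None)
trimmed = data[:, data_slice, :]
print(trimmed.shape)  # (3, 6, 4)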
2 changes: 1 addition & 1 deletion dascore/proc/__init__.py
@@ -8,7 +8,7 @@
from .coords import * # noqa
from .correlate import correlate, correlate_shift
from .detrend import detrend
from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter, slope_filter
from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter, slope_filter, notch_filter
from .resample import decimate, interpolate, resample
from .rolling import rolling
from .taper import taper, taper_range
109 changes: 83 additions & 26 deletions dascore/proc/filter.py
@@ -15,13 +15,18 @@
from scipy import ndimage
from scipy.ndimage import gaussian_filter as np_gauss
from scipy.ndimage import median_filter as nd_median_filter
from scipy.signal import iirfilter, sosfilt, sosfiltfilt, zpk2sos
from scipy.signal import filtfilt, iirfilter, iirnotch, sosfilt, sosfiltfilt, zpk2sos
from scipy.signal import savgol_filter as np_savgol_filter

import dascore as dc
from dascore.constants import PatchType, samples_arg_description
from dascore.exceptions import FilterValueError, ParameterError, UnitError
from dascore.units import convert_units, get_filter_units, invert_quantity
from dascore.units import (
convert_units,
get_filter_units,
get_inverted_quant,
invert_quantity,
)
from dascore.utils.docs import compose_docstring
from dascore.utils.misc import (
broadcast_for_index,
@@ -33,6 +38,7 @@
get_multiple_dim_value_from_kwargs,
patch_function,
)
from dascore.utils.time import to_float


def _check_sobel_args(dim, mode, cval):
@@ -94,7 +100,7 @@ def pass_filter(patch: PatchType, corners=4, zerophase=True, **kwargs) -> PatchT
Parameters
----------
corners
The number of corners for the filter.
The number of corners for the filter. Default is 4.
zerophase
If True, apply the filter twice.
**kwargs
@@ -172,28 +178,6 @@ def sobel_filter(patch: PatchType, dim: str, mode="reflect", cval=0.0) -> PatchT
return dc.Patch(data=out, coords=patch.coords, attrs=patch.attrs, dims=patch.dims)


#
# @patch_function()
# def stop_filter(patch: PatchType, corners=4, zerophase=True, **kwargs) -> PatchType:
# """
# Apply a Butterworth band stop filter or (highpass, or lowpass).
#
# Parameters
# ----------
# corners
# The number of corners for the filter.
# zerophase
# If True, apply the filter twice.
# **kwargs
# Used to specify the dimension and frequency, wavenumber, or equivalent
# limits.
#
#
# """
# # get nyquist and low/high in terms of nyquist
# if zerophase:


def _create_size_and_axes(patch, kwargs, samples):
"""
Return a tuple of (size) and (axes).
@@ -265,6 +249,76 @@ def median_filter(
return patch.update(data=new_data)


@patch_function()
def notch_filter(patch: PatchType, q, **kwargs) -> PatchType:
"""
Apply a second-order IIR notch digital filter on patch's data.

A notch filter is a band-stop filter with a narrow bandwidth (high quality factor).
It rejects a narrow frequency band and leaves the rest of the spectrum
little changed.

Parameters
----------
patch
The patch to filter
q
Quality factor (float). A higher Q value means a narrower notch,
which targets the specific frequency more precisely.
A lower Q results in a wider notch, meaning a broader range of
frequencies around the specific frequency will be attenuated.
**kwargs
Used to specify the dimension(s) and associated frequency and/or wavelength
(or equivalent values) for the filter.

Notes
-----
See [scipy.signal.iirnotch]
(https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirnotch.html)
for more information.

Examples
--------
>>> import dascore
>>> pa = dascore.get_example_patch()
>>> # Apply a notch filter along time axis to remove 60 Hz
>>> filtered = pa.notch_filter(time=60, q=30)
>>> # Apply a notch filter along distance axis to remove 5 m wavelength
>>> filtered = pa.notch_filter(distance=0.2, q=10)
>>> # Apply a notch filter along both time and distance axes
>>> filtered = pa.notch_filter(time=60, distance=0.2, q=40)
>>> # Optionally, units can be specified for a more expressive API.
>>> from dascore.units import m, ft, s, Hz
>>> # Apply a notch filter along time axis to remove 60 Hz
>>> filtered = pa.notch_filter(time=60 * Hz, q=30)
>>> # Apply a notch filter along distance axis to remove 5 m wavelength
>>> filtered = pa.notch_filter(distance=5 * m, q=30)
"""
data = patch.data
for coord_name, info in get_multiple_dim_value_from_kwargs(patch, kwargs).items():
dim, ax, value = info["dim"], info["axis"], info["value"]
coord = patch.get_coord(coord_name)

# Invert units if needed
if isinstance(value, dc.units.Quantity) and coord.units is not None:
value, _ = get_inverted_quant(value, coord.units)

# Check valid parameters
w0 = to_float(value)
sr = get_dim_sampling_rate(patch, dim)
nyquist = 0.5 * sr
if w0 > nyquist:
msg = f"possible filter values are in [0, {nyquist}] you passed {w0}"
raise FilterValueError(msg)
b, a = iirnotch(w0, Q=q, fs=sr)
data = filtfilt(b, a, data, axis=ax)
return dc.Patch(data=data, coords=patch.coords, attrs=patch.attrs, dims=patch.dims)
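
For reference, a minimal sketch of the scipy pattern the new method builds on, assuming a 1000 Hz sampling rate and a toy array: design a second-order notch at 60 Hz and apply it forward-backward (zero phase) along the time axis.

import numpy as np
from scipy.signal import filtfilt, iirnotch

sampling_rate = 1_000.0                   # Hz, assumed for this sketch
w0, q = 60.0, 30.0                        # notch frequency and quality factor
b, a = iirnotch(w0, Q=q, fs=sampling_rate)
data = np.random.randn(50, 2_000)         # toy (distance, time) array
filtered = filtfilt(b, a, data, axis=-1)  # zero-phase filtering along time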


@patch_function()
@compose_docstring(sample_explination=samples_arg_description)
def savgol_filter(
@@ -308,7 +362,7 @@ def savgol_filter(
>>>
>>> # Apply Savgol filter over distance dimension using a 5 sample
>>> # distance window.
>>> filtered_pa_2 = pa.median_filter(distance=5, samples=True,polyorder=2)
>>> filtered_pa_2 = pa.savgol_filter(distance=5, samples=True, polyorder=2)
>>>
>>> # Combine distance and time filter
>>> filtered_pa_3 = pa.savgol_filter(distance=10, time=0.1, polyorder=4)
@@ -430,8 +484,11 @@ def slope_filter(
>>> # Example 1: Compare slope filtered patch to Non-filtered.
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>>
>>> import dascore as dc
>>> from dascore.units import Hz
>>>
>>>
>>> # Apply taper function and bandpass filter along time axis from 1 to 500 Hz
>>> patch = (
... dc.get_example_patch('example_event_1')
45 changes: 23 additions & 22 deletions dascore/units.py
@@ -208,6 +208,29 @@ def get_quantity_str(quant_value: str | Quantity | None) -> str | None:
return str(quant_value)


def get_inverted_quant(quant, data_units):
"""Convert to inverted units."""
if quant is None:
return quant, True
if quant.units == get_unit("dimensionless"):
msg = (
"Both inputs must be quantities to get filter parameters. "
f"You passed ({quant}, {data_units})"
)
raise UnitError(msg)
data_units = get_unit(data_units)
inverted_units = (1 / data_units).units
units_inversed = True
if data_units.dimensionality == quant.units.dimensionality:
quant, units_inversed = 1 / quant, False
# try to get invert units, otherwise raise.
try:
mag = quant.to(inverted_units).magnitude
except DimensionalityError as e:
raise UnitError(str(e))
return mag, units_inversed
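
A short usage sketch of the now module-level helper, assuming the imports shown above in filter.py (get_inverted_quant and the m unit from dascore.units): a 5 m wavelength passed against a distance coordinate in meters comes back as the 0.2 (1/m) value the filter code works with.

from dascore.units import get_inverted_quant, m

mag, units_inversed = get_inverted_quant(5 * m, "m")
print(mag, units_inversed)  # 0.2 False; the quantity was flipped to 1/m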


def get_filter_units(
arg1: Quantity | float,
arg2: Quantity | float,
@@ -250,28 +273,6 @@ def _ensure_same_units(quant1, quant2):
msg = f"Units must match, {quant1} and {quant2} were provided."
raise UnitError(msg)

def get_inverted_quant(quant, data_units):
"""Convert to inverted units."""
if quant is None:
return quant, True
if quant.units == get_unit("dimensionless"):
msg = (
"Both inputs must be quantities to get filter parameters. "
f"You passed ({arg1}, {arg2})"
)
raise UnitError(msg)
data_units = get_unit(data_units)
inverted_units = (1 / data_units).units
units_inversed = True
if data_units.dimensionality == quant.units.dimensionality:
quant, units_inversed = 1 / quant, False
# try to get invert units, otherwise raise.
try:
mag = quant.to(inverted_units).magnitude
except DimensionalityError as e:
raise UnitError(str(e))
return mag, units_inversed

def _check_to_units(to_unit, dim):
"""Ensure to units are valid."""
if to_unit is None:
