diff --git a/dascore/core/patch.py b/dascore/core/patch.py index 54b9a3ef..fc2660f5 100644 --- a/dascore/core/patch.py +++ b/dascore/core/patch.py @@ -221,6 +221,7 @@ def iselect(self, *args, **kwargs): sobel_filter = dascore.proc.sobel_filter median_filter = dascore.proc.median_filter savgol_filter = dascore.proc.savgol_filter + gaussian_filter = dascore.proc.gaussian_filter aggregate = dascore.proc.aggregate abs = dascore.proc.abs real = dascore.proc.real diff --git a/dascore/proc/__init__.py b/dascore/proc/__init__.py index 1c8bd98c..d5148e0a 100644 --- a/dascore/proc/__init__.py +++ b/dascore/proc/__init__.py @@ -7,7 +7,7 @@ from .coords import * # noqa from .correlate import correlate from .detrend import detrend -from .filter import median_filter, pass_filter, sobel_filter, savgol_filter +from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter from .resample import decimate, interpolate, resample from .rolling import rolling from .select import select diff --git a/dascore/proc/filter.py b/dascore/proc/filter.py index 446c717b..665d3a58 100644 --- a/dascore/proc/filter.py +++ b/dascore/proc/filter.py @@ -8,6 +8,7 @@ import pandas as pd from scipy import ndimage +from scipy.ndimage import gaussian_filter as np_gauss from scipy.ndimage import median_filter as nd_median_filter from scipy.signal import iirfilter, sosfilt, sosfiltfilt, zpk2sos from scipy.signal import savgol_filter as np_savgol_filter @@ -20,7 +21,6 @@ from dascore.utils.misc import check_filter_kwargs, check_filter_range from dascore.utils.patch import ( get_dim_sampling_rate, - get_dim_value_from_kwargs, get_multiple_dim_value_from_kwargs, patch_function, ) @@ -190,7 +190,12 @@ def sobel_filter(patch: PatchType, dim: str, mode="reflect", cval=0.0) -> PatchT def _create_size_and_axes(patch, kwargs, samples): - """Return""" + """ + Return a tuple of (size) and (axes). + + Note: size will always have the same size as the patch, but + 1s will be used if axis is not used. + """ dimfo = get_multiple_dim_value_from_kwargs(patch, kwargs) axes = [x["axis"] for x in dimfo.values()] size = [1] * len(patch.dims) @@ -252,7 +257,7 @@ def median_filter( """ size, _ = _create_size_and_axes(patch, kwargs, samples) new_data = nd_median_filter(patch.data, size=size, mode=mode, cval=cval) - return patch.new(data=new_data) + return patch.update(data=new_data) @patch_function() @@ -261,7 +266,9 @@ def savgol_filter( patch: PatchType, polyorder, samples=False, mode="interp", cval=0.0, **kwargs ) -> PatchType: """ - Applies Savgol filter along one dimension. + Applies Savgol filter along spenfied dimensions. + + The filter will be applied over each selected dimension sequentially. Parameters ---------- @@ -277,38 +284,98 @@ def savgol_filter( cval The constant value for when mode == constant. **kwargs - Used to specify the shape of the median filter in each dimension. - See examples for more info. + Used to specify the shape of the savgol filter in each dimension. + + Notes + ----- + See scipy.signal.savgol_filter for more info on implementation + and arguments. Examples -------- >>> import dascore >>> from dascore.units import m, s >>> pa = dascore.get_example_patch() - - >>> # 1. Apply second order polynomial Savgol filter - >>> # only over time dimension with 0.10 sec window + >>> + >>> # Apply second order polynomial Savgol filter + >>> # over time dimension with 0.10 sec window. >>> filtered_pa_1 = pa.savgol_filter(polyorder=2, time=0.1) + >>> + >>> # Apply Savgol filter over distance dimension using a 5 sample + >>> # distance window. + >>> filtered_pa_2 = pa.median_filter(distance=5, samples=True, polyorder=2) + >>> + >>> # Combine distance and time filter + >>> filtered_pa_3 = pa.savgol_filter(distance=10, time=0.1, polyorder=4) + """ + data = patch.data + size, axes = _create_size_and_axes(patch, kwargs, samples) + for ax in axes: + data = np_savgol_filter( + x=patch.data, + window_length=size[ax], + polyorder=polyorder, + mode=mode, + cval=cval, + axis=ax, + ) + return patch.update(data=data) + + +@patch_function() +@compose_docstring(sample_explination=samples_arg_description) +def gaussian_filter( + patch: PatchType, samples=False, mode="reflect", cval=0.0, truncate=4.0, **kwargs +) -> PatchType: + """ + Applies a Gaussian filter along specified dimensions. - >>> # 2. Apply Savgol filter over distance dimension - >>> # using a 5 sample distance window - >>> filtered_pa_2 = pa.median_filter(samples=True, polyorder=2, distance=5) + Parameters + ---------- + patch + The patch to filter + samples + If True samples are specified + If False coordinate of dimension + mode + The mode for handling edges. + cval + The constant value for when mode == constant. + truncate + Truncate the filter kernel length to this many standard deviations. + **kwargs + Used to specify the sigma value (standard deviation) for desired + dimensions. + + Examples + -------- + >>> import dascore + >>> from dascore.units import m, s + >>> pa = dascore.get_example_patch() + >>> + >>> # Apply Gaussian smoothing along time axis. + >>> pa_1 = pa.gaussian_filter(time=0.1) + >>> + >>> # Apply Gaussian filter over distance dimension + >>> # using a 3 sample standard deviation. + >>> pa_2 = pa.gaussian_filter(samples=True, distance=3) + >>> + >>> # Apply filter to time and distance axis. + >>> pa_3 = pa.gaussian_filter(time=0.1, distance=3) Notes ----- - See scipy.signal.savgol_filter for more info on implementation + See scipy.ndimage.gaussian_filter for more info on implementation and arguments. - """ - dim, axis, value = get_dim_value_from_kwargs(patch, kwargs) - coord = patch.get_coord(dim) - window = coord.get_sample_count(value, samples=samples) - new_data = np_savgol_filter( - x=patch.data, - window_length=window, - polyorder=polyorder, + size, axes = _create_size_and_axes(patch, kwargs, samples) + used_size = tuple(size[x] for x in axes) + data = np_gauss( + input=patch.data, + sigma=used_size, + axes=axes, mode=mode, cval=cval, - axis=axis, + truncate=truncate, ) - return patch.new(data=new_data) + return patch.update(data=data) diff --git a/dascore/proc/rolling.py b/dascore/proc/rolling.py index 2063935f..3d7421c6 100644 --- a/dascore/proc/rolling.py +++ b/dascore/proc/rolling.py @@ -215,10 +215,14 @@ def rolling( engine: Literal["numpy", "pandas", None] = None, samples=False, **kwargs, -) -> _NumpyPatchRoller: +) -> _NumpyPatchRoller | _PandasPatchRoller: """ Apply a rolling function along a specified dimension. + See also the + [rolling section of the processing tutorial](/tutorial/processing.qmd#rolling) + and the [smoothing recipe](/recipes/smoothing.qmd). + Parameters ---------- step @@ -243,16 +247,6 @@ def rolling( For example `time=10` represents window size of 10*(default unit) along the time axis. - Examples - -------- - >>> # Simple example for rolling mean function - >>> import dascore as dc - >>> patch = dc.get_example_patch() - >>> # apply rolling over 1 second with 0.5 step - >>> mean_patch = patch.rolling(time=1, step=0.5).mean() - >>> # drop nan at the start of the time axis. - >>> out = mean_patch.dropna("time") - Notes ----- This class behaves like pandas.rolling @@ -280,6 +274,19 @@ def rolling( [NaN, 1.0, 3.0] if time = 3 * dt and step = 3 * dt [NaN, 2.0] + + Examples + -------- + >>> import dascore as dc + >>> + >>> # Simple example for rolling mean function + >>> patch = dc.get_example_patch() + >>> + >>> # apply rolling over 1 second with 0.5 step + >>> mean_patch = patch.rolling(time=1, step=0.5).mean() + >>> + >>> # drop nan at the start of the time axis. + >>> out = mean_patch.dropna("time") """ def _get_engine(step, engine, patch): diff --git a/dascore/utils/patch.py b/dascore/utils/patch.py index ba34ac95..41cf3881 100644 --- a/dascore/utils/patch.py +++ b/dascore/utils/patch.py @@ -17,7 +17,6 @@ from dascore.core.coordmanager import merge_coord_managers from dascore.exceptions import ( CoordDataError, - ParameterError, PatchAttributeError, PatchDimError, ) @@ -422,7 +421,7 @@ def get_multiple_dim_value_from_kwargs(patch, kwargs): overlap = set(dims) & set(kwargs) if not overlap: msg = "You must specify one or more dimension in keyword args." - raise ParameterError(msg) + raise PatchDimError(msg) out = {} for dim in overlap: axis = patch.dims.index(dim) diff --git a/docs/recipes/smoothing.qmd b/docs/recipes/smoothing.qmd new file mode 100644 index 00000000..269c380c --- /dev/null +++ b/docs/recipes/smoothing.qmd @@ -0,0 +1,125 @@ +--- +title: " Patch Smoothing" +execute: + warn: false +--- + +This recipe compares various smoothing strategies. + +A few methods useful for smoothing are: + +- [`Patch.rolling`](`dascore.Patch.rolling`) +- [`Patch.savgol_filter`](`dascore.Patch.savgol_filter`) +- [`Patch.gaussian_filter`](`dascore.Patch.gaussian_filter`) + + +:::{.callout-note} +[`Patch.rolling`](`dascore.Patch.rolling`) is quite powerful and can be used for filtering in a variety of ways. However, as mentioned in the [rolling section of the tutorial](/tutorial/processing.qmd#rolling), [`Patch.rolling`](`dascore.Patch.rolling`) includes `NaN` entries in the output due to edge effects. This can require some additional thought to properly deal with. + +The other methods mentioned above generally deal with edge effects differently (governed by the `mode` parameter) and so don't have the same issue. +::: + +## Example Data + +We will use [example_event_2](`dascore.examples.example_event_2`) to demonstrate the effects of the smoothing methods. + +```{python} +import numpy as np + +import dascore as dc + +patch = dc.get_example_patch("example_event_2") +ax = patch.viz.waterfall() +ax.set_title("un-smoothed patch"); +``` + +## Rolling + +[`Patch.rolling`](`dascore.Patch.rolling`) can be used for applying several different aggregation functions to rolling windows along one dimension of the `Patch` instance. For instance, to apply a rolling mean to the time axis: + +```{python} +smoothed_patch = ( + patch.rolling(time=0.005).mean() +) +ax = smoothed_patch.viz.waterfall() +ax.set_title("rolling time mean"); +``` + +or a median filter to the distance axis: + +```{python} +smoothed_patch = ( + patch.rolling(distance=20, samples=True).median() +) +ax = smoothed_patch.viz.waterfall() +ax.set_title("rolling distance median"); +``` + +Or both: +```{python} +smoothed_patch = ( + patch.rolling(distance=20, samples=True).median() + .rolling(time=0.005).mean() +) +ax = smoothed_patch.viz.waterfall() +ax.set_title("rolling time mean distance median"); +``` + +Again though, the output array has `NaN` entries along some of the edges: + +```{python} +nan_data = np.isnan(smoothed_patch.data) +print(f"Number of NaN = {nan_data.sum()}") + +# Create and plot a patch to highlight NaN values. +nan_patch = ( + smoothed_patch.update(data=nan_data.astype(np.int32)) + .update_attrs(data_type='', data_units=None) +) +ax = nan_patch.viz.waterfall() +ax.set_title("NaNs in Patch"); +``` + +## Savgol filter + +[`Patch.savgol_filter`](`dascore.Patch.savgol_filter`) uses [SciPy's savgol_filter](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html), which is an implementation of the [Savitzky-Golay Filter](https://en.wikipedia.org/wiki/Savitzky%E2%80%93Golay_filter) to apply smoothing along a single dimension. + +```{python} +smoothed_patch = ( + patch.savgol_filter(time=0.01, polyorder=3) +) +ax = smoothed_patch.viz.waterfall() +ax.set_title("savgol time"); +``` + +Multidimensional smoothing is achieved by applying the 1D filter sequentially along each desired dimension. + +```{python} +smoothed_patch = ( + patch.savgol_filter( + time=25, + distance=25, + samples=True, + polyorder=2, + ) +) +ax = smoothed_patch.viz.waterfall() +ax.set_title("savgol time and distance"); +``` + +## Gaussian filter + +[`Patch.gaussian_filter`](`dascore.Patch.gaussian_filter`) uses [SciPy's gaussian_filter](https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.gaussian_filter.html) to apply a gaussian smoothing kernel to a patch. Note that the keyword arguments here specify standard deviation, and the ` + + +```{python} +smoothed_patch = ( + patch.gaussian_filter( + time=6, + distance=6, + samples=True, + ) +) +ax = smoothed_patch.viz.waterfall() +ax.set_title("gaussian time and distance"); +``` diff --git a/pyproject.toml b/pyproject.toml index 50de5f74..9271c8ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ dependencies = [ "pooch>=1.2", "pydantic>2.1", "rich", - "scipy>=1.10.0", + "scipy>=1.11.0", "tables>=3.7", "typing_extensions", "pint", diff --git a/scripts/_templates/_quarto.yml b/scripts/_templates/_quarto.yml index 6f5df81c..f85b9780 100644 --- a/scripts/_templates/_quarto.yml +++ b/scripts/_templates/_quarto.yml @@ -135,6 +135,7 @@ website: - recipes/parallelization.qmd - recipes/edge_effects.qmd - recipes/fk.qmd + - recipes/smoothing.qmd - section: "IO" contents: diff --git a/tests/conftest.py b/tests/conftest.py index 6e5f68f3..bf749ebb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -223,6 +223,13 @@ def event_patch_1(): return dc.get_example_patch("example_event_1") +@pytest.fixture(scope="session") +@register_func(PATCH_FIXTURES) +def event_patch_2(): + """Fetch event patch 2.""" + return dc.get_example_patch("example_event_2") + + @pytest.fixture(scope="session") @register_func(PATCH_FIXTURES) def dispersion_patch(): diff --git a/tests/test_proc/test_filter.py b/tests/test_proc/test_filter.py index cfd54e00..c30ae412 100644 --- a/tests/test_proc/test_filter.py +++ b/tests/test_proc/test_filter.py @@ -9,7 +9,6 @@ from dascore.exceptions import ( CoordDataError, FilterValueError, - ParameterError, PatchDimError, UnitError, ) @@ -200,7 +199,7 @@ class TestMedianFilter: def test_median_no_kwargs_raises(self, random_patch): """Apply default values.""" msg = "You must specify one or more dimension in keyword args." - with pytest.raises(ParameterError, match=msg): + with pytest.raises(PatchDimError, match=msg): random_patch.median_filter() def test_median_filter_time(self, random_patch): @@ -226,7 +225,7 @@ class TestSavgolFilter: def test_savgol_no_kwargs_raises(self, random_patch): """Apply default values.""" - msg = "You must use exactly one dimension name in kwargs." + msg = "You must specify one or more" with pytest.raises(PatchDimError, match=msg): random_patch.savgol_filter(polyorder=2) @@ -254,3 +253,31 @@ def test_savgol_smoothing(self, random_patch): assert np.allclose(out.data[-5:], 0) middle = out.data[midpoint - 10 : midpoint + 10] assert np.any((middle < 1) & (middle > 0)) + + def test_savgol_filter_multiple_dims(self, event_patch_2): + """Ensure multiple dimensions can be filtered.""" + out = event_patch_2.savgol_filter(distance=10, time=0.001, polyorder=4) + assert out.shape == event_patch_2.shape + assert not np.allclose(out.data, event_patch_2.data) + + +class TestGaussianFilter: + """Test the Guassian Filter.""" + + def test_filter_time(self, event_patch_2): + """Test for simple filter along time axis.""" + out = event_patch_2.gaussian_filter(time=0.001) + assert isinstance(out, dc.Patch) + assert out.shape == event_patch_2.shape + + def test_filter_distance(self, event_patch_2): + """Ensure filter can be applied along distance axis with samples.""" + out = event_patch_2.gaussian_filter(distance=5, samples=True) + assert isinstance(out, dc.Patch) + assert out.shape == event_patch_2.shape + + def test_filter_time_distance(self, event_patch_2): + """Ensure both time and distance can be filtered.""" + out = event_patch_2.gaussian_filter(time=5, distance=5, samples=True) + assert isinstance(out, dc.Patch) + assert out.shape == event_patch_2.shape