Skip to content

Commit

Permalink
add gaussian_filter and smoothing recipe (#337)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers authored Jan 5, 2024
1 parent 8190ec7 commit 87de2c5
Show file tree
Hide file tree
Showing 10 changed files with 275 additions and 41 deletions.
1 change: 1 addition & 0 deletions dascore/core/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def iselect(self, *args, **kwargs):
sobel_filter = dascore.proc.sobel_filter
median_filter = dascore.proc.median_filter
savgol_filter = dascore.proc.savgol_filter
gaussian_filter = dascore.proc.gaussian_filter
aggregate = dascore.proc.aggregate
abs = dascore.proc.abs
real = dascore.proc.real
Expand Down
2 changes: 1 addition & 1 deletion dascore/proc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .coords import * # noqa
from .correlate import correlate
from .detrend import detrend
from .filter import median_filter, pass_filter, sobel_filter, savgol_filter
from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter
from .resample import decimate, interpolate, resample
from .rolling import rolling
from .select import select
Expand Down
113 changes: 90 additions & 23 deletions dascore/proc/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pandas as pd
from scipy import ndimage
from scipy.ndimage import gaussian_filter as np_gauss
from scipy.ndimage import median_filter as nd_median_filter
from scipy.signal import iirfilter, sosfilt, sosfiltfilt, zpk2sos
from scipy.signal import savgol_filter as np_savgol_filter
Expand All @@ -20,7 +21,6 @@
from dascore.utils.misc import check_filter_kwargs, check_filter_range
from dascore.utils.patch import (
get_dim_sampling_rate,
get_dim_value_from_kwargs,
get_multiple_dim_value_from_kwargs,
patch_function,
)
Expand Down Expand Up @@ -190,7 +190,12 @@ def sobel_filter(patch: PatchType, dim: str, mode="reflect", cval=0.0) -> PatchT


def _create_size_and_axes(patch, kwargs, samples):
"""Return"""
"""
Return a tuple of (size) and (axes).
Note: size will always have the same size as the patch, but
1s will be used if axis is not used.
"""
dimfo = get_multiple_dim_value_from_kwargs(patch, kwargs)
axes = [x["axis"] for x in dimfo.values()]
size = [1] * len(patch.dims)
Expand Down Expand Up @@ -252,7 +257,7 @@ def median_filter(
"""
size, _ = _create_size_and_axes(patch, kwargs, samples)
new_data = nd_median_filter(patch.data, size=size, mode=mode, cval=cval)
return patch.new(data=new_data)
return patch.update(data=new_data)


@patch_function()
Expand All @@ -261,7 +266,9 @@ def savgol_filter(
patch: PatchType, polyorder, samples=False, mode="interp", cval=0.0, **kwargs
) -> PatchType:
"""
Applies Savgol filter along one dimension.
Applies Savgol filter along spenfied dimensions.
The filter will be applied over each selected dimension sequentially.
Parameters
----------
Expand All @@ -277,38 +284,98 @@ def savgol_filter(
cval
The constant value for when mode == constant.
**kwargs
Used to specify the shape of the median filter in each dimension.
See examples for more info.
Used to specify the shape of the savgol filter in each dimension.
Notes
-----
See scipy.signal.savgol_filter for more info on implementation
and arguments.
Examples
--------
>>> import dascore
>>> from dascore.units import m, s
>>> pa = dascore.get_example_patch()
>>> # 1. Apply second order polynomial Savgol filter
>>> # only over time dimension with 0.10 sec window
>>>
>>> # Apply second order polynomial Savgol filter
>>> # over time dimension with 0.10 sec window.
>>> filtered_pa_1 = pa.savgol_filter(polyorder=2, time=0.1)
>>>
>>> # Apply Savgol filter over distance dimension using a 5 sample
>>> # distance window.
>>> filtered_pa_2 = pa.median_filter(distance=5, samples=True, polyorder=2)
>>>
>>> # Combine distance and time filter
>>> filtered_pa_3 = pa.savgol_filter(distance=10, time=0.1, polyorder=4)
"""
data = patch.data
size, axes = _create_size_and_axes(patch, kwargs, samples)
for ax in axes:
data = np_savgol_filter(
x=patch.data,
window_length=size[ax],
polyorder=polyorder,
mode=mode,
cval=cval,
axis=ax,
)
return patch.update(data=data)


@patch_function()
@compose_docstring(sample_explination=samples_arg_description)
def gaussian_filter(
patch: PatchType, samples=False, mode="reflect", cval=0.0, truncate=4.0, **kwargs
) -> PatchType:
"""
Applies a Gaussian filter along specified dimensions.
>>> # 2. Apply Savgol filter over distance dimension
>>> # using a 5 sample distance window
>>> filtered_pa_2 = pa.median_filter(samples=True, polyorder=2, distance=5)
Parameters
----------
patch
The patch to filter
samples
If True samples are specified
If False coordinate of dimension
mode
The mode for handling edges.
cval
The constant value for when mode == constant.
truncate
Truncate the filter kernel length to this many standard deviations.
**kwargs
Used to specify the sigma value (standard deviation) for desired
dimensions.
Examples
--------
>>> import dascore
>>> from dascore.units import m, s
>>> pa = dascore.get_example_patch()
>>>
>>> # Apply Gaussian smoothing along time axis.
>>> pa_1 = pa.gaussian_filter(time=0.1)
>>>
>>> # Apply Gaussian filter over distance dimension
>>> # using a 3 sample standard deviation.
>>> pa_2 = pa.gaussian_filter(samples=True, distance=3)
>>>
>>> # Apply filter to time and distance axis.
>>> pa_3 = pa.gaussian_filter(time=0.1, distance=3)
Notes
-----
See scipy.signal.savgol_filter for more info on implementation
See scipy.ndimage.gaussian_filter for more info on implementation
and arguments.
"""
dim, axis, value = get_dim_value_from_kwargs(patch, kwargs)
coord = patch.get_coord(dim)
window = coord.get_sample_count(value, samples=samples)
new_data = np_savgol_filter(
x=patch.data,
window_length=window,
polyorder=polyorder,
size, axes = _create_size_and_axes(patch, kwargs, samples)
used_size = tuple(size[x] for x in axes)
data = np_gauss(
input=patch.data,
sigma=used_size,
axes=axes,
mode=mode,
cval=cval,
axis=axis,
truncate=truncate,
)
return patch.new(data=new_data)
return patch.update(data=data)
29 changes: 18 additions & 11 deletions dascore/proc/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,14 @@ def rolling(
engine: Literal["numpy", "pandas", None] = None,
samples=False,
**kwargs,
) -> _NumpyPatchRoller:
) -> _NumpyPatchRoller | _PandasPatchRoller:
"""
Apply a rolling function along a specified dimension.
See also the
[rolling section of the processing tutorial](/tutorial/processing.qmd#rolling)
and the [smoothing recipe](/recipes/smoothing.qmd).
Parameters
----------
step
Expand All @@ -243,16 +247,6 @@ def rolling(
For example `time=10` represents window size of
10*(default unit) along the time axis.
Examples
--------
>>> # Simple example for rolling mean function
>>> import dascore as dc
>>> patch = dc.get_example_patch()
>>> # apply rolling over 1 second with 0.5 step
>>> mean_patch = patch.rolling(time=1, step=0.5).mean()
>>> # drop nan at the start of the time axis.
>>> out = mean_patch.dropna("time")
Notes
-----
This class behaves like pandas.rolling
Expand Down Expand Up @@ -280,6 +274,19 @@ def rolling(
[NaN, 1.0, 3.0]
if time = 3 * dt and step = 3 * dt
[NaN, 2.0]
Examples
--------
>>> import dascore as dc
>>>
>>> # Simple example for rolling mean function
>>> patch = dc.get_example_patch()
>>>
>>> # apply rolling over 1 second with 0.5 step
>>> mean_patch = patch.rolling(time=1, step=0.5).mean()
>>>
>>> # drop nan at the start of the time axis.
>>> out = mean_patch.dropna("time")
"""

def _get_engine(step, engine, patch):
Expand Down
3 changes: 1 addition & 2 deletions dascore/utils/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from dascore.core.coordmanager import merge_coord_managers
from dascore.exceptions import (
CoordDataError,
ParameterError,
PatchAttributeError,
PatchDimError,
)
Expand Down Expand Up @@ -422,7 +421,7 @@ def get_multiple_dim_value_from_kwargs(patch, kwargs):
overlap = set(dims) & set(kwargs)
if not overlap:
msg = "You must specify one or more dimension in keyword args."
raise ParameterError(msg)
raise PatchDimError(msg)
out = {}
for dim in overlap:
axis = patch.dims.index(dim)
Expand Down
125 changes: 125 additions & 0 deletions docs/recipes/smoothing.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
---
title: " Patch Smoothing"
execute:
warn: false
---

This recipe compares various smoothing strategies.

A few methods useful for smoothing are:

- [`Patch.rolling`](`dascore.Patch.rolling`)
- [`Patch.savgol_filter`](`dascore.Patch.savgol_filter`)
- [`Patch.gaussian_filter`](`dascore.Patch.gaussian_filter`)


:::{.callout-note}
[`Patch.rolling`](`dascore.Patch.rolling`) is quite powerful and can be used for filtering in a variety of ways. However, as mentioned in the [rolling section of the tutorial](/tutorial/processing.qmd#rolling), [`Patch.rolling`](`dascore.Patch.rolling`) includes `NaN` entries in the output due to edge effects. This can require some additional thought to properly deal with.

The other methods mentioned above generally deal with edge effects differently (governed by the `mode` parameter) and so don't have the same issue.
:::

## Example Data

We will use [example_event_2](`dascore.examples.example_event_2`) to demonstrate the effects of the smoothing methods.

```{python}
import numpy as np
import dascore as dc
patch = dc.get_example_patch("example_event_2")
ax = patch.viz.waterfall()
ax.set_title("un-smoothed patch");
```

## Rolling

[`Patch.rolling`](`dascore.Patch.rolling`) can be used for applying several different aggregation functions to rolling windows along one dimension of the `Patch` instance. For instance, to apply a rolling mean to the time axis:

```{python}
smoothed_patch = (
patch.rolling(time=0.005).mean()
)
ax = smoothed_patch.viz.waterfall()
ax.set_title("rolling time mean");
```

or a median filter to the distance axis:

```{python}
smoothed_patch = (
patch.rolling(distance=20, samples=True).median()
)
ax = smoothed_patch.viz.waterfall()
ax.set_title("rolling distance median");
```

Or both:
```{python}
smoothed_patch = (
patch.rolling(distance=20, samples=True).median()
.rolling(time=0.005).mean()
)
ax = smoothed_patch.viz.waterfall()
ax.set_title("rolling time mean distance median");
```

Again though, the output array has `NaN` entries along some of the edges:

```{python}
nan_data = np.isnan(smoothed_patch.data)
print(f"Number of NaN = {nan_data.sum()}")
# Create and plot a patch to highlight NaN values.
nan_patch = (
smoothed_patch.update(data=nan_data.astype(np.int32))
.update_attrs(data_type='', data_units=None)
)
ax = nan_patch.viz.waterfall()
ax.set_title("NaNs in Patch");
```

## Savgol filter

[`Patch.savgol_filter`](`dascore.Patch.savgol_filter`) uses [SciPy's savgol_filter](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html), which is an implementation of the [Savitzky-Golay Filter](https://en.wikipedia.org/wiki/Savitzky%E2%80%93Golay_filter) to apply smoothing along a single dimension.

```{python}
smoothed_patch = (
patch.savgol_filter(time=0.01, polyorder=3)
)
ax = smoothed_patch.viz.waterfall()
ax.set_title("savgol time");
```

Multidimensional smoothing is achieved by applying the 1D filter sequentially along each desired dimension.

```{python}
smoothed_patch = (
patch.savgol_filter(
time=25,
distance=25,
samples=True,
polyorder=2,
)
)
ax = smoothed_patch.viz.waterfall()
ax.set_title("savgol time and distance");
```

## Gaussian filter

[`Patch.gaussian_filter`](`dascore.Patch.gaussian_filter`) uses [SciPy's gaussian_filter](https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.gaussian_filter.html) to apply a gaussian smoothing kernel to a patch. Note that the keyword arguments here specify standard deviation, and the `

This comment has been minimized.

Copy link
@ariellellouch

ariellellouch Jan 6, 2024

Contributor

Seems like the line is cut in the middle


```{python}
smoothed_patch = (
patch.gaussian_filter(
time=6,
distance=6,
samples=True,
)
)
ax = smoothed_patch.viz.waterfall()
ax.set_title("gaussian time and distance");
```
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ dependencies = [
"pooch>=1.2",
"pydantic>2.1",
"rich",
"scipy>=1.10.0",
"scipy>=1.11.0",
"tables>=3.7",
"typing_extensions",
"pint",
Expand Down
1 change: 1 addition & 0 deletions scripts/_templates/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ website:
- recipes/parallelization.qmd
- recipes/edge_effects.qmd
- recipes/fk.qmd
- recipes/smoothing.qmd

- section: "IO"
contents:
Expand Down
Loading

0 comments on commit 87de2c5

Please sign in to comment.