ISISComputingGroup · jackbdoughty · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/doc/architectural_decisions/006-peak-stats.md b/doc/architectural_decisions/006-peak-stats.md
@@ -0,0 +1,21 @@
+# Using our own Centre of Mass Callback
+
+## Status
+
+Current
+
+## Context
+
+A decision needs to be made about whether to make changes to upstream Bluesky so that their `CoM` callback works for us, or we make our own.
+
+## Decision
+
+We will be making our own `CoM` callback.
+
+## Justification & Consequences
+
+We attempted to make changes to upstream Bluesky which were rejected, as it adds limits to the functionality of the callback. We also found other limitations with using their callback, such as not being able to have disordered and non-continuous data sent to it without it skewing the calculated value- we need it to work with disordered and non-continuous data as we need to be able to run continuous scans.
+
+This will mean that...
+- Our version of the callback will not be supported by Bluesky and may need changes as Bluesky updates.
+- We can have a version of the callback that is made bespokely for our use cases.
diff --git a/doc/fitting/peakstats.md b/doc/fitting/peakstats.md
@@ -0,0 +1,31 @@
+# PeakStats Callback
+
+Similar to [`LivePlot`](../callbacks/plotting.md), `ibex_bluesky_core` provides a thin wrapper around Bluesky's `PeakStats` class, swapping out their Centre of Mass (CoM) algorithm for one that is better suited to our use cases.
+
+`PeakStats` returns a dictionary of the following statistical properties... `com`, `cen`, `max`, `min`, `crossing` and `fwhm`. More info on this [here](https://blueskyproject.io/bluesky/main/callbacks.html#peakstats).
+
+In order to use the wrapper, import `PeakStats` from `ibex_bluesky_core` rather than 
+`bluesky` directly:
+```py
+from ibex_bluesky_core.callbacks.fitting import LiveFit
+```
+
+## Our CoM Algorithm
+
+Given non-continuous arrays of collected data `x` and `y`, ({py:obj}`ibex_bluesky_core.callbacks.fitting.center_of_mass`) returns the `x` value of the centre of mass.
+
+Our use cases require that our algorithm abides to the following rules:
+- Any background on data does not skew the centre of mass
+- The order in which data is received does not skew the centre of mass
+- Should support non-constant point spacing without skewing the centre of mass
+
+*Note that this is designed for only **positive** peaks.*
+
+### Step-by-step
+
+1) Sort `x` and `y` arrays in respect of `x` ascending. This is so that data can be received in any order.
+2) From each `y` element, subtract `min(y)`. This means that any constant background over data is ignored. (Does not work for negative peaks)
+3) Calculate weight for each point; based on it's `x` distances from neighbouring points. This ensures non-constant point spacing is accounted for in our calculation.
+4) `CoM` is calculated as ```{math}
+com_x = \frac{\sum_{}^{}x * y * \text{weight}}{\sum_{}^{}y * \text{weight}}
+```
diff --git a/src/ibex_bluesky_core/callbacks/fitting/__init__.py b/src/ibex_bluesky_core/callbacks/fitting/__init__.py
@@ -2,13 +2,15 @@
 
 import logging
 import warnings
-from typing import Callable
+from collections import namedtuple
+from typing import Any, Callable
 
 import lmfit
 import numpy as np
 import numpy.typing as npt
 from bluesky.callbacks import LiveFit as _DefaultLiveFit
 from bluesky.callbacks.core import make_class_safe
+from bluesky.callbacks.fitting import PeakStats as _DefaultPeakStats
 from event_model.documents.event import Event
 
 logger = logging.getLogger(__name__)
@@ -127,3 +129,57 @@ def update_fit(self) -> None:
                 self.ydata, weights=None if self.yerr is None else self.weight_data, **kwargs
             )
             self.__stale = False
+
+
+def center_of_mass(x: npt.NDArray[np.float64], y: npt.NDArray[np.float64]) -> float:
+    """Compute our own centre of mass.
+
+    Follow these rules:
+    Background does not skew CoM
+    Order of data does not skew CoM
+    Non constant point spacing does not skew CoM
+    Assumes that the peak is positive
+    """
+    # Offset points for any background
+    # Sort points in terms of x
+    arg_sorted = np.argsort(x)
+    x_sorted = np.take_along_axis(x, arg_sorted, axis=None)
+    y_sorted = np.take_along_axis(y - np.min(y), arg_sorted, axis=None)
+
+    # Each point has its own weight given by its distance to its neighbouring point
+    # Edge cases are calculated as x_1 - x_0 and x_-1 - x_-2
+
+    x_diff = np.diff(x_sorted)
+
+    weight = np.array([x_diff[0]])
+    weight = np.append(weight, (x_diff[1:] + x_diff[:-1]) / 2)
+    weight = np.append(weight, [x_diff[-1]])
+
+    weight /= np.max(weight)  # Normalise weights in terms of max(weights)
+
+    sum_xyw = np.sum(x_sorted * y_sorted * weight)  # Weighted CoM calculation
+    sum_yw = np.sum(y_sorted * weight)
+    com_x = sum_xyw / sum_yw
+
+    return com_x
+
+
+@make_class_safe(logger=logger)  # pyright: ignore (pyright doesn't understand this decorator)
+class PeakStats(_DefaultPeakStats):
+    """PeakStats, customized for IBEX."""
+
+    @staticmethod
+    def _calc_stats(
+        x: npt.NDArray[np.float64],
+        y: npt.NDArray[np.float64],
+        fields: dict[str, float | npt.NDArray[np.float64] | None],
+        edge_count: int | None = None,
+    ) -> Any:  # noqa: ANN401 Pyright will not understand as return type depends on arguments
+        """Call on bluesky PeakStats but calculate our own centre of mass."""
+        stats = _DefaultPeakStats._calc_stats(x, y, fields, edge_count)  # noqa: SLF001
+        (fields["com"],) = (center_of_mass(x, y),)
+        # This will calculate CoM twice, once for Bluesky's calc and one for us
+        # but keeps our value. Done this way for sleekness.
+        Stats = namedtuple("Stats", field_names=fields.keys())
+        stats = Stats(**fields)
+        return stats
diff --git a/tests/callbacks/fitting/test_centre_of_mass.py b/tests/callbacks/fitting/test_centre_of_mass.py
@@ -0,0 +1,114 @@
+import numpy as np
+import numpy.typing as npt
+import pytest
+
+from ibex_bluesky_core.callbacks.fitting import PeakStats
+
+# Tests:
+# Test with normal scan with gaussian data
+# Check that asymmetrical data does not skew CoM
+# Check that having a background on data does not skew CoM
+# Check that order of documents does not skew CoM
+# Check that point spacing does not skew CoM
+
+
+def gaussian(
+    x: npt.NDArray[np.float64], amp: float, sigma: float, x0: float, bg: float
+) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    return (x, amp * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + bg)
+
+
+def simulate_run_and_return_com(xy: tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]):
+    ps = PeakStats("x", "y")
+
+    ps.start({})  # pyright: ignore
+
+    for x, y in np.vstack(xy).T:
+        ps.event({"data": {"x": x, "y": y}})  # pyright: ignore
+
+    ps.stop({})  # pyright: ignore
+
+    return ps["com"]
+
+
+@pytest.mark.parametrize(
+    ("x", "amp", "sigma", "x0", "bg"),
+    [
+        (np.arange(-2, 3), 1, 1, 0, 0),
+        (np.arange(-4, 1), 1, 1, -2, 0),
+    ],
+)
+def test_normal_scan(x: npt.NDArray[np.float64], amp: float, sigma: float, x0: float, bg: float):
+    xy = gaussian(x, amp, sigma, x0, bg)
+    com = simulate_run_and_return_com(xy)
+    assert com == pytest.approx(x0, abs=1e-4)
+
+
+@pytest.mark.parametrize(
+    ("x", "amp", "sigma", "x0", "bg"),
+    [
+        (np.arange(-4, 10), 1, 1, 0, 0),
+        (np.arange(-6, 20), 1, 1, -2, 0),
+    ],
+)
+def test_asymmetrical_scan(
+    x: npt.NDArray[np.float64], amp: float, sigma: float, x0: float, bg: float
+):
+    xy = gaussian(x, amp, sigma, x0, bg)
+    com = simulate_run_and_return_com(xy)
+    assert com == pytest.approx(x0, abs=1e-4)
+
+
+@pytest.mark.parametrize(
+    ("x", "amp", "sigma", "x0", "bg"),
+    [
+        (np.arange(-2, 3), 1, 1, 0, 3),
+        (np.arange(-4, 1), 1, 1, -2, -0.5),
+        (np.arange(-4, 1), 1, 1, -2, -3),
+    ],
+)
+def test_background_gaussian_scan(
+    x: npt.NDArray[np.float64], amp: float, sigma: float, x0: float, bg: float
+):
+    xy = gaussian(x, amp, sigma, x0, bg)
+    com = simulate_run_and_return_com(xy)
+    assert com == pytest.approx(x0, abs=1e-4)
+
+
+@pytest.mark.parametrize(
+    ("x", "amp", "sigma", "x0", "bg"),
+    [
+        (np.array([0, -2, 2, -1, 1]), 1, 1, 0, 0),
+        (np.array([-4, 0, -2, -3, -1]), 1, 1, -2, 0),
+    ],
+)
+def test_non_continuous_scan(
+    x: npt.NDArray[np.float64], amp: float, sigma: float, x0: float, bg: float
+):
+    xy = gaussian(x, amp, sigma, x0, bg)
+    com = simulate_run_and_return_com(xy)
+    assert com == pytest.approx(x0, abs=1e-4)
+
+
+@pytest.mark.parametrize(
+    ("x", "amp", "sigma", "x0", "bg"),
+    [
+        (np.append(np.arange(-10, -2, 0.05), np.arange(-2, 4, 0.5)), 1, 0.5, 0, 0),
+        (
+            np.concatenate(
+                (np.arange(-5, -2.0, 0.5), np.arange(-2.5, -1.45, 0.05), np.arange(-1.5, 1, 0.5)),
+                axis=0,
+            ),
+            1,
+            0.25,
+            0,
+            0,
+        ),
+    ],
+)
+def test_non_constant_point_spacing_scan(
+    x: npt.NDArray[np.float64], amp: float, sigma: float, x0: float, bg: float
+):
+    xy = gaussian(x, amp, sigma, x0, bg)
+    com = simulate_run_and_return_com(xy)
+    assert com == pytest.approx(x0, abs=1e-3)