Fix logic in how metric state references are handled in MetricCollection (#2990)

SkafteNicki · mergify[bot] · web-flow · commit cafd7cff47f2 · 2025-03-13T10:31:05.000Z
* fix logic + tests

* changelog

* fix tests

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Fixed logic in how metric state references are handled in `MetricCollection` ([#2990](https://github.com/PyTorchLightning/metrics/pull/2990))
+
+
 - Fixed integration between classwise wrapper and metric tracker ([#3004](https://github.com/PyTorchLightning/metrics/pull/3004))
 
 
diff --git a/src/torchmetrics/collections.py b/src/torchmetrics/collections.py
@@ -247,10 +247,8 @@ def update(self, *args: Any, **kwargs: Any) -> None:
                 # only update the first member
                 m0 = getattr(self, cg[0])
                 m0.update(*args, **m0._filter_kwargs(**kwargs))
-            if self._state_is_copy:
-                # If we have deep copied state in between updates, reestablish link
-                self._compute_groups_create_state_ref()
-                self._state_is_copy = False
+            self._state_is_copy = False
+            self._compute_groups_create_state_ref()
         else:  # the first update always do per metric to form compute groups
             for m in self.values(copy_state=False):
                 m_kwargs = m._filter_kwargs(**kwargs)
@@ -259,6 +257,7 @@ def update(self, *args: Any, **kwargs: Any) -> None:
             if self._enable_compute_groups:
                 self._merge_compute_groups()
                 # create reference between states
+                self._state_is_copy = False
                 self._compute_groups_create_state_ref()
                 self._groups_checked = True
 
@@ -339,7 +338,7 @@ def _compute_groups_create_state_ref(self, copy: bool = False) -> None:
                 of just passed by reference
 
         """
-        if not self._state_is_copy and self._groups_checked:
+        if not self._state_is_copy:  # only create reference if not already copied
             for cg in self._groups.values():
                 m0 = getattr(self, cg[0])
                 for i in range(1, len(cg)):
diff --git a/tests/unittests/bases/test_collections.py b/tests/unittests/bases/test_collections.py
@@ -33,6 +33,7 @@
     MultilabelAUROC,
     MultilabelAveragePrecision,
 )
+from torchmetrics.regression import PearsonCorrCoef
 from torchmetrics.text import BLEUScore
 from torchmetrics.utilities.checks import _allclose_recursive
 from unittests._helpers import seed_all
@@ -328,30 +329,35 @@ def compute(self):
     "metrics, expected, preds, target",
     [
         # single metric forms its own compute group
-        (MulticlassAccuracy(num_classes=3), {0: ["MulticlassAccuracy"]}, _mc_preds, _mc_target),
+        pytest.param(
+            MulticlassAccuracy(num_classes=3), {0: ["MulticlassAccuracy"]}, _mc_preds, _mc_target, id="single_metric"
+        ),
         # two metrics of same class forms a compute group
-        (
+        pytest.param(
             {"acc0": MulticlassAccuracy(num_classes=3), "acc1": MulticlassAccuracy(num_classes=3)},
             {0: ["acc0", "acc1"]},
             _mc_preds,
             _mc_target,
+            id="two_metrics_of_same_class",
         ),
         # two metrics from registry forms a compute group
-        (
+        pytest.param(
             [MulticlassPrecision(num_classes=3), MulticlassRecall(num_classes=3)],
             {0: ["MulticlassPrecision", "MulticlassRecall"]},
             _mc_preds,
             _mc_target,
+            id="two_metrics_from_registry",
         ),
         # two metrics from different classes gives two compute groups
-        (
+        pytest.param(
             [MulticlassConfusionMatrix(num_classes=3), MulticlassRecall(num_classes=3)],
             {0: ["MulticlassConfusionMatrix"], 1: ["MulticlassRecall"]},
             _mc_preds,
             _mc_target,
+            id="two_metrics_from_different_classes",
         ),
         # multi group multi metric
-        (
+        pytest.param(
             [
                 MulticlassConfusionMatrix(num_classes=3),
                 MulticlassCohenKappa(num_classes=3),
@@ -361,9 +367,10 @@ def compute(self):
             {0: ["MulticlassConfusionMatrix", "MulticlassCohenKappa"], 1: ["MulticlassRecall", "MulticlassPrecision"]},
             _mc_preds,
             _mc_target,
+            id="multi_group_multi_metric",
         ),
         # Complex example
-        (
+        pytest.param(
             {
                 "acc": MulticlassAccuracy(num_classes=3),
                 "acc2": MulticlassAccuracy(num_classes=3),
@@ -375,19 +382,21 @@ def compute(self):
             {0: ["acc", "acc2", "f1", "recall"], 1: ["acc3"], 2: ["confmat"]},
             _mc_preds,
             _mc_target,
+            id="complex_example",
         ),
         # With list states
-        (
+        pytest.param(
             [
                 MulticlassAUROC(num_classes=3, average="macro"),
                 MulticlassAveragePrecision(num_classes=3, average="macro"),
             ],
             {0: ["MulticlassAUROC", "MulticlassAveragePrecision"]},
             _mc_preds,
             _mc_target,
+            id="with_list_states",
         ),
         # Nested collections
-        (
+        pytest.param(
             [
                 MetricCollection(
                     MultilabelAUROC(num_labels=3, average="micro"),
@@ -410,6 +419,7 @@ def compute(self):
             },
             _ml_preds,
             _ml_target,
+            id="nested_collections",
         ),
     ],
 )
@@ -796,3 +806,39 @@ def test_collection_update():
 
     for k, v in expected.items():
         torch.testing.assert_close(actual=actual.get(k), expected=v, rtol=1e-4, atol=1e-4)
+
+
+def test_collection_state_being_re_established_after_copy():
+    """Check that metric states shared within a compute group are re-linked by ``update`` after a copy.
+
+    See issue: https://github.com/Lightning-AI/torchmetrics/issues/2896
+
+    """
+    m1, m2 = PearsonCorrCoef(), PearsonCorrCoef()
+    m12 = MetricCollection({"m1": m1, "m2": m2}, compute_groups=True)
+    x1, y1 = torch.randn(100), torch.randn(100)
+    m12.update(x1, y1)
+    assert m12.compute_groups == {0: ["m1", "m2"]}
+
+    # After the first update both metrics are in one group and their state tensors share memory
+    assert not m12._state_is_copy
+    assert m12.m1.mean_x.data_ptr() == m12.m2.mean_x.data_ptr(), "States should point to the same location"
+
+    # Calling `items()` is expected to flag the states as copied and give each metric its own storage
+    _ = m12.items()
+    assert m12._state_is_copy
+    assert m12.m1.mean_x.data_ptr() != m12.m2.mean_x.data_ptr(), "States should not point to the same location"
+
+    # The next update should clear the copy flag and restore the shared storage
+    x2, y2 = torch.randn(100), torch.randn(100)
+
+    m12.update(x2, y2)
+    assert not m12._state_is_copy
+    assert m12.m1.mean_x.data_ptr() == m12.m2.mean_x.data_ptr(), "States should point to the same location"
+
+    x3, y3 = torch.randn(100), torch.randn(100)
+    m12.update(x3, y3)
+    # A further update must keep the states linked and the two metrics' states equal
+    assert not m12._state_is_copy
+    assert m12.m1.mean_x.data_ptr() == m12.m2.mean_x.data_ptr(), "States should point to the same location"
+    assert m12._equal_metric_states(m12.m1, m12.m2)