From 5fd675cbea997d25d83910baa8d55b11d272ef6a Mon Sep 17 00:00:00 2001 From: Alan Chu Date: Sun, 17 Nov 2024 22:51:00 -0800 Subject: [PATCH 1/8] call configure_model before freeze_before_training --- src/lightning/pytorch/callbacks/finetuning.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning/pytorch/callbacks/finetuning.py b/src/lightning/pytorch/callbacks/finetuning.py index 46a90986c091c..308e9b8dc2c82 100644 --- a/src/lightning/pytorch/callbacks/finetuning.py +++ b/src/lightning/pytorch/callbacks/finetuning.py @@ -274,6 +274,9 @@ def unfreeze_and_add_param_group( @override def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: str) -> None: + if hasattr(pl_module, "configure_model"): + pl_module.configure_model() + self.freeze_before_training(pl_module) from lightning.pytorch.strategies import DeepSpeedStrategy From 90ff8f0552ee4adff93c8beccef87d41686b6c76 Mon Sep 17 00:00:00 2001 From: Alan Chu Date: Wed, 20 Nov 2024 17:02:51 -0800 Subject: [PATCH 2/8] remove bad fix --- src/lightning/pytorch/callbacks/finetuning.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/lightning/pytorch/callbacks/finetuning.py b/src/lightning/pytorch/callbacks/finetuning.py index 308e9b8dc2c82..46a90986c091c 100644 --- a/src/lightning/pytorch/callbacks/finetuning.py +++ b/src/lightning/pytorch/callbacks/finetuning.py @@ -274,9 +274,6 @@ def unfreeze_and_add_param_group( @override def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: str) -> None: - if hasattr(pl_module, "configure_model"): - pl_module.configure_model() - self.freeze_before_training(pl_module) from lightning.pytorch.strategies import DeepSpeedStrategy From a205c4ad068bc3c47c6b9d7f7e72dca3a49860c1 Mon Sep 17 00:00:00 2001 From: Alan Chu Date: Fri, 22 Nov 2024 02:38:31 -0800 Subject: [PATCH 3/8] second fix and test case --- src/lightning/pytorch/trainer/trainer.py | 7 +- .../callbacks/test_finetuning_callback.py | 64 +++++++++++++++++-- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py index 23db90fd45b38..210d63bea6b12 100644 --- a/src/lightning/pytorch/trainer/trainer.py +++ b/src/lightning/pytorch/trainer/trainer.py @@ -28,10 +28,8 @@ from typing import Any, Dict, Generator, Iterable, List, Optional, Union from weakref import proxy -import torch -from torch.optim import Optimizer - import lightning.pytorch as pl +import torch from lightning.fabric.utilities.apply_func import convert_tensors_to_scalars from lightning.fabric.utilities.cloud_io import _is_local_file_protocol from lightning.fabric.utilities.types import _PATH @@ -79,6 +77,7 @@ LRSchedulerConfig, ) from lightning.pytorch.utilities.warnings import PossibleUserWarning +from torch.optim import Optimizer log = logging.getLogger(__name__) @@ -940,9 +939,9 @@ def _run( log.debug(f"{self.__class__.__name__}: preparing data") self._data_connector.prepare_data() - call._call_setup_hook(self) # allow user to set up LightningModule in accelerator environment log.debug(f"{self.__class__.__name__}: configuring model") call._call_configure_model(self) + call._call_setup_hook(self) # allow user to set up LightningModule in accelerator environment # check if we should delay restoring checkpoint till later if not self.strategy.restore_checkpoint_after_setup: diff --git a/tests/tests_pytorch/callbacks/test_finetuning_callback.py b/tests/tests_pytorch/callbacks/test_finetuning_callback.py index 0c09ae5d5042a..519246a9ed0ce 
100644 --- a/tests/tests_pytorch/callbacks/test_finetuning_callback.py +++ b/tests/tests_pytorch/callbacks/test_finetuning_callback.py @@ -19,12 +19,11 @@ from lightning.pytorch import LightningModule, Trainer, seed_everything from lightning.pytorch.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel, RandomDataset +from tests_pytorch.helpers.runif import RunIf from torch import nn from torch.optim import SGD, Optimizer from torch.utils.data import DataLoader -from tests_pytorch.helpers.runif import RunIf - class TestBackboneFinetuningCallback(BackboneFinetuning): def on_train_epoch_start(self, trainer, pl_module): @@ -283,10 +282,12 @@ def test_complex_nested_model(): directly themselves rather than exclusively their submodules containing parameters.""" model = nn.Sequential( - OrderedDict([ - ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), - ("decoder", ConvBlock(128, 10)), - ]) + OrderedDict( + [ + ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), + ("decoder", ConvBlock(128, 10)), + ] + ) ) # There are 10 leaf modules or parent modules w/ parameters in the test model @@ -346,6 +347,8 @@ def test_callbacks_restore(tmp_path): assert len(callback._internal_optimizer_metadata) == 1 # only 2 param groups + print("##########") + print(callback._internal_optimizer_metadata[0]) assert len(callback._internal_optimizer_metadata[0]) == 2 # original parameters @@ -431,3 +434,52 @@ def test_unsupported_strategies(tmp_path): trainer = Trainer(accelerator="cpu", strategy="deepspeed", callbacks=[callback]) with pytest.raises(NotImplementedError, match="does not support running with the DeepSpeed strategy"): callback.setup(trainer, model, stage=None) + + +def test_finetuning_with_configure_model(tmp_path): + """Test that BaseFinetuning works correctly with configure_model by ensuring freeze_before_training + is called after configure_model but before training starts.""" + + class TrackingFinetuningCallback(BaseFinetuning): + def __init__(self): + super().__init__() + + def freeze_before_training(self, pl_module): + assert hasattr(pl_module, "backbone"), "backbone should be configured before freezing" + self.freeze(pl_module.backbone) + + def finetune_function(self, pl_module, epoch, optimizer): + pass + + class TestModel(LightningModule): + def __init__(self): + super().__init__() + self.configure_model_called_count = 0 + + def configure_model(self): + self.backbone = nn.Linear(32, 32) + self.classifier = nn.Linear(32, 2) + self.configure_model_called_count += 1 + + def forward(self, x): + x = self.backbone(x) + return self.classifier(x) + + def training_step(self, batch, batch_idx): + return self.forward(batch).sum() + + def configure_optimizers(self): + return torch.optim.SGD(self.parameters(), lr=0.1) + + print("start of the test") + model = TestModel() + callback = TrackingFinetuningCallback() + trainer = Trainer( + default_root_dir=tmp_path, + callbacks=[callback], + max_epochs=1, + limit_train_batches=1, + ) + + trainer.fit(model, torch.randn(10, 32)) + assert model.configure_model_called_count == 1 From ef35dca9042bd5b79eead9b1fed765d1e3b89804 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:40:00 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/lightning/pytorch/trainer/trainer.py | 5 +++-- 
.../callbacks/test_finetuning_callback.py | 17 ++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py index 210d63bea6b12..0fbb753203a1b 100644 --- a/src/lightning/pytorch/trainer/trainer.py +++ b/src/lightning/pytorch/trainer/trainer.py @@ -28,8 +28,10 @@ from typing import Any, Dict, Generator, Iterable, List, Optional, Union from weakref import proxy -import lightning.pytorch as pl import torch +from torch.optim import Optimizer + +import lightning.pytorch as pl from lightning.fabric.utilities.apply_func import convert_tensors_to_scalars from lightning.fabric.utilities.cloud_io import _is_local_file_protocol from lightning.fabric.utilities.types import _PATH @@ -77,7 +79,6 @@ LRSchedulerConfig, ) from lightning.pytorch.utilities.warnings import PossibleUserWarning -from torch.optim import Optimizer log = logging.getLogger(__name__) diff --git a/tests/tests_pytorch/callbacks/test_finetuning_callback.py b/tests/tests_pytorch/callbacks/test_finetuning_callback.py index 519246a9ed0ce..4d7e133e63e68 100644 --- a/tests/tests_pytorch/callbacks/test_finetuning_callback.py +++ b/tests/tests_pytorch/callbacks/test_finetuning_callback.py @@ -19,11 +19,12 @@ from lightning.pytorch import LightningModule, Trainer, seed_everything from lightning.pytorch.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel, RandomDataset -from tests_pytorch.helpers.runif import RunIf from torch import nn from torch.optim import SGD, Optimizer from torch.utils.data import DataLoader +from tests_pytorch.helpers.runif import RunIf + class TestBackboneFinetuningCallback(BackboneFinetuning): def on_train_epoch_start(self, trainer, pl_module): @@ -282,12 +283,10 @@ def test_complex_nested_model(): directly themselves rather than exclusively their submodules containing parameters.""" model = nn.Sequential( - OrderedDict( - [ - ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), - ("decoder", ConvBlock(128, 10)), - ] - ) + OrderedDict([ + ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), + ("decoder", ConvBlock(128, 10)), + ]) ) # There are 10 leaf modules or parent modules w/ parameters in the test model @@ -437,8 +436,8 @@ def test_unsupported_strategies(tmp_path): def test_finetuning_with_configure_model(tmp_path): - """Test that BaseFinetuning works correctly with configure_model by ensuring freeze_before_training - is called after configure_model but before training starts.""" + """Test that BaseFinetuning works correctly with configure_model by ensuring freeze_before_training is called after + configure_model but before training starts.""" class TrackingFinetuningCallback(BaseFinetuning): def __init__(self): From 56d05a347c50097c238c674793c4612956dd48a6 Mon Sep 17 00:00:00 2001 From: Alan Chu Date: Fri, 22 Nov 2024 02:42:05 -0800 Subject: [PATCH 5/8] remove print statement --- .../callbacks/test_finetuning_callback.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/tests_pytorch/callbacks/test_finetuning_callback.py b/tests/tests_pytorch/callbacks/test_finetuning_callback.py index 4d7e133e63e68..d7e5bb304dfa8 100644 --- a/tests/tests_pytorch/callbacks/test_finetuning_callback.py +++ b/tests/tests_pytorch/callbacks/test_finetuning_callback.py @@ -19,12 +19,11 @@ from lightning.pytorch import LightningModule, Trainer, seed_everything from 
lightning.pytorch.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel, RandomDataset +from tests_pytorch.helpers.runif import RunIf from torch import nn from torch.optim import SGD, Optimizer from torch.utils.data import DataLoader -from tests_pytorch.helpers.runif import RunIf - class TestBackboneFinetuningCallback(BackboneFinetuning): def on_train_epoch_start(self, trainer, pl_module): @@ -283,10 +282,12 @@ def test_complex_nested_model(): directly themselves rather than exclusively their submodules containing parameters.""" model = nn.Sequential( - OrderedDict([ - ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), - ("decoder", ConvBlock(128, 10)), - ]) + OrderedDict( + [ + ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), + ("decoder", ConvBlock(128, 10)), + ] + ) ) # There are 10 leaf modules or parent modules w/ parameters in the test model @@ -346,8 +347,6 @@ def test_callbacks_restore(tmp_path): assert len(callback._internal_optimizer_metadata) == 1 # only 2 param groups - print("##########") - print(callback._internal_optimizer_metadata[0]) assert len(callback._internal_optimizer_metadata[0]) == 2 # original parameters @@ -470,7 +469,6 @@ def training_step(self, batch, batch_idx): def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=0.1) - print("start of the test") model = TestModel() callback = TrackingFinetuningCallback() trainer = Trainer( From 1c040d721b8197ee2e1b9f72906fdc4eb6d00637 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:43:10 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../callbacks/test_finetuning_callback.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/tests_pytorch/callbacks/test_finetuning_callback.py b/tests/tests_pytorch/callbacks/test_finetuning_callback.py index d7e5bb304dfa8..dde5924fda394 100644 --- a/tests/tests_pytorch/callbacks/test_finetuning_callback.py +++ b/tests/tests_pytorch/callbacks/test_finetuning_callback.py @@ -19,11 +19,12 @@ from lightning.pytorch import LightningModule, Trainer, seed_everything from lightning.pytorch.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel, RandomDataset -from tests_pytorch.helpers.runif import RunIf from torch import nn from torch.optim import SGD, Optimizer from torch.utils.data import DataLoader +from tests_pytorch.helpers.runif import RunIf + class TestBackboneFinetuningCallback(BackboneFinetuning): def on_train_epoch_start(self, trainer, pl_module): @@ -282,12 +283,10 @@ def test_complex_nested_model(): directly themselves rather than exclusively their submodules containing parameters.""" model = nn.Sequential( - OrderedDict( - [ - ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), - ("decoder", ConvBlock(128, 10)), - ] - ) + OrderedDict([ + ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), + ("decoder", ConvBlock(128, 10)), + ]) ) # There are 10 leaf modules or parent modules w/ parameters in the test model From 8ba644ad6ed623b9c5df7c6b7886470d16aec521 Mon Sep 17 00:00:00 2001 From: Alan Chu Date: Mon, 25 Nov 2024 18:32:16 +0000 Subject: [PATCH 7/8] change assertion order for setup() and configure_model() in test_hooks.py --- 
tests/tests_pytorch/models/test_hooks.py | 197 ++++++++++++----------- 1 file changed, 102 insertions(+), 95 deletions(-) diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index 685bd6c0bdaef..da39d44f6b131 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -21,11 +21,10 @@ from lightning.pytorch import Callback, LightningDataModule, LightningModule, Trainer, __version__ from lightning.pytorch.demos.boring_classes import BoringDataModule, BoringModel, RandomDataset from lightning.pytorch.utilities.model_helpers import is_overridden +from tests_pytorch.helpers.runif import RunIf from torch import Tensor from torch.utils.data import DataLoader -from tests_pytorch.helpers.runif import RunIf - class HookedDataModule(BoringDataModule): def __init__(self, called): @@ -267,50 +266,52 @@ def _auto_train_batch(trainer, model, batches, device, current_epoch=0, current_ using_deepspeed = kwargs.get("strategy") == "deepspeed" out = [] for i in range(current_batch, batches): - out.extend([ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, - {"name": "on_train_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": "training_step", "args": (ANY, i)}, - {"name": "Callback.on_before_zero_grad", "args": (trainer, model, ANY)}, - {"name": "on_before_zero_grad", "args": (ANY,)}, - {"name": "optimizer_zero_grad", "args": (current_epoch, i, ANY)}, - {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, - {"name": "on_before_backward", "args": (ANY,)}, - # DeepSpeed handles backward internally - *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), - {"name": "Callback.on_after_backward", "args": (trainer, model)}, - {"name": "on_after_backward"}, - # note: unscaling happens here in the case of AMP - {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, - {"name": "on_before_optimizer_step", "args": (ANY,)}, - { - "name": "clip_gradients", - "args": (ANY,), - "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, - }, - { - "name": "configure_gradient_clipping", - "args": (ANY,), - "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, - }, - # this is after because it refers to the `LightningModule.optimizer_step` hook which encapsulates - # the actual call to `Precision.optimizer_step` - { - "name": "optimizer_step", - "args": (current_epoch, i, ANY, ANY), - }, - *( - [{"name": "lr_scheduler_step", "args": (ANY, None)}] - if i == (trainer.num_training_batches - 1) - else [] - ), - {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, - {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, - ]) + out.extend( + [ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, + {"name": "on_train_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": "training_step", "args": (ANY, i)}, + {"name": "Callback.on_before_zero_grad", "args": (trainer, model, ANY)}, + {"name": "on_before_zero_grad", "args": (ANY,)}, + {"name": 
"optimizer_zero_grad", "args": (current_epoch, i, ANY)}, + {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, + {"name": "on_before_backward", "args": (ANY,)}, + # DeepSpeed handles backward internally + *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), + {"name": "Callback.on_after_backward", "args": (trainer, model)}, + {"name": "on_after_backward"}, + # note: unscaling happens here in the case of AMP + {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, + {"name": "on_before_optimizer_step", "args": (ANY,)}, + { + "name": "clip_gradients", + "args": (ANY,), + "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, + }, + { + "name": "configure_gradient_clipping", + "args": (ANY,), + "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, + }, + # this is after because it refers to the `LightningModule.optimizer_step` hook which encapsulates + # the actual call to `Precision.optimizer_step` + { + "name": "optimizer_step", + "args": (current_epoch, i, ANY, ANY), + }, + *( + [{"name": "lr_scheduler_step", "args": (ANY, None)}] + if i == (trainer.num_training_batches - 1) + else [] + ), + {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, + {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, + ] + ) return out @staticmethod @@ -318,28 +319,30 @@ def _manual_train_batch(trainer, model, batches, device, **kwargs): using_deepspeed = kwargs.get("strategy") == "deepspeed" out = [] for i in range(batches): - out.extend([ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, - {"name": "on_train_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, - {"name": "on_before_backward", "args": (ANY,)}, - # DeepSpeed handles backward internally - *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), - {"name": "Callback.on_after_backward", "args": (trainer, model)}, - {"name": "on_after_backward"}, - # `manual_backward` calls the previous 3 - {"name": "manual_backward", "args": (ANY,)}, - {"name": "closure"}, - {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, - {"name": "on_before_optimizer_step", "args": (ANY,)}, - {"name": "training_step", "args": (ANY, i)}, - {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, - {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, - ]) + out.extend( + [ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, + {"name": "on_train_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, + {"name": "on_before_backward", "args": (ANY,)}, + # DeepSpeed handles backward internally + *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), + {"name": "Callback.on_after_backward", "args": (trainer, model)}, + {"name": "on_after_backward"}, + # `manual_backward` calls the previous 3 + {"name": "manual_backward", "args": (ANY,)}, + 
{"name": "closure"}, + {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, + {"name": "on_before_optimizer_step", "args": (ANY,)}, + {"name": "training_step", "args": (ANY, i)}, + {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, + {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, + ] + ) return out @staticmethod @@ -357,34 +360,38 @@ def _eval_batch(fn, trainer, model, batches, key, device): out = [] outputs = {key: ANY} for i in range(batches): - out.extend([ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": f"Callback.on_{fn}_batch_start", "args": (trainer, model, ANY, i)}, - {"name": f"on_{fn}_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": f"{fn}_step", "args": (ANY, i)}, - {"name": f"Callback.on_{fn}_batch_end", "args": (trainer, model, outputs, ANY, i)}, - {"name": f"on_{fn}_batch_end", "args": (outputs, ANY, i)}, - ]) + out.extend( + [ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": f"Callback.on_{fn}_batch_start", "args": (trainer, model, ANY, i)}, + {"name": f"on_{fn}_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": f"{fn}_step", "args": (ANY, i)}, + {"name": f"Callback.on_{fn}_batch_end", "args": (trainer, model, outputs, ANY, i)}, + {"name": f"on_{fn}_batch_end", "args": (outputs, ANY, i)}, + ] + ) return out @staticmethod def _predict_batch(trainer, model, batches, device): out = [] for i in range(batches): - out.extend([ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": "Callback.on_predict_batch_start", "args": (trainer, model, ANY, i)}, - {"name": "on_predict_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": "predict_step", "args": (ANY, i)}, - {"name": "Callback.on_predict_batch_end", "args": (trainer, model, ANY, ANY, i)}, - {"name": "on_predict_batch_end", "args": (ANY, ANY, i)}, - ]) + out.extend( + [ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": "Callback.on_predict_batch_start", "args": (trainer, model, ANY, i)}, + {"name": "on_predict_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": "predict_step", "args": (ANY, i)}, + {"name": "Callback.on_predict_batch_end", "args": (trainer, model, ANY, ANY, i)}, + {"name": "on_predict_batch_end", "args": (ANY, ANY, i)}, + ] + ) return out # override so that it gets called @@ -472,11 +479,11 @@ def training_step(self, batch, batch_idx): expected = [ {"name": "configure_callbacks"}, {"name": "prepare_data"}, + {"name": "configure_model"}, {"name": "Callback.setup", "args": (trainer, model), "kwargs": {"stage": "fit"}}, {"name": "setup", "kwargs": {"stage": "fit"}}, # DeepSpeed needs the batch size to figure out throughput logging *([{"name": "train_dataloader"}] if using_deepspeed else []), - {"name": "configure_model"}, {"name": "configure_optimizers"}, {"name": "Callback.on_fit_start", "args": (trainer, model)}, {"name": "on_fit_start"}, @@ -569,9 +576,9 
@@ def test_trainer_model_hook_system_fit_no_val_and_resume_max_epochs(tmp_path): expected = [ {"name": "configure_callbacks"}, {"name": "prepare_data"}, + {"name": "configure_model"}, {"name": "Callback.setup", "args": (trainer, model), "kwargs": {"stage": "fit"}}, {"name": "setup", "kwargs": {"stage": "fit"}}, - {"name": "configure_model"}, {"name": "on_load_checkpoint", "args": (loaded_ckpt,)}, {"name": "Callback.on_load_checkpoint", "args": (trainer, model, loaded_ckpt)}, {"name": "Callback.load_state_dict", "args": ({"foo": True},)}, @@ -647,9 +654,9 @@ def test_trainer_model_hook_system_fit_no_val_and_resume_max_steps(tmp_path): expected = [ {"name": "configure_callbacks"}, {"name": "prepare_data"}, + {"name": "configure_model"}, {"name": "Callback.setup", "args": (trainer, model), "kwargs": {"stage": "fit"}}, {"name": "setup", "kwargs": {"stage": "fit"}}, - {"name": "configure_model"}, {"name": "on_load_checkpoint", "args": (loaded_ckpt,)}, {"name": "Callback.on_load_checkpoint", "args": (trainer, model, loaded_ckpt)}, {"name": "Callback.load_state_dict", "args": ({"foo": True},)}, @@ -714,9 +721,9 @@ def test_trainer_model_hook_system_eval(tmp_path, override_on_x_model_train, bat expected = [ {"name": "configure_callbacks"}, {"name": "prepare_data"}, + {"name": "configure_model"}, {"name": "Callback.setup", "args": (trainer, model), "kwargs": {"stage": verb}}, {"name": "setup", "kwargs": {"stage": verb}}, - {"name": "configure_model"}, {"name": "zero_grad"}, *(hooks if batches else []), {"name": "Callback.teardown", "args": (trainer, model), "kwargs": {"stage": verb}}, @@ -737,9 +744,9 @@ def test_trainer_model_hook_system_predict(tmp_path): expected = [ {"name": "configure_callbacks"}, {"name": "prepare_data"}, + {"name": "configure_model"}, {"name": "Callback.setup", "args": (trainer, model), "kwargs": {"stage": "predict"}}, {"name": "setup", "kwargs": {"stage": "predict"}}, - {"name": "configure_model"}, {"name": "zero_grad"}, {"name": "predict_dataloader"}, {"name": "train", "args": (False,)}, From 1d8ef667d3c9c67c6b84bcac1c9bfd297d65a385 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:32:38 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_pytorch/models/test_hooks.py | 187 +++++++++++------------ 1 file changed, 90 insertions(+), 97 deletions(-) diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index da39d44f6b131..4d7f0ec114fc9 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -21,10 +21,11 @@ from lightning.pytorch import Callback, LightningDataModule, LightningModule, Trainer, __version__ from lightning.pytorch.demos.boring_classes import BoringDataModule, BoringModel, RandomDataset from lightning.pytorch.utilities.model_helpers import is_overridden -from tests_pytorch.helpers.runif import RunIf from torch import Tensor from torch.utils.data import DataLoader +from tests_pytorch.helpers.runif import RunIf + class HookedDataModule(BoringDataModule): def __init__(self, called): @@ -266,52 +267,50 @@ def _auto_train_batch(trainer, model, batches, device, current_epoch=0, current_ using_deepspeed = kwargs.get("strategy") == "deepspeed" out = [] for i in range(current_batch, batches): - out.extend( - [ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", 
"args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, - {"name": "on_train_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": "training_step", "args": (ANY, i)}, - {"name": "Callback.on_before_zero_grad", "args": (trainer, model, ANY)}, - {"name": "on_before_zero_grad", "args": (ANY,)}, - {"name": "optimizer_zero_grad", "args": (current_epoch, i, ANY)}, - {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, - {"name": "on_before_backward", "args": (ANY,)}, - # DeepSpeed handles backward internally - *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), - {"name": "Callback.on_after_backward", "args": (trainer, model)}, - {"name": "on_after_backward"}, - # note: unscaling happens here in the case of AMP - {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, - {"name": "on_before_optimizer_step", "args": (ANY,)}, - { - "name": "clip_gradients", - "args": (ANY,), - "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, - }, - { - "name": "configure_gradient_clipping", - "args": (ANY,), - "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, - }, - # this is after because it refers to the `LightningModule.optimizer_step` hook which encapsulates - # the actual call to `Precision.optimizer_step` - { - "name": "optimizer_step", - "args": (current_epoch, i, ANY, ANY), - }, - *( - [{"name": "lr_scheduler_step", "args": (ANY, None)}] - if i == (trainer.num_training_batches - 1) - else [] - ), - {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, - {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, - ] - ) + out.extend([ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, + {"name": "on_train_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": "training_step", "args": (ANY, i)}, + {"name": "Callback.on_before_zero_grad", "args": (trainer, model, ANY)}, + {"name": "on_before_zero_grad", "args": (ANY,)}, + {"name": "optimizer_zero_grad", "args": (current_epoch, i, ANY)}, + {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, + {"name": "on_before_backward", "args": (ANY,)}, + # DeepSpeed handles backward internally + *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), + {"name": "Callback.on_after_backward", "args": (trainer, model)}, + {"name": "on_after_backward"}, + # note: unscaling happens here in the case of AMP + {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, + {"name": "on_before_optimizer_step", "args": (ANY,)}, + { + "name": "clip_gradients", + "args": (ANY,), + "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, + }, + { + "name": "configure_gradient_clipping", + "args": (ANY,), + "kwargs": {"gradient_clip_val": None, "gradient_clip_algorithm": None}, + }, + # this is after because it refers to the `LightningModule.optimizer_step` hook which encapsulates + # the actual call to `Precision.optimizer_step` + { + "name": "optimizer_step", + "args": (current_epoch, i, ANY, ANY), + }, + *( + [{"name": "lr_scheduler_step", "args": (ANY, None)}] + if i == (trainer.num_training_batches - 
1) + else [] + ), + {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, + {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, + ]) return out @staticmethod @@ -319,30 +318,28 @@ def _manual_train_batch(trainer, model, batches, device, **kwargs): using_deepspeed = kwargs.get("strategy") == "deepspeed" out = [] for i in range(batches): - out.extend( - [ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, - {"name": "on_train_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, - {"name": "on_before_backward", "args": (ANY,)}, - # DeepSpeed handles backward internally - *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), - {"name": "Callback.on_after_backward", "args": (trainer, model)}, - {"name": "on_after_backward"}, - # `manual_backward` calls the previous 3 - {"name": "manual_backward", "args": (ANY,)}, - {"name": "closure"}, - {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, - {"name": "on_before_optimizer_step", "args": (ANY,)}, - {"name": "training_step", "args": (ANY, i)}, - {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, - {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, - ] - ) + out.extend([ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": "Callback.on_train_batch_start", "args": (trainer, model, ANY, i)}, + {"name": "on_train_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": "Callback.on_before_backward", "args": (trainer, model, ANY)}, + {"name": "on_before_backward", "args": (ANY,)}, + # DeepSpeed handles backward internally + *([{"name": "backward", "args": (ANY,)}] if not using_deepspeed else []), + {"name": "Callback.on_after_backward", "args": (trainer, model)}, + {"name": "on_after_backward"}, + # `manual_backward` calls the previous 3 + {"name": "manual_backward", "args": (ANY,)}, + {"name": "closure"}, + {"name": "Callback.on_before_optimizer_step", "args": (trainer, model, ANY)}, + {"name": "on_before_optimizer_step", "args": (ANY,)}, + {"name": "training_step", "args": (ANY, i)}, + {"name": "Callback.on_train_batch_end", "args": (trainer, model, {"loss": ANY}, ANY, i)}, + {"name": "on_train_batch_end", "args": ({"loss": ANY}, ANY, i)}, + ]) return out @staticmethod @@ -360,38 +357,34 @@ def _eval_batch(fn, trainer, model, batches, key, device): out = [] outputs = {key: ANY} for i in range(batches): - out.extend( - [ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": f"Callback.on_{fn}_batch_start", "args": (trainer, model, ANY, i)}, - {"name": f"on_{fn}_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": f"{fn}_step", "args": (ANY, i)}, - {"name": f"Callback.on_{fn}_batch_end", "args": (trainer, model, outputs, ANY, i)}, - {"name": f"on_{fn}_batch_end", "args": (outputs, ANY, i)}, - ] - ) + out.extend([ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": 
"transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": f"Callback.on_{fn}_batch_start", "args": (trainer, model, ANY, i)}, + {"name": f"on_{fn}_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": f"{fn}_step", "args": (ANY, i)}, + {"name": f"Callback.on_{fn}_batch_end", "args": (trainer, model, outputs, ANY, i)}, + {"name": f"on_{fn}_batch_end", "args": (outputs, ANY, i)}, + ]) return out @staticmethod def _predict_batch(trainer, model, batches, device): out = [] for i in range(batches): - out.extend( - [ - {"name": "on_before_batch_transfer", "args": (ANY, 0)}, - {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, - {"name": "on_after_batch_transfer", "args": (ANY, 0)}, - {"name": "Callback.on_predict_batch_start", "args": (trainer, model, ANY, i)}, - {"name": "on_predict_batch_start", "args": (ANY, i)}, - {"name": "forward", "args": (ANY,)}, - {"name": "predict_step", "args": (ANY, i)}, - {"name": "Callback.on_predict_batch_end", "args": (trainer, model, ANY, ANY, i)}, - {"name": "on_predict_batch_end", "args": (ANY, ANY, i)}, - ] - ) + out.extend([ + {"name": "on_before_batch_transfer", "args": (ANY, 0)}, + {"name": "transfer_batch_to_device", "args": (ANY, device, 0)}, + {"name": "on_after_batch_transfer", "args": (ANY, 0)}, + {"name": "Callback.on_predict_batch_start", "args": (trainer, model, ANY, i)}, + {"name": "on_predict_batch_start", "args": (ANY, i)}, + {"name": "forward", "args": (ANY,)}, + {"name": "predict_step", "args": (ANY, i)}, + {"name": "Callback.on_predict_batch_end", "args": (trainer, model, ANY, ANY, i)}, + {"name": "on_predict_batch_end", "args": (ANY, ANY, i)}, + ]) return out # override so that it gets called