
Commit fe9fee4

Allow DPO reference model to be loaded from LoadCheckpoint callback (#80)
This is not the cleanest solution of all time, but it unblocks this niche use case without significant rearchitecting of the code. The issue is that we sometimes use a callback to load the checkpoint in Composer (https://github.com/mosaicml/composer/blob/main/composer/callbacks/load_checkpoint.py). This is useful when the base model is saved in a Composer checkpoint and you want to save only LoRA checkpoints during training for autoresume. The callback loads a checkpoint on the `BEFORE_LOAD` event, so that any autoresume checkpoint will overwrite it. None of that really applies to the reference model loading here; we just want to grab the base checkpoint from the callback and load it as an additional step.

Testing:
- Before (fails with NaN loss because weights are not properly loaded): `daniel-matt-failure-1-tRAWIE`
- After, with the LoadCheckpoint callback (init device meta, pretrained false): `daniel-matt-callback-1-VevoUT`
- After, without the LoadCheckpoint callback (init device mixed, pretrained true): `daniel-matt-no-callback-1-G6P1po`

Screenshot: https://github.com/user-attachments/assets/898089ed-71cd-4874-8ae4-7d36c19addc2
1 parent 8ecb82d commit fe9fee4
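For context, here is a minimal sketch of the setup this change targets: the base weights come from a Composer checkpoint via the `LoadCheckpoint` callback, while training only saves LoRA weights for autoresume. The model builder, checkpoint paths, and the `LoadCheckpoint` constructor arguments shown are illustrative assumptions, not the exact compose-rl configuration.

```python
# Sketch only: paths, the builder, and LoadCheckpoint arguments are assumptions.
from composer import Trainer
from composer.callbacks import LoadCheckpoint


def build_policy_model():
    """Hypothetical stand-in for the llm-foundry model builder
    (init_device='meta', pretrained=False, with a PEFT/LoRA config)."""
    raise NotImplementedError


trainer = Trainer(
    model=build_policy_model(),
    callbacks=[
        # Loads the base weights on BEFORE_LOAD, so an autoresume checkpoint
        # (which contains only the LoRA weights saved during training)
        # can still overwrite whatever this callback loaded.
        LoadCheckpoint(load_path='s3://my-bucket/base-model/latest-rank0.pt'),
    ],
    run_name='dpo-lora-run',
    save_folder='s3://my-bucket/lora-checkpoints',
    autoresume=True,
)
```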

File tree

1 file changed: +41, -1 lines changed

compose_rl/dpo/callback.py

Lines changed: 41 additions & 1 deletion
@@ -8,8 +8,11 @@
 
 import torch
 from composer import Trainer
+from composer.callbacks import LoadCheckpoint
 from composer.core import State, get_precision_context
 from composer.loggers import Logger
+from composer.models.huggingface import HuggingFaceModel
+from composer.utils.checkpoint import load_checkpoint
 from llmfoundry.interfaces import CallbackWithConfig
 from llmfoundry.utils import build_composer_model
 # pyright does not recognize process_init_device though it is a declared export
@@ -47,9 +50,10 @@ def after_load(self, state: State, logger: Logger) -> None:
         )
 
         original_load_path = self.train_config.get('load_path', None)
+
         # For HF checkpoint, load_path is unset and should be handled in llmfoundry code.
         # Create a Trainer object to load model into FSDP
-        _ = Trainer(
+        fake_trainer = Trainer(
             model=self.reference_model,
             parallelism_config={'fsdp': state.fsdp_config},
             precision=state.precision,
@@ -58,6 +62,42 @@ def after_load(self, state: State, logger: Logger) -> None:
             load_path=original_load_path,
         )
 
+        # The base model checkpoint may have been supplied by a LoadCheckpoint callback,
+        # so we need to check and apply that checkpoint to the reference model.
+        load_checkpoint_callbacks = [
+            callback for callback in state.callbacks
+            if isinstance(callback, LoadCheckpoint)
+        ]
+
+        if original_load_path is not None and len(
+            load_checkpoint_callbacks,
+        ) > 0:
+            raise ValueError(
+                'Cannot use `load_path` in the train config when using `LoadCheckpoint` callback. '
+                + 'Please remove `load_path` from the train config.',
+            )
+
+        # For any LoadCheckpoint callbacks we found, we will load the checkpoint into the reference model.
+        # If none are found, this for loop is a no-op.
+        for load_checkpoint_callback in load_checkpoint_callbacks:
+            assert isinstance(self.reference_model, HuggingFaceModel)
+
+            # If using PEFT, we need to _not_ filter the state dict to only include the PEFT weights.
+            # This is so the checkpoint can load the base model weights. Since the reference model is
+            # not being updated, we don't need to respect the `should_save_peft_only` flag from the original model
+            # and can just hardcode it to False.
+            self.reference_model.should_save_peft_only = False
+            load_checkpoint(
+                path=load_checkpoint_callback.parsed_path,
+                state=fake_trainer.state,
+                logger=logger,
+                object_store=load_checkpoint_callback.load_object_store,
+                strict_model_weights=load_checkpoint_callback.
+                strict_model_weights,
+                ignore_keys=load_checkpoint_callback.ignore_keys,
+                load_weights_only=load_checkpoint_callback.load_weights_only,
+            )
+
     def before_forward(self, state: State, logger: Logger) -> Optional[int]:
         # Before every batch we need to do a forwards pass over the reference model
         with get_precision_context(state.precision):
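To summarize the behavior after this change, here is a simplified, hypothetical restatement of how `after_load` now decides where the reference model's weights come from. The helper name and return strings are illustrative only; this is not the actual compose-rl implementation.

```python
# Hypothetical summary of the precedence introduced above (illustration only).
from typing import Optional


def resolve_reference_weight_source(
    load_path: Optional[str],
    has_load_checkpoint_callback: bool,
) -> str:
    if load_path is not None and has_load_checkpoint_callback:
        # New guard: the two weight sources are ambiguous, so fail fast.
        raise ValueError('Remove `load_path` from the train config when using LoadCheckpoint.')
    if load_path is not None:
        # Existing path: the temporary Trainer loads the Composer checkpoint.
        return 'composer checkpoint via Trainer(load_path=...)'
    if has_load_checkpoint_callback:
        # New path: apply the callback checkpoint via composer.utils.checkpoint.load_checkpoint.
        return 'callback checkpoint via load_checkpoint(...)'
    # Fallback: HF pretrained weights are handled by llm-foundry when building the model.
    return 'HF pretrained weights handled by llm-foundry'
```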
