Reduce memory overhead when capturing tensors

baconsaur · baconsaur · commit 4aad8e3c140f · 2025-02-17T12:06:55.000-04:00
diff --git a/llm_steer.py b/llm_steer.py
@@ -227,7 +227,8 @@ def _add_steer_vector(self, layer_idx: int, steerElem: SteerElement):
 
     def _capture_tensor(self, layer_idx: int, tokens: Tensor):
         self._set_forward_fn(ActivationMode.CAPTURE, layer_idx)
-        self.model(tokens)
+        with torch.inference_mode():  # forward pass only: no autograd tracking, which reduces memory overhead
+            self.model(tokens)  # output discarded; result is read from self.captured_tensor (presumably set by the CAPTURE forward fn - confirm)
         result = self.captured_tensor
         print(f"captured tensor: {result}")
         return result