|
| 1 | +############################################## |
| 2 | +Truncated Backpropagation Through Time (TBPTT) |
| 3 | +############################################## |
| 4 | + |
| 5 | +Truncated Backpropagation Through Time (TBPTT) performs backpropagation every k steps of |
| 6 | +a much longer sequence. This is made possible by passing training batches |
| 7 | +split along the time dimension into splits of size k to the |
| 8 | +``training_step``. In order to keep the same forward propagation behavior, all |
| 9 | +hidden states should be carried over between consecutive time-dimension splits. |
| 10 | + |
| 11 | + |
| 12 | +.. code-block:: python |
| 13 | +
|
| 14 | + import torch |
| 15 | + import torch.optim as optim |
| 16 | + import pytorch_lightning as pl |
| 17 | + from pytorch_lightning import LightningModule |
| 18 | +
|
| 19 | + class LitModel(LightningModule): |
| 20 | +
|
| 21 | + def __init__(self): |
| 22 | + super().__init__() |
| 23 | +
|
| 24 | + # 1. Switch to manual optimization |
| 25 | + self.automatic_optimization = False |
| 26 | +
|
| 27 | + self.truncated_bptt_steps = 10 |
| 28 | + self.my_rnn = ParityModuleRNN() # Define RNN model using ParityModuleRNN |
| 29 | +
|
| 30 | + # 2. Remove the `hiddens` argument |
| 31 | + def training_step(self, batch, batch_idx): |
| 32 | +
|
| 33 | + # 3. Split the batch in chunks along the time dimension |
| 34 | + split_batches = split_batch(batch, self.truncated_bptt_steps) |
| 35 | +
|
| 36 | + batch_size = 10 |
| 37 | + hidden_dim = 20 |
| 38 | + hiddens = torch.zeros(1, batch_size, hidden_dim, device=self.device) |
| 39 | + for split_batch in range(split_batches): |
| 40 | + # 4. Perform the optimization in a loop |
| 41 | + loss, hiddens = self.my_rnn(split_batch, hiddens) |
| 42 | + self.backward(loss) |
| 43 | + self.optimizer.step() |
| 44 | + self.optimizer.zero_grad() |
| 45 | +
|
| 46 | + # 5. "Truncate" |
| 47 | + hiddens = hiddens.detach() |
| 48 | +
|
| 49 | + # 6. Remove the return of `hiddens` |
| 50 | + # Returning loss in manual optimization is not needed |
| 51 | + return None |
| 52 | +
|
| 53 | + def configure_optimizers(self): |
| 54 | + return optim.Adam(self.my_rnn.parameters(), lr=0.001) |
| 55 | +
|
| 56 | + if __name__ == "__main__": |
| 57 | + model = LitModel() |
| 58 | + trainer = pl.Trainer(max_epochs=5) |
| 59 | + trainer.fit(model, train_dataloader) # Define your own dataloader |
0 commit comments