Commit 67f280b

Merge branch 'master' into bugfix/throughput-divisible
2 parents 42af40e + 99fe656

197 files changed, +1270 -1093 lines


.pre-commit-config.yaml (+3 -1)

@@ -84,10 +84,12 @@ repos:
           - flake8-return
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.1.15"
+    rev: "v0.2.0"
     hooks:
       - id: ruff
         args: ["--fix", "--preview"]
+      - id: ruff-format
+        args: ["--preview"]
 
   - repo: https://github.com/executablebooks/mdformat
     rev: 0.7.17
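
Note (not part of the commit): the bump to ruff v0.2.0 together with the new ruff-format hook appears to account for the mechanical bracket changes in the Python diffs below — in --preview mode the formatter "hugs" a sole bracketed argument against the call's own parentheses, and it inserts a blank line after module docstrings. A minimal sketch of the two layouts, using torchvision transforms as in the affected examples:

from torchvision.transforms import Compose, Resize, ToTensor

# Old layout: the sole list argument opens on its own line,
# costing an extra indentation level.
transform_old_style = Compose(
    [
        Resize(224),
        ToTensor(),
    ]
)

# New layout under ruff-format --preview: the list "hugs" the
# enclosing call's parentheses. Both forms are identical at runtime.
transform_new_style = Compose([
    Resize(224),
    ToTensor(),
])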

README.md (+3 -1)

@@ -92,12 +92,14 @@ pip install -iU https://test.pypi.org/simple/ pytorch-lightning
 
 ______________________________________________________________________
 
-## Lightning has 3 core packages
+## Lightning has 4 core packages
 
 [PyTorch Lightning: Train and deploy PyTorch at scale](#pytorch-lightning-train-and-deploy-pytorch-at-scale).
 <br/>
 [Lightning Fabric: Expert control](#lightning-fabric-expert-control).
 <br/>
+[Lightning Data: Blazing fast, distributed streaming of training data from cloud storage](https://github.com/Lightning-AI/pytorch-lightning/tree/master/src/lightning/data).
+<br/>
 [Lightning Apps: Build AI products and ML workflows](#lightning-apps-build-ai-products-and-ml-workflows).
 
 Lightning gives you granular control over how much abstraction you want to add over PyTorch.

docs/source-fabric/advanced/distributed_communication.rst (+5)

@@ -236,6 +236,11 @@ Full example:
     result = fabric.all_gather(data)
     print("Result of all-gather:", result)  # tensor([ 0, 10, 20, 30])
 
+.. warning::
+
+    For the special case where ``world_size`` is 1, no additional dimension is added to the tensor(s). This inconsistency
+    is kept for backward compatibility and you may need to handle this special case in your code to make it agnostic.
+
 
 ----
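
Note (not part of the diff): a minimal sketch of one way calling code might handle the world_size == 1 special case the new warning describes, assuming a single-process CPU Fabric launch similar to the surrounding documentation example:

import torch
from lightning.fabric import Fabric

fabric = Fabric(accelerator="cpu", devices=1)
fabric.launch()

data = torch.tensor([10 * fabric.global_rank])
result = fabric.all_gather(data)
if fabric.world_size == 1:
    # With a single process, all_gather returns the tensor without the extra
    # leading "world" dimension, so add it manually to keep downstream code
    # agnostic to the number of processes.
    result = result.unsqueeze(0)
print(result.shape)  # torch.Size([1, 1]) with one process, (N, 1) with N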

examples/app/dag/app.py (+3 -3)

@@ -65,9 +65,9 @@ def __init__(self, models_paths: list):
         )
 
         # Step 3: Create the work to train the models_paths in parallel.
-        self.dict = Dict(
-            **{model_path.split(".")[-1]: ModelWork(model_path, parallel=True) for model_path in models_paths}
-        )
+        self.dict = Dict(**{
+            model_path.split(".")[-1]: ModelWork(model_path, parallel=True) for model_path in models_paths
+        })
 
         # Step 4: Some element to track components progress.
         self.has_completed = False

examples/app/server/app.py (+5 -7)

@@ -20,13 +20,11 @@ def setup(self):
     def predict(self, request):
         image = base64.b64decode(request.image.encode("utf-8"))
         image = Image.open(io.BytesIO(image))
-        transforms = torchvision.transforms.Compose(
-            [
-                torchvision.transforms.Resize(224),
-                torchvision.transforms.ToTensor(),
-                torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-            ]
-        )
+        transforms = torchvision.transforms.Compose([
+            torchvision.transforms.Resize(224),
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ])
         image = transforms(image)
         image = image.to(self._device)
         prediction = self._model(image.unsqueeze(0))

examples/app/server_with_auto_scaler/app.py (+5 -7)

@@ -34,13 +34,11 @@ def setup(self):
         self._model = torchvision.models.resnet18(pretrained=True).to(self._device)
 
     def predict(self, requests: BatchRequestModel):
-        transforms = torchvision.transforms.Compose(
-            [
-                torchvision.transforms.Resize(224),
-                torchvision.transforms.ToTensor(),
-                torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-            ]
-        )
+        transforms = torchvision.transforms.Compose([
+            torchvision.transforms.Resize(224),
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ])
         images = []
         for request in requests.inputs:
             image = app.components.serve.types.image.Image.deserialize(request.image)

examples/fabric/dcgan/train_fabric.py (+8 -9)

@@ -4,6 +4,7 @@
 Code adapted from the official PyTorch DCGAN tutorial:
 https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
 """
+
 import os
 import time
 from pathlib import Path

@@ -55,14 +56,12 @@ def main():
         root=dataroot,
         split="all",
         download=True,
-        transform=transforms.Compose(
-            [
-                transforms.Resize(image_size),
-                transforms.CenterCrop(image_size),
-                transforms.ToTensor(),
-                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-            ]
-        ),
+        transform=transforms.Compose([
+            transforms.Resize(image_size),
+            transforms.CenterCrop(image_size),
+            transforms.ToTensor(),
+            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+        ]),
     )
 
     # Create the dataloader

@@ -227,7 +226,7 @@ def __init__(self):
             nn.ReLU(True),
             # state size. (ngf) x 32 x 32
             nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
-            nn.Tanh()
+            nn.Tanh(),
             # state size. (nc) x 64 x 64
         )

examples/fabric/dcgan/train_torch.py (+8 -9)

@@ -4,6 +4,7 @@
 Code adapted from the official PyTorch DCGAN tutorial:
 https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
 """
+
 import os
 import random
 import time

@@ -55,14 +56,12 @@ def main():
         root=dataroot,
         split="all",
         download=True,
-        transform=transforms.Compose(
-            [
-                transforms.Resize(image_size),
-                transforms.CenterCrop(image_size),
-                transforms.ToTensor(),
-                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-            ]
-        ),
+        transform=transforms.Compose([
+            transforms.Resize(image_size),
+            transforms.CenterCrop(image_size),
+            transforms.ToTensor(),
+            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+        ]),
     )
 
     # Create the dataloader

@@ -236,7 +235,7 @@ def __init__(self):
             nn.ReLU(True),
             # state size. (ngf) x 32 x 32
             nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
-            nn.Tanh()
+            nn.Tanh(),
             # state size. (nc) x 64 x 64
         )

examples/fabric/meta_learning/train_fabric.py (+1)

@@ -14,6 +14,7 @@
 Run it with:
     lightning run model train_fabric.py --accelerator=cuda --devices=2 --strategy=ddp
 """
+
 import cherry
 import learn2learn as l2l
 import torch

examples/fabric/meta_learning/train_torch.py (+1)

@@ -15,6 +15,7 @@
 Run it with:
     torchrun --nproc_per_node=2 --standalone train_torch.py
 """
+
 import os
 import random

examples/fabric/reinforcement_learning/train_fabric.py (+4 -8)

@@ -84,14 +84,10 @@ def main(args: argparse.Namespace):
     )
 
     # Environment setup
-    envs = gym.vector.SyncVectorEnv(
-        [
-            make_env(
-                args.env_id, args.seed + rank * args.num_envs + i, rank, args.capture_video, logger.log_dir, "train"
-            )
-            for i in range(args.num_envs)
-        ]
-    )
+    envs = gym.vector.SyncVectorEnv([
+        make_env(args.env_id, args.seed + rank * args.num_envs + i, rank, args.capture_video, logger.log_dir, "train")
+        for i in range(args.num_envs)
+    ])
     assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
 
     # Define the agent and the optimizer and setup them with Fabric

examples/fabric/reinforcement_learning/train_fabric_decoupled.py (+3 -3)

@@ -59,9 +59,9 @@ def player(args, world_collective: TorchCollective, player_trainer_collective: T
     )
 
     # Environment setup
-    envs = gym.vector.SyncVectorEnv(
-        [make_env(args.env_id, args.seed + i, 0, args.capture_video, log_dir, "train") for i in range(args.num_envs)]
-    )
+    envs = gym.vector.SyncVectorEnv([
+        make_env(args.env_id, args.seed + i, 0, args.capture_video, log_dir, "train") for i in range(args.num_envs)
+    ])
     assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
 
     # Define the agent

examples/fabric/reinforcement_learning/train_torch.py (+11 -13)

@@ -142,19 +142,17 @@ def main(args: argparse.Namespace):
     )
 
     # Environment setup
-    envs = gym.vector.SyncVectorEnv(
-        [
-            make_env(
-                args.env_id,
-                args.seed + global_rank * args.num_envs + i,
-                global_rank,
-                args.capture_video,
-                logger.log_dir if global_rank == 0 else None,
-                "train",
-            )
-            for i in range(args.num_envs)
-        ]
-    )
+    envs = gym.vector.SyncVectorEnv([
+        make_env(
+            args.env_id,
+            args.seed + global_rank * args.num_envs + i,
+            global_rank,
+            args.capture_video,
+            logger.log_dir if global_rank == 0 else None,
+            "train",
+        )
+        for i in range(args.num_envs)
+    ])
     assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
 
     # Define the agent and the optimizer and setup them with DistributedDataParallel

examples/pytorch/basics/autoencoder.py (+1)

@@ -16,6 +16,7 @@
 To run: python autoencoder.py --trainer.max_epochs=50
 
 """
+
 from os import path
 from typing import Optional, Tuple

examples/pytorch/basics/backbone_image_classifier.py (+1)

@@ -16,6 +16,7 @@
 To run: python backbone_image_classifier.py --trainer.max_epochs=50
 
 """
+
 from os import path
 from typing import Optional

examples/pytorch/domain_templates/computer_vision_fine_tuning.py (+11 -15)

@@ -119,14 +119,12 @@ def normalize_transform(self):
 
     @property
     def train_transform(self):
-        return transforms.Compose(
-            [
-                transforms.Resize((224, 224)),
-                transforms.RandomHorizontalFlip(),
-                transforms.ToTensor(),
-                self.normalize_transform,
-            ]
-        )
+        return transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.RandomHorizontalFlip(),
+            transforms.ToTensor(),
+            self.normalize_transform,
+        ])
 
     @property
     def valid_transform(self):

@@ -269,13 +267,11 @@ def add_arguments_to_parser(self, parser):
         parser.link_arguments("data.batch_size", "model.batch_size")
         parser.link_arguments("finetuning.milestones", "model.milestones")
         parser.link_arguments("finetuning.train_bn", "model.train_bn")
-        parser.set_defaults(
-            {
-                "trainer.max_epochs": 15,
-                "trainer.enable_model_summary": False,
-                "trainer.num_sanity_val_steps": 0,
-            }
-        )
+        parser.set_defaults({
+            "trainer.max_epochs": 15,
+            "trainer.enable_model_summary": False,
+            "trainer.num_sanity_val_steps": 0,
+        })
 
 
 def cli_main():

examples/pytorch/domain_templates/generative_adversarial_net.py (+1)

@@ -18,6 +18,7 @@
     tensorboard --logdir default
 
 """
+
 from argparse import ArgumentParser, Namespace
 
 import numpy as np

examples/pytorch/domain_templates/imagenet.py (+7 -8)

@@ -30,6 +30,7 @@
     python imagenet.py fit --help
 
 """
+
 import os
 from typing import Optional
 

@@ -139,14 +140,12 @@ def setup(self, stage: str):
             train_dir = os.path.join(self.data_path, "train")
             self.train_dataset = datasets.ImageFolder(
                 train_dir,
-                transforms.Compose(
-                    [
-                        transforms.RandomResizedCrop(224),
-                        transforms.RandomHorizontalFlip(),
-                        transforms.ToTensor(),
-                        normalize,
-                    ]
-                ),
+                transforms.Compose([
+                    transforms.RandomResizedCrop(224),
+                    transforms.RandomHorizontalFlip(),
+                    transforms.ToTensor(),
+                    normalize,
+                ]),
             )
         # all stages will use the eval dataset
         normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

examples/pytorch/domain_templates/reinforce_learn_ppo.py (+1)

@@ -28,6 +28,7 @@
 [3] https://github.com/sid-sundrani/ppo_lightning
 
 """
+
 import argparse
 from typing import Callable, Iterator, List, Tuple

examples/pytorch/domain_templates/semantic_segmentation.py (+4 -8)

@@ -333,14 +333,10 @@ def __init__(
         self.net = UNet(
             num_classes=19, num_layers=self.num_layers, features_start=self.features_start, bilinear=self.bilinear
         )
-        self.transform = transforms.Compose(
-            [
-                transforms.ToTensor(),
-                transforms.Normalize(
-                    mean=[0.35675976, 0.37380189, 0.3764753], std=[0.32064945, 0.32098866, 0.32325324]
-                ),
-            ]
-        )
+        self.transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.35675976, 0.37380189, 0.3764753], std=[0.32064945, 0.32098866, 0.32325324]),
+        ])
         self.trainset = KITTI(self.data_path, split="train", transform=self.transform)
         self.validset = KITTI(self.data_path, split="valid", transform=self.transform)
