Skip to content

Commit 65042bd

Browse files
authored
Merge branch 'master' into bugfix/csv-file-exists
2 parents ed7abef + b097a4d commit 65042bd

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+430
-329
lines changed

requirements/data/data.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33

44
lightning-utilities >=0.8.0, <0.10.0
55
# to be able to include also PL 2.0 and preserve `>` needed for CI min version bypass
6-
torch >0.14.0, <2.2.0
6+
torch >0.14.0, <=2.2.0
77
lightning-cloud
88
filelock

requirements/data/examples.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Pillow >= 9.5.0
22
# min version to match torch >= 2.0.1
3-
torchvision >=0.15.2, <0.17.0
3+
torchvision >=0.15.2, <0.18.0

requirements/data/test.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ pytest-timeout ==2.1.0
55
pytest-rerunfailures ==12.0
66
pytest-random-order ==1.1.0
77
viztracer
8+
pandas
89
pyarrow
9-
polars

requirements/fabric/base.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
33

44
numpy >=1.17.2, <1.27.0
5-
torch >=1.13.0, <2.2.0
5+
torch >=1.13.0, <=2.2.0
66
fsspec[http] >=2022.5.0, <2023.11.0
77
packaging >=20.0, <=23.1
88
typing-extensions >=4.4.0, <4.10.0

requirements/fabric/examples.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
22
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
33

4-
torchvision >=0.14.0, <0.17.0
4+
torchvision >=0.14.0, <0.18.0
55
torchmetrics >=0.10.0, <1.3.0
66
lightning-utilities >=0.8.0, <0.10.0

requirements/pytorch/base.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
33

44
numpy >=1.17.2, <1.27.0
5-
torch >=1.13.0, <2.2.0
5+
torch >=1.13.0, <=2.2.0
66
tqdm >=4.57.0, <4.67.0
77
PyYAML >=5.4, <6.1.0
88
fsspec[http] >=2022.5.0, <2023.11.0

requirements/pytorch/examples.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
33

44
requests <2.32.0
5-
torchvision >=0.14.0, <0.17.0
5+
torchvision >=0.14.0, <0.18.0
66
gym[classic_control] >=0.17.0, <0.27.0
77
ipython[all] <8.15.0
88
torchmetrics >=0.10.0, <1.3.0

requirements/typing.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
mypy==1.5.1
2-
torch==2.1.0
1+
mypy==1.8.0
2+
torch==2.2.0
33

44
types-Markdown
55
types-PyYAML

src/lightning/app/cli/lightning_cli.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def init_pl_app(source: Union[Tuple[str], Tuple[str, str]], name: str, overwrite
363363
source_dir = str(Path(script_path).resolve().parent)
364364
elif len(source) == 2:
365365
# enable type checking once https://github.com/python/mypy/issues/1178 is available
366-
source_dir, script_path = source # type: ignore
366+
source_dir, script_path = source
367367
else:
368368
click.echo(
369369
f"Incorrect number of arguments. You passed ({', '.join(source)}) but only either one argument"

src/lightning/data/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from lightning_utilities.core.imports import RequirementCache
2+
13
from lightning.data.processing.functions import map, optimize, walk
24
from lightning.data.streaming.combined import CombinedStreamingDataset
35
from lightning.data.streaming.dataloader import StreamingDataLoader
@@ -13,3 +15,8 @@
1315
"optimize",
1416
"walk",
1517
]
18+
19+
if RequirementCache('lightning_sdk'):
20+
from lightning_sdk import Machine # noqa: F401
21+
22+
__all__.append("Machine")

src/lightning/data/processing/data_processor.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,6 @@ def __init__(
372372
self._counter = 0
373373
self._last_time = time()
374374
self._index_counter = 0
375-
self._current_item: Any = None
376375

377376
def run(self) -> None:
378377
try:
@@ -477,6 +476,7 @@ def _try_upload(self, data: Optional[Union[str, Tuple[str, str]]]) -> None:
477476
assert os.path.exists(data), data
478477
else:
479478
assert os.path.exists(data[-1]), data
479+
480480
self.to_upload_queues[self._counter % self.num_uploaders].put(data)
481481

482482
def _collect_paths(self) -> None:
@@ -588,8 +588,8 @@ def _start_uploaders(self) -> None:
588588

589589
def _handle_data_chunk_recipe(self, index: int) -> None:
590590
try:
591-
self._current_item = self.items[index] if self.reader is None else self.reader.read(self.items[index])
592-
item_data_or_generator = self.data_recipe.prepare_item(self._current_item)
591+
current_item = self.items[index] if self.reader is None else self.reader.read(self.items[index])
592+
item_data_or_generator = self.data_recipe.prepare_item(current_item)
593593
if isinstance(item_data_or_generator, types.GeneratorType):
594594
for item_data in item_data_or_generator:
595595
if item_data is not None:
@@ -713,14 +713,19 @@ def _done(self, size: int, delete_cached_files: bool, output_dir: Dir) -> _Resul
713713
size = sum([c["dim"] if c["dim"] is not None else c["chunk_size"] for c in config["chunks"]])
714714
num_bytes = sum([c["chunk_bytes"] for c in config["chunks"]])
715715
data_format = tree_unflatten(config["config"]["data_format"], treespec_loads(config["config"]["data_spec"]))
716+
num_chunks = len(config["chunks"])
717+
718+
# The platform can't store more than 1024 entries.
719+
# Note: This isn't really used right now, so it is fine to skip if too big.
720+
num_bytes_per_chunk = [c["chunk_size"] for c in config["chunks"]] if num_chunks < 1024 else []
716721

717722
return _Result(
718723
size=size,
719724
num_bytes=num_bytes,
720725
data_format=data_format,
721726
compression=config["config"]["compression"],
722727
num_chunks=len(config["chunks"]),
723-
num_bytes_per_chunk=[c["chunk_size"] for c in config["chunks"]],
728+
num_bytes_per_chunk=num_bytes_per_chunk,
724729
)
725730
return _Result(
726731
size=size,
@@ -866,9 +871,9 @@ def run(self, data_recipe: DataRecipe) -> None:
866871
raise ValueError("The `prepare_structure` should return a list of item metadata.")
867872

868873
if self.reader:
869-
workers_user_items = self.reader.items_to_workers(user_items, self.num_workers)
874+
user_items = self.reader.remap_items(user_items, self.num_workers)
870875

871-
elif self.weights is not None:
876+
if self.weights is not None:
872877
if len(self.weights) != len(user_items):
873878
raise ValueError("The provided weights length should match the inputs' length.")
874879
workers_user_items = _map_items_to_workers_weighted(

src/lightning/data/processing/dns.py

-47
This file was deleted.

src/lightning/data/processing/functions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424

2525
from lightning.data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
2626
from lightning.data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
27-
from lightning.data.processing.dns import optimize_dns_context
2827
from lightning.data.processing.readers import BaseReader
28+
from lightning.data.processing.utilities import optimize_dns_context
2929
from lightning.data.streaming.resolver import (
3030
Dir,
3131
_assert_dir_has_index_file,

src/lightning/data/processing/image.py

-47
This file was deleted.

0 commit comments

Comments
 (0)