@@ -316,13 +316,24 @@ def downloaded_size(self) -> int:
316
316
return sum (url_info .size for url_info in self ._recorded_url_infos .values ())
317
317
318
318
def _get_dl_path (
319
- self , resource : resource_lib .Resource , checksum : str | None = None
319
+ self ,
320
+ resource : resource_lib .Resource ,
321
+ checksum : str | None = None ,
322
+ legacy_mode : bool = False ,
320
323
) -> epath .Path :
321
- return (
322
- self ._download_dir
323
- / resource .relative_download_dir
324
- / resource_lib .get_dl_fname (resource .url , checksum )
325
- )
324
+ """Returns the path where the resource should be downloaded.
325
+
326
+ Args:
327
+ resource: The resource to download.
328
+ checksum: The checksum of the resource.
329
+ legacy_mode: If True, returns path in the legacy format without dataset
330
+ name in the path.
331
+ """
332
+ download_dir = self ._download_dir
333
+ if not legacy_mode :
334
+ download_dir /= self ._dataset_name
335
+ download_dir /= resource .relative_download_dir
336
+ return download_dir / resource_lib .get_dl_fname (resource .url , checksum )
326
337
327
338
@property
328
339
def register_checksums (self ):
@@ -353,6 +364,50 @@ def _get_manually_downloaded_path(
353
364
354
365
return manual_path
355
366
367
+ def _get_checksum_dl_result (
368
+ self , resource : resource_lib .Resource , legacy_mode : bool = False
369
+ ) -> downloader .DownloadResult | None :
370
+ """Checks if the download has been cached and checksum is known."""
371
+ expected_url_info = self ._url_infos .get (resource .url )
372
+
373
+ if not expected_url_info :
374
+ return None
375
+
376
+ checksum_path = self ._get_dl_path (
377
+ resource , expected_url_info .checksum , legacy_mode = legacy_mode
378
+ )
379
+ if not resource_lib .is_locally_cached (checksum_path ):
380
+ return None
381
+
382
+ return downloader .DownloadResult (
383
+ path = checksum_path , url_info = expected_url_info
384
+ )
385
+
386
+ def _get_url_dl_result (
387
+ self , resource : resource_lib .Resource , legacy_mode : bool = False
388
+ ) -> downloader .DownloadResult | None :
389
+ """Checks if the download has been cached and checksum is unknown."""
390
+ url_path = self ._get_dl_path (resource , legacy_mode = legacy_mode )
391
+ if not resource_lib .is_locally_cached (url_path ):
392
+ return None
393
+
394
+ expected_url_info = self ._url_infos .get (resource .url )
395
+ url_info = downloader .read_url_info (url_path )
396
+
397
+ if expected_url_info and expected_url_info != url_info :
398
+ # If checksums are registered but do not match, trigger a new
399
+ # download (e.g. previous file corrupted, checksums updated)
400
+ return None
401
+ elif self ._is_checksum_registered (url = resource .url ):
402
+ # Checksums were registered: Rename -> checksum_path
403
+ path = self ._get_dl_path (resource , url_info .checksum )
404
+ resource_lib .replace_info_file (url_path , path )
405
+ url_path .replace (path )
406
+ return downloader .DownloadResult (path = path , url_info = url_info )
407
+ else :
408
+ # Checksums not registered: -> do nothing
409
+ return downloader .DownloadResult (path = url_path , url_info = url_info )
410
+
356
411
# Synchronize and memoize decorators ensure same resource will only be
357
412
# processed once, even if passed twice to download_manager.
358
413
@utils .build_synchronize_decorator ()
@@ -399,32 +454,20 @@ def _download_or_get_cache(
399
454
dl_result = None
400
455
401
456
# Download has been cached (checksum known)
402
- elif expected_url_info and resource_lib . is_locally_cached (
403
- checksum_path := self . _get_dl_path ( resource , expected_url_info . checksum )
404
- ):
405
- dl_result = downloader . DownloadResult (
406
- path = checksum_path , url_info = expected_url_info
407
- )
457
+ elif dl_result := self . _get_checksum_dl_result ( resource ):
458
+ pass
459
+
460
+ # Download has been cached (checksum known, legacy mode)
461
+ elif dl_result := self . _get_checksum_dl_result ( resource , legacy_mode = True ):
462
+ pass
408
463
409
464
# Download has been cached (checksum unknown)
410
- elif resource_lib .is_locally_cached (
411
- url_path := self ._get_dl_path (resource )
412
- ):
413
- url_info = downloader .read_url_info (url_path )
414
-
415
- if expected_url_info and expected_url_info != url_info :
416
- # If checksums are registered but do not match, trigger a new
417
- # download (e.g. previous file corrupted, checksums updated)
418
- dl_result = None
419
- elif self ._is_checksum_registered (url = url ):
420
- # Checksums were registered: Rename -> checksum_path
421
- path = self ._get_dl_path (resource , url_info .checksum )
422
- resource_lib .replace_info_file (url_path , path )
423
- url_path .replace (path )
424
- dl_result = downloader .DownloadResult (path = path , url_info = url_info )
425
- else :
426
- # Checksums not registered: -> do nothing
427
- dl_result = downloader .DownloadResult (path = url_path , url_info = url_info )
465
+ elif dl_result := self ._get_url_dl_result (resource ):
466
+ pass
467
+
468
+ # Download has been cached (checksum unknown, legacy mode)
469
+ elif dl_result := self ._get_url_dl_result (resource , legacy_mode = True ):
470
+ pass
428
471
429
472
# Cache not found
430
473
else :
@@ -504,7 +547,7 @@ def _download(
504
547
download_tmp_dir = (
505
548
url_path .parent / f'{ url_path .name } .tmp.{ uuid .uuid4 ().hex } '
506
549
)
507
- download_tmp_dir .mkdir ()
550
+ download_tmp_dir .mkdir (parents = True , exist_ok = True )
508
551
logging .info (f'Downloading { url } into { download_tmp_dir } ...' )
509
552
future = self ._downloader .download (
510
553
url , download_tmp_dir , verify = self ._verify_ssl
0 commit comments