|
12 | 12 | # limitations under the License.
|
13 | 13 | import os
|
14 | 14 | import shutil
|
| 15 | +import subprocess |
15 | 16 | from abc import ABC
|
16 | 17 | from typing import Any, Dict, List
|
17 | 18 | from urllib import parse
|
@@ -40,29 +41,45 @@ def download_file(self, remote_chunkpath: str, local_chunkpath: str) -> None:
|
class S3Downloader(Downloader):
    """Downloader for ``s3://`` URLs.

    Uses the ``s5cmd`` command-line tool when it is found on ``PATH`` and
    falls back to the ``S3Client`` wrapper (boto3) otherwise.
    """

    def __init__(self, remote_dir: str, cache_dir: str, chunks: List[Dict[str, Any]]):
        """Initialise the base downloader and pick the transfer backend.

        Args:
            remote_dir: Forwarded unchanged to the base class.
            cache_dir: Forwarded unchanged to the base class.
            chunks: Forwarded unchanged to the base class.
        """
        super().__init__(remote_dir, cache_dir, chunks)

        # Look the binary up on PATH instead of executing it through a shell:
        # no process is spawned, nothing depends on `/dev/null`, and the
        # result does not hinge on the exit code of an argument-less call.
        self._s5cmd_available = shutil.which("s5cmd") is not None

        # The client is only needed (and only created) for the fallback path.
        if not self._s5cmd_available:
            self._client = S3Client()

    def download_file(self, remote_filepath: str, local_filepath: str) -> None:
        """Download ``remote_filepath`` to ``local_filepath`` unless it already exists.

        Args:
            remote_filepath: Source URL; its scheme must be ``s3``.
            local_filepath: Destination path on the local filesystem.

        Raises:
            ValueError: If ``remote_filepath`` does not use the ``s3`` scheme.
            RuntimeError: If the ``s5cmd`` process exits with a non-zero status.
        """
        obj = parse.urlparse(remote_filepath)

        if obj.scheme != "s3":
            raise ValueError(f"Expected obj.scheme to be `s3`, instead, got {obj.scheme} for remote={remote_filepath}")

        # Cheap fast path: nothing to do and no lock file to touch.
        if os.path.exists(local_filepath):
            return

        try:
            # timeout=0: do not wait for the lock; if it is already held the
            # Timeout handler below treats the file as someone else's job.
            with FileLock(local_filepath + ".lock", timeout=0):
                # Re-check under the lock: the file may have been completed
                # between the fast-path check above and acquiring the lock.
                if os.path.exists(local_filepath):
                    return

                if self._s5cmd_available:
                    self._download_with_s5cmd(remote_filepath, local_filepath)
                else:
                    self._download_with_boto3(obj.netloc, obj.path.lstrip("/"), local_filepath)
        except Timeout:
            # another process is responsible to download that file, continue
            pass

    def _download_with_s5cmd(self, remote_filepath: str, local_filepath: str) -> None:
        """Copy one object with ``s5cmd cp`` and fail loudly on a non-zero exit."""
        # An argv list (no shell) keeps paths containing spaces or shell
        # metacharacters intact and rules out command injection.
        command = ["s5cmd", "--numworkers", "64", "cp", remote_filepath, local_filepath]

        # `run` drains the pipes while waiting, unlike Popen(stdout=PIPE)
        # followed by wait(), which can block once the pipe buffer fills.
        result = subprocess.run(  # noqa: S603
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=False,
        )

        if result.returncode != 0:
            details = result.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                f"s5cmd failed with exit code {result.returncode} while copying "
                f"{remote_filepath} to {local_filepath}: {details}"
            )

    def _download_with_boto3(self, bucket: str, key: str, local_filepath: str) -> None:
        """Copy one object through the boto3-backed client."""
        # Imported lazily so boto3 is only required when this path is used.
        from boto3.s3.transfer import TransferConfig

        extra_args: Dict[str, Any] = {}

        # Issue: https://github.com/boto/boto3/issues/3113
        self._client.client.download_file(
            bucket,
            key,
            local_filepath,
            ExtraArgs=extra_args,
            Config=TransferConfig(use_threads=False),
        )
|
|
0 commit comments