Skip to content

Commit 347b7e6

Browse files
authored
Merge pull request #8 from AllenInstitute/bugfix/s3-check-paths-sort-error
fix sort discrepancy between Path and str
2 parents 605a242 + 6f0a396 commit 347b7e6

File tree

2 files changed

+45
-8
lines changed
  • src/aibs_informatics_aws_utils
  • test/aibs_informatics_aws_utils

2 files changed

+45
-8
lines changed

src/aibs_informatics_aws_utils/s3.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def download_s3_object(
275275
if force or should_sync(
276276
source_path=s3_path, destination_path=local_path, size_only=size_only, **kwargs
277277
):
278-
if local_path.exists() and not exist_ok:
278+
if local_path.exists() and not exist_ok and not force:
279279
raise ValueError(f"Unable to download S3 object to {local_path}. Path exists.")
280280
elif local_path.exists() and local_path.is_dir() and exist_ok:
281281
logger.warning(
@@ -1109,36 +1109,48 @@ def check_paths_in_sync(
11091109
"""
11101110
source_paths: Union[List[Path], List[S3URI]] = (
11111111
(
1112-
sorted(map(Path, find_paths(source_path, include_dirs=False)))
1112+
[Path(p) for p in sorted(find_paths(source_path, include_dirs=False))]
11131113
if source_path.is_dir()
11141114
else [source_path]
11151115
)
11161116
if isinstance(source_path, Path)
11171117
else (
1118-
list_s3_paths(source_path, **kwargs) if not is_object(source_path) else [source_path]
1118+
sorted(list_s3_paths(source_path, **kwargs)) # type: ignore[arg-type]
1119+
if not is_object(source_path)
1120+
else [source_path] # type: ignore
11191121
)
11201122
)
11211123
destination_paths: Union[List[Path], List[S3URI]] = (
11221124
(
1123-
sorted(map(Path, find_paths(destination_path, include_dirs=False)))
1125+
list(map(Path, sorted(find_paths(destination_path, include_dirs=False))))
11241126
if destination_path.is_dir()
11251127
else [destination_path]
11261128
)
11271129
if isinstance(destination_path, Path)
11281130
else (
1129-
list_s3_paths(destination_path, **kwargs)
1131+
sorted(list_s3_paths(destination_path, **kwargs)) # type: ignore[arg-type]
11301132
if not is_object(destination_path)
1131-
else [destination_path]
1133+
else [destination_path] # type: ignore
11321134
)
11331135
)
11341136
if len(source_paths) == 0:
11351137
raise ValueError(f"Source path {source_path} does not exist")
11361138
if len(source_paths) != len(destination_paths):
1139+
logger.info(
1140+
"Source and destination paths have different number of paths. "
1141+
f"Source path {source_path} has {len(source_paths)} paths, "
1142+
f"destination path {destination_path} has {len(destination_paths)} paths"
1143+
)
11371144
return False
11381145
for sp, dp in zip(source_paths, destination_paths):
11391146
if str(sp).removeprefix(str(source_path)) != str(dp).removeprefix(str(destination_path)):
1147+
logger.info(
1148+
f"Source path {sp} (relative={str(sp).removeprefix(str(source_path))}) does not match "
1149+
f"destination path {dp} (relative={str(dp).removeprefix(str(destination_path))})"
1150+
)
11401151
return False
11411152
if should_sync(source_path=sp, destination_path=dp, size_only=size_only, **kwargs):
1153+
logger.info(f"Source path {sp} content does not match destination path {dp}")
11421154
return False
11431155
return True
11441156

test/aibs_informatics_aws_utils/test_s3.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,8 @@ def test__upload_path__download_s3_path__handles_file(self):
400400
upload_path(previous_file, s3_path)
401401
self.assertTrue(is_object(s3_path))
402402
with self.assertRaises(Exception):
403-
download_s3_path(s3_path, existing_file, exist_ok=False)
404-
download_s3_path(s3_path, existing_file, exist_ok=True)
403+
download_s3_path(s3_path, existing_file, force=False, exist_ok=False)
404+
download_s3_path(s3_path, existing_file, force=False, exist_ok=True)
405405
download_s3_path(s3_path, non_existing_file, exist_ok=False)
406406
self.assertEqual(existing_file.read_text(), previous_file.read_text())
407407
self.assertEqual(non_existing_file.read_text(), previous_file.read_text())
@@ -1063,6 +1063,31 @@ def test__check_paths_in_sync__simple__folders_different(self):
10631063
assert check_paths_in_sync(source_s3_path, destination_path2) is False
10641064
assert check_paths_in_sync(source_s3_path, destination_s3_path2) is False
10651065

1066+
def test__check_paths_in_sync__handles_sorting_issues__folders_same(self):
1067+
source_path = self.tmp_path()
1068+
(source_path / "a.txt").write_text("hello")
1069+
(source_path / "a").mkdir()
1070+
(source_path / "a" / "b.txt").write_text("again")
1071+
1072+
source_s3_path = self.get_s3_path("source")
1073+
self.put_object(key="source/a.txt", content="hello")
1074+
self.put_object(key="source/a/b.txt", content="again")
1075+
1076+
destination_path = self.tmp_path()
1077+
(destination_path / "a.txt").write_text("hello")
1078+
(destination_path / "a").mkdir()
1079+
(destination_path / "a" / "b.txt").write_text("again")
1080+
1081+
destination_s3_path = self.get_s3_path("destination")
1082+
self.put_object(key="destination/a.txt", content="hello")
1083+
self.put_object(key="destination/a/b.txt", content="again")
1084+
1085+
# Should succeed
1086+
assert check_paths_in_sync(source_path, destination_path) is True
1087+
assert check_paths_in_sync(source_path, destination_s3_path) is True
1088+
assert check_paths_in_sync(source_s3_path, destination_path) is True
1089+
assert check_paths_in_sync(source_s3_path, destination_s3_path) is True
1090+
10661091

10671092
@fixture(scope="function")
10681093
def s3_bucket_fixture(aws_credentials_fixture, request):

0 commit comments

Comments
 (0)