|
| 1 | +import string |
| 2 | +from typing import ( |
| 3 | + TYPE_CHECKING, |
| 4 | +) |
| 5 | + |
| 6 | +from azul import ( |
| 7 | + CatalogName, |
| 8 | + cache, |
| 9 | + cached_property, |
| 10 | + config, |
| 11 | +) |
| 12 | +from azul.deployment import ( |
| 13 | + R, |
| 14 | + aws, |
| 15 | +) |
| 16 | +from azul.drs import ( |
| 17 | + AccessMethod, |
| 18 | +) |
| 19 | +from azul.http import ( |
| 20 | + HasCachedHttpClient, |
| 21 | +) |
| 22 | +from azul.plugins import ( |
| 23 | + File, |
| 24 | + RepositoryPlugin, |
| 25 | +) |
| 26 | +from azul.plugins.metadata.hca import ( |
| 27 | + HCAFile, |
| 28 | +) |
| 29 | +from azul.service.storage_service import ( |
| 30 | + StorageService, |
| 31 | +) |
| 32 | + |
| 33 | +if TYPE_CHECKING: |
| 34 | + from mypy_boto3_s3.service_resource import ( |
| 35 | + MultipartUpload, |
| 36 | + ) |
| 37 | + |
| 38 | + |
class MirrorService(HasCachedHttpClient):
    """
    Mirrors repository files into the bucket named by ``aws.mirror_bucket``
    using multipart uploads. A file is mirrored by calling
    :meth:`begin_mirroring_file` once, :meth:`mirror_file_part` for every
    part, and finally :meth:`finish_mirroring_file`.
    """

    @cached_property
    def _storage(self) -> StorageService:
        """
        The storage service bound to the mirror bucket.
        """
        return StorageService(bucket_name=aws.mirror_bucket)

    @cache
    def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin:
        """
        Load and instantiate the repository plugin for the given catalog.
        """
        plugin_cls = RepositoryPlugin.load(catalog)
        return plugin_cls.create(catalog)

    def begin_mirroring_file(self, file: File) -> str:
        """
        Initiate a multipart upload of the given file and return the upload ID.
        """
        assert isinstance(file, HCAFile), R('Only HCA catalogs are supported')
        object_key = self.mirror_object_key(file)
        upload = self._storage.create_multipart_upload(object_key=object_key,
                                                       content_type=file.content_type)
        return upload.upload_id

    def mirror_file_part(self,
                         catalog: CatalogName,
                         file: File,
                         part: File.PartRange,
                         upload_id: str):
        """
        Upload a part of a file to a multipart upload begun with
        :meth:`begin_mirroring_file`

        The bytes of the part are fetched from the repository with an HTTP
        range request and handed to the storage service. The return value is
        whatever the storage service returns for the uploaded part.

        :raises RuntimeError: if the repository responds with a status other
                              than 206
        """
        download_url = self._get_repository_url(catalog, file)
        upload = self._get_upload(file, upload_id)
        byte_range = f'bytes={part.start}-{part.end}'
        response = self._http_client.request('GET',
                                             download_url,
                                             headers={'Range': byte_range})
        # Only a partial-content response is accepted. Anything else means
        # the range request was not honored as such.
        if response.status != 206:
            raise RuntimeError('Unexpected response from repository', response.status)
        # NOTE(review): the number passed on is offset by one from
        # `part.part_number`, presumably because our part numbers are
        # zero-based while the storage service expects one-based numbers.
        # Confirm against StorageService.upload_multipart_part.
        return self._storage.upload_multipart_part(response.data,
                                                   part.part_number + 1,
                                                   upload)

    def finish_mirroring_file(self,
                              file: File,
                              upload_id: str,
                              *,
                              etags: list[str] | None) -> None:
        """
        Complete a multipart upload begun with :meth:`begin_mirroring_file`.
        If ETags are not provided, the caller is responsible for ensuring that
        all previous calls to :meth:`mirror_file_part` were successful.
        """
        upload = self._get_upload(file, upload_id)
        self._storage.complete_multipart_upload(upload, etags)

    def get_mirror_url(self, file: File) -> str:
        """
        Return a presigned URL for the mirrored copy of the given file.
        """
        return self._storage.get_presigned_url(key=self.mirror_object_key(file),
                                               file_name=file.name)

    def mirror_object_key(self, file: File) -> str:
        """
        Return the key under which the given file is stored in the mirror
        bucket.
        """
        return self._file_key('file', file)

    def _file_key(self, prefix: str, file: File) -> str:
        """
        Build an object key of the form ``<prefix>/<digest>.<digest type>``
        from the digest of the given file. The digest is lower-cased and must
        be a non-empty hexadecimal string.
        """
        digest, digest_type = file.digest()
        # all() is vacuously true for an empty string, so emptiness has to be
        # ruled out explicitly. Otherwise an empty digest would pass the check
        # and yield a key without a digest in it.
        is_hex = bool(digest) and all(c in string.hexdigits for c in digest)
        assert is_hex, R(
            'Expected a hexadecimal digest', digest)
        return f'{prefix}/{digest.lower()}.{digest_type}'

    def _get_repository_url(self, catalog: CatalogName, file: File):
        """
        Resolve the DRS URI of the given file to a URL from which the file's
        content can be downloaded.
        """
        assert config.is_tdr_enabled(catalog), R('Only TDR catalogs are supported')
        drs = self.repository_plugin(catalog).drs_client(authentication=None)
        # NOTE(review): access is requested with the `gs` method, yet the
        # result is required to use `https`. Presumably the DRS client
        # resolves `gs` access to a signed HTTPS URL; confirm against
        # azul.drs.
        access = drs.get_object(file.drs_uri, AccessMethod.gs)
        assert access.method is AccessMethod.https, access
        return access.url

    def _get_upload(self, file: File, upload_id: str) -> 'MultipartUpload':
        """
        Load the multipart upload with the given ID for the mirror object of
        the given file.
        """
        return self._storage.load_multipart_upload(object_key=self.mirror_object_key(file),
                                                   upload_id=upload_id)
0 commit comments