From 1f520506b57342b35ab527146d34464874dedf4f Mon Sep 17 00:00:00 2001 From: Javier Romero Castro Date: Fri, 3 Nov 2023 17:08:46 +0100 Subject: [PATCH] file manager: copy from other bucket * closes https://github.com/zenodo/rdm-project/issues/508 Co-authored-by: Javier Romero Castro Co-authored-by: Karolina Przerwa --- invenio_records_resources/records/api.py | 11 +-- .../records/systemfields/files/manager.py | 67 +++++++++++++++---- setup.cfg | 2 +- 3 files changed, 61 insertions(+), 19 deletions(-) diff --git a/invenio_records_resources/records/api.py b/invenio_records_resources/records/api.py index 8919e62d..a097a5a0 100644 --- a/invenio_records_resources/records/api.py +++ b/invenio_records_resources/records/api.py @@ -66,13 +66,14 @@ def get_by_key(cls, record_id, key): @classmethod def list_by_record(cls, record_id, with_deleted=False): """List all record files by record ID.""" - query = cls.model_cls.query.filter(cls.model_cls.record_id == record_id) + with db.session.no_autoflush: + query = cls.model_cls.query.filter(cls.model_cls.record_id == record_id) - if not with_deleted: - query = query.filter(cls.model_cls.is_deleted != True) + if not with_deleted: + query = query.filter(cls.model_cls.is_deleted != True) - for obj in query: - yield cls(obj.data, model=obj) + for obj in query: + yield cls(obj.data, model=obj) @property def file(self): diff --git a/invenio_records_resources/records/systemfields/files/manager.py b/invenio_records_resources/records/systemfields/files/manager.py index e4ab244b..1716a75b 100644 --- a/invenio_records_resources/records/systemfields/files/manager.py +++ b/invenio_records_resources/records/systemfields/files/manager.py @@ -50,15 +50,19 @@ } """ +import uuid from collections.abc import MutableMapping +from datetime import datetime from functools import wraps +from invenio_db import db from invenio_files_rest.errors import ( BucketLockedError, InvalidKeyError, InvalidOperationError, ) from invenio_files_rest.models import 
Bucket, FileInstance, ObjectVersion +from sqlalchemy import insert def ensure_enabled(func): @@ -266,24 +270,61 @@ def teardown(self, full=True): self._order = [] def copy(self, src_files, copy_obj=True): - """Copy from another file manager.""" + """Copy from another file manager. + + If `self.bucket` is empty, all object versions are bulk-copied into it; + otherwise each file is copied (or linked) individually. + """ self.enabled = src_files.enabled if not self.enabled: return - for key, rf in src_files.items(): - # Copy object version of link existing? - if copy_obj: - dst_obj = rf.object_version.copy(bucket=self.bucket) - else: - dst_obj = rf.object_version - - # Copy file record - if rf.metadata is not None: - self[key] = dst_obj, rf.metadata - else: - self[key] = dst_obj + bucket_objects = ObjectVersion.query.filter_by(bucket_id=self.bucket_id).count() + if bucket_objects == 0: + # bucket is empty + # copy all object versions to self.bucket + objs = ObjectVersion.copy_from(src_files.bucket_id, self.bucket_id) + ovs_by_key = {obj["key"]: obj for obj in objs} + rf_to_bulk_insert = [] + + record_id = self.record.id + for key, rf in src_files.items(): + new_rf = { + "id": uuid.uuid4(), + "created": datetime.utcnow(), + "updated": datetime.utcnow(), + "key": key, + "record_id": record_id, + "version_id": 1, + "object_version_id": ovs_by_key[key]["version_id"], + "json": rf.metadata or {}, + } + rf_to_bulk_insert.append(new_rf) + + if rf_to_bulk_insert: + db.session.execute(insert(self.file_cls.model_cls), rf_to_bulk_insert) + # we need to populate entries from DB so we store the record file model + # instance + if not self._entries: + self._entries = {} + for rf in self.file_cls.list_by_record(self.record.id): + self._entries[rf.key] = rf + else: + # if bucket is not empty then we fall back to the slow process of copying + # files + for key, rf in src_files.items(): + # Copy object version or link existing? 
+ if copy_obj: + dst_obj = rf.object_version.copy(bucket=self.bucket) + else: + dst_obj = rf.object_version + + # Copy file record + if rf.metadata is not None: + self[key] = dst_obj, rf.metadata + else: + self[key] = dst_obj self.default_preview = src_files.default_preview self.order = src_files.order diff --git a/setup.cfg b/setup.cfg index d2cd654e..e2078b46 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,7 @@ tests = invenio-db[postgresql,mysql,versioning]>=1.0.14,<2.0.0 pytest-invenio>=2.1.0,<3.0.0 pytest-mock>=1.6.0 - sphinx>=4.2.0,<5 + sphinx>=5,<6 elasticsearch7 = invenio-search[elasticsearch7]>=2.1.0,<3.0.0 opensearch1 =