Skip to content

Commit 13b51f1

Browse files
authored
Merge pull request #390 from projectcaluma/feat-upload-file-to-s3storage
Use Django storage backend to handle file-upload to s3 storage
2 parents 8467b90 + 4f39f90 commit 13b51f1

33 files changed

+1307
-935
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ repos:
1111
stages: [commit]
1212
name: isort
1313
language: system
14-
entry: isort -y
14+
entry: isort
1515
types: [python]
1616
- id: flake8
1717
stages: [commit]

README.md

+30-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ Different environment variable types are explained at [django-environ](https://g
4949

5050
Additional authorization and validation of the models is handled by [DGAP](https://github.com/adfinis/django-generic-api-permissions/?tab=readme-ov-file#usage---for-people-deploying-a-dgap-equipped-app).
5151

52-
5352
#### Common
5453

5554
A list of configuration options which you need
@@ -76,12 +75,42 @@ A list of configuration options which you need
7675
- Data validation configuration
7776
- `ALEXANDRIA_VALIDATION_CLASSES`: Comma-separated list of [DGAP](https://github.com/adfinis/django-generic-api-permissions/?tab=readme-ov-file#data-validation) classes that define custom validations
7877
- Thumbnail configuration (optional)
78+
7979
- `ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION`: Set to `false` to disable thumbnail generation
8080
- Check the docker-compose file for an example on how to set up generation with s3 hooks
8181
- `ALEXANDRIA_THUMBNAIL_WIDTH`: Width of generated thumbnails
8282
- `ALEXANDRIA_THUMBNAIL_HEIGHT`: Height of generated thumbnails
8383
- `ALEXANDRIA_ENABLE_CHECKSUM`: Set to `false` to disable file checksums. Checksums are calculated after upload to allow later verification (not implemented in Alexandria)
8484

85+
- Storage configuration
86+
87+
Storage backends are configured globally. Each stored object records its encryption status, which allows the ORM to handle the data appropriately.
88+
89+
- `FILE_STORAGE_BACKEND`: Set the backend for file uploads. `django-storages` is available (default: `django.core.files.storage.FileSystemStorage`)
90+
91+
Encryption:
92+
93+
- `ALEXANDRIA_ENABLE_AT_REST_ENCRYPTION`: Set to `true` to enable at-rest encryption of files (enabling this causes an error unless `ALEXANDRIA_ENCRYPTION_METHOD` is set to a supported method)
94+
- `ALEXANDRIA_ENCRYPTION_METHOD`: Define encryption method that is applied to uploaded objects. Available values depend on storage backend's capabilities (default: `None`)
95+
- available methods
96+
- None: no at-rest encryption
97+
- `ssec-global`: encrypt all files with the same key (requires: `FILE_STORAGE_BACKEND`: `alexandria.storages.s3.S3Storage`)
98+
99+
Supported backends:
100+
101+
- `FileSystemStorage`: files are stored to the `MEDIA_ROOT` directory
102+
- `S3Storage`: files are uploaded to the S3 object storage configured accordingly
103+
104+
Required configurations:
105+
106+
- `AWS_S3_ACCESS_KEY_ID`: identity
107+
- `AWS_S3_SECRET_ACCESS_KEY`: password to authorize identity
108+
- `AWS_S3_ENDPOINT_URL`: the url of the service
109+
- `AWS_STORAGE_BUCKET_NAME`: the bucket name of the storage to access objects in path notation (not subdomain)
110+
111+
The development setup features a minio service, implementing the S3 protocol.
112+
To use SSE-C in development make sure to generate a certificate for the minio container and set `AWS_S3_VERIFY` to `false`.
113+
85114
For development, you can also set the following environment variables to help you:
86115

87116
- `ALEXANDRIA_DEV_AUTH_BACKEND`: Set this to "true" to enable a fake auth backend that simulates an authenticated user. Requires `DEBUG` to be set to `True` as well.

alexandria/conftest.py

+17-62
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,17 @@
11
import importlib
22
import inspect
3+
import shutil
34
import sys
4-
import time
5-
from io import BytesIO
5+
from pathlib import Path
66

77
import pytest
88
from django.apps import apps
99
from django.core.cache import cache
1010
from factory.base import FactoryMetaClass
11-
from minio import Minio
12-
from minio.datatypes import Object as MinioStatObject
13-
from minio.helpers import ObjectWriteResult
1411
from pytest_factoryboy import register
1512
from pytest_factoryboy.fixture import Box
1613
from rest_framework.test import APIClient
17-
from urllib3 import HTTPResponse
1814

19-
from alexandria.core.storage_clients import Minio as MinioStorageClient
20-
from alexandria.core.tests import file_data
2115
from alexandria.oidc_auth.models import OIDCUser
2216

2317

@@ -35,6 +29,21 @@ def register_module(module):
3529
register_module(importlib.import_module(".core.factories", "alexandria"))
3630

3731

32+
@pytest.fixture(autouse=True)
def _default_file_storage_backend(settings):
    """Run every test against local, unencrypted file storage.

    Applied automatically to all tests; both overrides go through the
    ``settings`` fixture passed in.
    """
    # Switch at-rest encryption off for the test run.
    settings.ALEXANDRIA_ENABLE_AT_REST_ENCRYPTION = False
    # Store uploaded files on the local filesystem.
    settings.DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage"
36+
37+
38+
@pytest.fixture(autouse=True)
def _make_clean_media_dir(settings):
    """Give each test its own ``MEDIA_ROOT`` and remove it afterwards.

    Setup creates ``<MEDIA_ROOT>/test`` (including missing parents) and points
    ``settings.MEDIA_ROOT`` at it. Teardown deletes that directory together
    with everything the test stored in it.
    """
    test_media_root = Path(settings.MEDIA_ROOT) / "test"
    test_media_root.mkdir(parents=True, exist_ok=True)
    settings.MEDIA_ROOT = str(test_media_root)

    # Hand control to the test; everything below this line is teardown.
    yield

    # The test may already have removed the directory itself, so a missing
    # tree must not turn cleanup into a failure.
    shutil.rmtree(test_media_root, ignore_errors=True)
45+
46+
3847
@pytest.fixture
def admin_groups():
    """Provide the list of admin group names (a single ``"admin"`` entry)."""
    return ["admin"]
@@ -89,57 +98,3 @@ def reset_config_classes(settings):
8998
# First, set config to original value
9099
core_config = apps.get_app_config("generic_permissions")
91100
core_config.ready()
92-
93-
94-
@pytest.fixture
95-
def minio_mock(mocker, settings):
96-
def presigned_get_object_side_effect(bucket, object_name, expires):
97-
return f"http://minio/download-url/{object_name}"
98-
99-
def get_object_side_effect(bucket, object_name):
100-
file = object_name.split("_", 1)[1].encode()
101-
if object_name.endswith(".unsupported"):
102-
file = file_data.unsupported
103-
return HTTPResponse(
104-
body=BytesIO(file),
105-
preload_content=False,
106-
)
107-
108-
stat_response = MinioStatObject(
109-
settings.ALEXANDRIA_MINIO_STORAGE_MEDIA_BUCKET_NAME,
110-
"some-file.pdf",
111-
time.struct_time((2019, 4, 5, 7, 0, 49, 4, 95, 0)),
112-
"0c81da684e6aaef48e8f3113e5b8769b",
113-
8200,
114-
content_type="application/pdf",
115-
metadata={"X-Amz-Meta-Testtag": "super_file"},
116-
)
117-
mocker.patch.object(Minio, "presigned_get_object")
118-
mocker.patch.object(Minio, "presigned_put_object")
119-
mocker.patch.object(Minio, "stat_object")
120-
mocker.patch.object(Minio, "bucket_exists")
121-
mocker.patch.object(Minio, "make_bucket")
122-
mocker.patch.object(Minio, "remove_object")
123-
mocker.patch.object(Minio, "copy_object")
124-
mocker.patch.object(Minio, "get_object")
125-
mocker.patch.object(Minio, "put_object")
126-
Minio.get_object.side_effect = get_object_side_effect
127-
Minio.presigned_get_object.side_effect = presigned_get_object_side_effect
128-
Minio.put_object.return_value = ObjectWriteResult(
129-
bucket_name=settings.ALEXANDRIA_MINIO_STORAGE_MEDIA_BUCKET_NAME,
130-
object_name="some-file.pdf",
131-
version_id="",
132-
etag="af1421c17294eed533ec99eb82b468fb",
133-
http_headers="",
134-
)
135-
Minio.presigned_put_object.return_value = "http://minio/upload-url"
136-
Minio.stat_object.return_value = stat_response
137-
Minio.bucket_exists.return_value = True
138-
return Minio
139-
140-
141-
@pytest.fixture
142-
def mock_s3storage(minio_mock, requests_mock):
143-
minio = MinioStorageClient()
144-
mock = requests_mock.put(minio.upload_url("the-object"), status_code=201)
145-
return mock

alexandria/core/factories.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import factory
12
from factory import Faker, SubFactory, post_generation
23
from factory.django import DjangoModelFactory
34

5+
from alexandria.core.tests import file_data
6+
47
from . import models
58

69

@@ -89,8 +92,45 @@ def marks(self, create, extracted, **kwargs): # pragma: todo cover
8992

9093

9194
class FileFactory(BaseFactory):
92-
name = Faker("name")
95+
"""Factory for File.
96+
97+
Usage:
98+
If you want a nice set of Document, File with thumbnails:
99+
100+
`thumb = FileFactory(variant=File.Variant.THUMBNAIL)`
101+
102+
for multiple versions:
103+
`thumb_v2 = FileFactory(variant=File.Variant.THUMBNAIL, document=thumb.document)`
104+
105+
106+
"""
107+
108+
name = factory.Maybe(
109+
factory.LazyAttribute(lambda o: o.variant == models.File.Variant.ORIGINAL),
110+
yes_declaration=Faker("name"),
111+
no_declaration=factory.LazyAttribute(
112+
lambda o: f"{o.original.name}_preview.jpg"
113+
),
114+
)
93115
document = SubFactory(DocumentFactory)
116+
variant = models.File.Variant.ORIGINAL
117+
content = factory.Maybe(
118+
factory.LazyAttribute(lambda o: o.variant == models.File.Variant.THUMBNAIL),
119+
yes_declaration=factory.django.ImageField(
120+
filename="thumb_preview.jpg", width=256, height=256, format="JPEG"
121+
),
122+
no_declaration=factory.django.FileField(
123+
filename="the_file.png", data=file_data.png
124+
),
125+
)
126+
original = factory.Maybe(
127+
factory.LazyAttribute(lambda o: o.variant == models.File.Variant.THUMBNAIL),
128+
SubFactory(
129+
"alexandria.core.factories.FileFactory",
130+
variant=models.File.Variant.ORIGINAL,
131+
document=factory.SelfAttribute("..document"),
132+
),
133+
)
94134

95135
class Meta:
96136
model = models.File

alexandria/core/management/commands/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from django.conf import settings
2+
from django.core.files.storage import get_storage_class
3+
from django.core.management.base import BaseCommand
4+
from django.db import transaction
5+
from django.db.models import Q
6+
from tqdm import tqdm
7+
8+
from alexandria.core.models import File
9+
from alexandria.storages.backends.s3 import SsecGlobalS3Storage
10+
11+
# This is needed to disable the warning about not verifying the SSL certificate.
12+
# It spams the output otherwise.
13+
if not settings.AWS_S3_VERIFY:
14+
import urllib3
15+
16+
urllib3.disable_warnings()
17+
18+
19+
class Command(BaseCommand):
    """Re-store the content of not-yet-encrypted files via the encrypted backend.

    Every ``File`` whose ``encryption_status`` is ``NOT_ENCRYPTED`` or unset is
    read through the default storage class. Unless ``--dry`` is given, it is
    then written back under the same name through ``SsecGlobalS3Storage``.
    Afterwards the row's ``encryption_status`` is set to
    ``settings.ALEXANDRIA_ENCRYPTION_METHOD``.

    The whole run happens inside one transaction. With ``--dry`` the database
    changes are rolled back to a savepoint taken before the loop.
    """

    help = "Swaps plain text file content to encrypted content"

    def add_arguments(self, parser):
        """Register the ``--dry`` flag (stored as ``options["dry"]``, default off)."""
        parser.add_argument("--dry", dest="dry", action="store_true", default=False)

    @transaction.atomic
    def handle(self, *args, **options):
        """Encrypt all unencrypted files, or only simulate it when ``--dry`` is set.

        Emits a warning and does nothing when at-rest encryption is disabled
        or the configured method equals the "not encrypted" status value.
        """
        if (
            not settings.ALEXANDRIA_ENABLE_AT_REST_ENCRYPTION
            or settings.ALEXANDRIA_ENCRYPTION_METHOD
            == File.EncryptionStatus.NOT_ENCRYPTED.value
        ):
            return self.stdout.write(
                self.style.WARNING(
                    "Encryption is not enabled. Skipping encryption of files."
                )
            )

        # disable checksums to prevent errors
        checksum = settings.ALEXANDRIA_ENABLE_CHECKSUM
        settings.ALEXANDRIA_ENABLE_CHECKSUM = False

        sid = transaction.savepoint()

        try:
            # flip between default and encrypted storage to have the correct
            # parameters in the requests
            DefaultStorage = get_storage_class()
            for file in tqdm(
                File.objects.filter(
                    Q(encryption_status=File.EncryptionStatus.NOT_ENCRYPTED)
                    | Q(encryption_status__isnull=True)
                ),
            ):
                # get original file content
                file.content.storage = DefaultStorage()
                content = file.content.open()
                try:
                    if not options["dry"]:
                        # overwrite with encrypted content
                        file.content.storage = SsecGlobalS3Storage()
                        file.content.save(file.content.name, content)
                finally:
                    # Release the handle per file; otherwise one open handle
                    # per processed file accumulates for the whole run.
                    content.close()

                # set encryption status
                file.encryption_status = settings.ALEXANDRIA_ENCRYPTION_METHOD
                file.save()
        finally:
            # The setting is process-global, so it must be restored even when
            # a file fails to read or upload.
            settings.ALEXANDRIA_ENABLE_CHECKSUM = checksum

        if options["dry"]:  # pragma: no cover
            transaction.savepoint_rollback(sid)
        else:
            transaction.savepoint_commit(sid)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Generated by Django 3.2.23 on 2023-12-13 18:00
2+
3+
from django.db import migrations, models
4+
5+
import alexandria.core.models
6+
import alexandria.storages.fields
7+
8+
9+
def migrate_file_references(apps, schema_editor):
    """Point each file's new ``content`` field at its existing stored object.

    Files used to be accessed through a download URL, and the object name in
    the simple object storage was just the file's name. Writing the name
    produced by ``upload_file_content_to`` into the ``content`` field therefore
    refers to the same object, provided it still exists.

    Make sure the object storage client points to the right endpoint and
    bucket before accessing the files.
    """
    file_model = apps.get_model("alexandria_core", "File")
    build_name = alexandria.core.models.upload_file_content_to
    for record in file_model.objects.iterator():
        record.content.name = build_name(record, None)
        record.save()
22+
23+
24+
class Migration(migrations.Migration):
    """Replace ``File.upload_status`` with ``content`` and ``encryption_status``.

    After the schema changes, ``migrate_file_references`` fills in the
    ``content`` name of every existing file.
    """

    dependencies = [
        ("alexandria_core", "0012_tag_uuid_schema"),
    ]

    operations = [
        # Drop the old upload_status column.
        migrations.RemoveField(
            model_name="file",
            name="upload_status",
        ),
        # Add the storage-backed content field. The empty-string default is
        # only used to populate existing rows (preserve_default=False).
        migrations.AddField(
            model_name="file",
            name="content",
            field=alexandria.storages.fields.DynamicStorageFileField(
                default="", upload_to=alexandria.core.models.upload_file_content_to
            ),
            preserve_default=False,
        ),
        # Add the per-file encryption marker; NULL means "status not set".
        migrations.AddField(
            model_name="file",
            name="encryption_status",
            field=models.CharField(
                blank=True,
                choices=[
                    (None, "Encryption status not set"),
                    ("none", "No at-rest enryption"),
                    ("ssec-global", "SSE-C global key encryption (AES256)"),
                    ("ssec-object", "SSE-C per object encryption (AES256)"),
                ],
                default=None,
                max_length=12,
                null=True,
            ),
        ),
        # Data step: set content names on existing rows; the reverse is a no-op.
        migrations.RunPython(migrate_file_references, migrations.RunPython.noop),
    ]

0 commit comments

Comments
 (0)