From ded13fd31daf65243195173fca0ba9805b0d2d9c Mon Sep 17 00:00:00 2001
From: Pablo Tamarit
Date: Mon, 24 Feb 2025 16:09:32 +0100
Subject: [PATCH] views: signposting: files: fix filename encoding issues for downloads

---
 invenio_rdm_records/resources/urls.py         | 10 +++++
 .../test_signposting_serializer.py            | 41 +++++++++++++++----
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/invenio_rdm_records/resources/urls.py b/invenio_rdm_records/resources/urls.py
index 4cb2a07bb..7dbe5531f 100644
--- a/invenio_rdm_records/resources/urls.py
+++ b/invenio_rdm_records/resources/urls.py
@@ -26,6 +26,9 @@
 all.
 """
 
+import unicodedata
+from urllib.parse import quote
+
 from flask import current_app
 
 
@@ -49,6 +52,13 @@ def download_url_for(pid_value="", filename=""):
     """Return url for download route."""
     url_prefix = current_app.config.get("SITE_UI_URL", "")
 
+    # see https://github.com/pallets/werkzeug/blob/main/src/werkzeug/utils.py#L456-L465
+    try:
+        filename.encode("ascii")
+    except UnicodeEncodeError:
+        # safe = RFC 5987 attr-char
+        filename = quote(filename, safe="!#$&+-.^_`|~")
+
     # We use [] so that this fails and brings to attention the configuration
     # problem if APP_RDM_ROUTES.record_file_download is missing
     url_path = (
diff --git a/tests/resources/serializers/test_signposting_serializer.py b/tests/resources/serializers/test_signposting_serializer.py
index 2a79ce0c0..8aea59bbe 100644
--- a/tests/resources/serializers/test_signposting_serializer.py
+++ b/tests/resources/serializers/test_signposting_serializer.py
@@ -8,13 +8,38 @@
 
 """Resources serializers tests."""
 
+import pytest
+
 from invenio_rdm_records.resources.serializers import (
     FAIRSignpostingProfileLvl1Serializer,
     FAIRSignpostingProfileLvl2Serializer,
 )
 
 
-def test_signposting_serializer_full(running_app, full_record_to_dict):
+@pytest.fixture
+def full_record_to_dict_signposting(full_record_to_dict):
+    """Return the full record dict with one file whose key is non-ASCII."""
+    full_record_to_dict["files"] = {
+        "count": 1,
+        "enabled": True,
+        "entries": {
+            "testé.txt": {
+                "checksum": "md5:e795abeef2c38de2b064be9f6364ceae",
+                "ext": "txt",
+                "id": "d22bde05-5a36-48a3-86a7-acf2c4bb6f64",
+                "key": "testé.txt",
+                "metadata": None,
+                "mimetype": "text/plain",
+                "size": 9,
+            },
+        },
+        "order": [],
+        "total_bytes": 9,
+    }
+    return full_record_to_dict
+
+
+def test_signposting_serializer_full(running_app, full_record_to_dict_signposting):
     expected = {
         "linkset": [
             # Landing page Link Context Object
@@ -82,7 +107,7 @@ def test_signposting_serializer_full(running_app, full_record_to_dict):
                 ],
                 "item": [
                     {
-                        "href": "https://127.0.0.1:5000/records/12345-abcde/files/test.txt",  # noqa
+                        "href": "https://127.0.0.1:5000/records/12345-abcde/files/test%C3%A9.txt",  # noqa
                         "type": "text/plain",
                     }
                 ],
@@ -97,7 +122,7 @@ def test_signposting_serializer_full(running_app, full_record_to_dict):
             },
             # Content Resource (file) Link Context Object
             {
-                "anchor": "https://127.0.0.1:5000/records/12345-abcde/files/test.txt",
+                "anchor": "https://127.0.0.1:5000/records/12345-abcde/files/test%C3%A9.txt",
                 "collection": [
                     {
                         "href": "https://127.0.0.1:5000/records/12345-abcde",
@@ -118,15 +143,17 @@ def test_signposting_serializer_full(running_app, full_record_to_dict):
         ]
     }
 
-    serialized = FAIRSignpostingProfileLvl2Serializer().dump_obj(full_record_to_dict)
+    serialized = FAIRSignpostingProfileLvl2Serializer().dump_obj(
+        full_record_to_dict_signposting
+    )
 
     assert expected == serialized
 
 
-def test_signposting_lvl1_serializer_full(running_app, full_record_to_dict):
+def test_signposting_lvl1_serializer_full(running_app, full_record_to_dict_signposting):
     ui_url = "https://127.0.0.1:5000/records/12345-abcde"
     api_url = "https://127.0.0.1:5000/api/records/12345-abcde"
-    filename = "test.txt"
+    filename = "test%C3%A9.txt"
 
     expected = [
         f' ; rel="author"',
@@ -154,7 +181,7 @@ def test_signposting_lvl1_serializer_full(running_app, full_record_to_dict):
     ]
 
     serialized = FAIRSignpostingProfileLvl1Serializer().serialize_object(
-        full_record_to_dict
+        full_record_to_dict_signposting
     )
 
     assert expected == serialized.split(" , ")