diff --git a/opencodelists/forms.py b/opencodelists/forms.py index e12ce7e6..5c33f58f 100644 --- a/opencodelists/forms.py +++ b/opencodelists/forms.py @@ -77,13 +77,21 @@ def clean_csv_data(self): if not f: return + try: + data = f.read().decode("utf-8-sig") + except UnicodeDecodeError as exception: + raise forms.ValidationError( + "File could not be read. Please ensure the file contains CSV " + "data (not Excel, for example). It should be a text file encoded " + f"in the UTF-8 format. Error details: {exception}." + ) + # Eventually coding system version may be a selectable field, but for now it - # just defaults to using the most recent one + # just defaults to using the most recent one. coding_system = CODING_SYSTEMS[ self.cleaned_data["coding_system_id"] ].get_by_release_or_most_recent() - data = f.read().decode("utf-8-sig") codes = [row[0] for row in csv.reader(StringIO(data)) if row] validate_csv_data_codes(coding_system, codes) return codes diff --git a/opencodelists/tests/test_forms.py b/opencodelists/tests/test_forms.py index c3cda63b..3afc2ef0 100644 --- a/opencodelists/tests/test_forms.py +++ b/opencodelists/tests/test_forms.py @@ -1,7 +1,8 @@ import pytest from django.core.exceptions import ValidationError +from django.core.files.base import ContentFile -from ..forms import MembershipCreateForm, UserPasswordForm +from ..forms import CodelistCreateForm, MembershipCreateForm, UserPasswordForm def test_userpasswordform_different_passwords(): @@ -48,3 +49,41 @@ def test_membership_create_form_no_user(organisation): ) assert form.is_valid() is False assert form.errors == {"user_idenitfier": ["User unknown@test.com does not exist"]} + + +class TestCodelistCreateForm: + def _bind_form(self, file): + return CodelistCreateForm( + data={ + "name": "test name", + "coding_system_id": "snomedct", + }, + files={ + "csv_data": file, + }, + owner_choices=[], + ) + + def test_bound_form_valid(self, disorder_of_elbow_codes): + valid_csv_data = "\n".join(disorder_of_elbow_codes) + file = ContentFile(valid_csv_data.encode("utf-8")) + file.name = "valid_data.csv" + form = self._bind_form(file) + + assert form.is_valid() + assert form.cleaned_data["name"] == "test name" + assert form.cleaned_data["coding_system_id"] == "snomedct" + assert form.cleaned_data["csv_data"] == disorder_of_elbow_codes + + def test_bound_form_invalid_file_unicode_error(self): + invalid_utf8_data = b"\xff\xfe\xfd" + file = ContentFile(invalid_utf8_data) + file.name = "invalid_data.csv" + form = self._bind_form(file) + + assert not form.is_valid() + errors = form.errors.get("csv_data") + len(errors) == 1 + assert ( + "File could not be read. Please ensure the file contains CSV" in errors[0] + )