Skip to content

Commit 6e37f0c

Browse files
Markdown xss backport (#8244)
* Update helpers.py * Update mixins.py * format * format * Allow horizontal rule in markdown * More instructive error msg * Specify output_format to markdown.markdown Ref: https://python-markdown.github.io/reference/markdown/serializers/ * Cleanup * Adjust allowable markdown tags * Add unit test for malicious markdown XSS * Allow <pre> tag --------- Co-authored-by: Matthias Mair <code@mjmair.com>
1 parent 1c6d25c commit 6e37f0c

File tree

4 files changed

+94
-4
lines changed

4 files changed

+94
-4
lines changed

src/backend/InvenTree/InvenTree/helpers.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,40 @@ def remove_non_printable_characters(
810810
return cleaned
811811

812812

813+
def clean_markdown(value: str):
814+
"""Clean a markdown string.
815+
816+
This function will remove javascript and other potentially harmful content from the markdown string.
817+
"""
818+
import markdown
819+
from markdownify.templatetags.markdownify import markdownify
820+
821+
try:
822+
markdownify_settings = settings.MARKDOWNIFY['default']
823+
except (AttributeError, KeyError):
824+
markdownify_settings = {}
825+
826+
extensions = markdownify_settings.get('MARKDOWN_EXTENSIONS', [])
827+
extension_configs = markdownify_settings.get('MARKDOWN_EXTENSION_CONFIGS', {})
828+
829+
# Generate raw HTML from provided markdown (without sanitizing)
830+
# Note: The 'html' output_format is required to generate self closing tags, e.g. <tag> instead of <tag />
831+
html = markdown.markdown(
832+
value or '',
833+
extensions=extensions,
834+
extension_configs=extension_configs,
835+
output_format='html',
836+
)
837+
838+
# Clean the HTML content (for comparison). Ideally, this should be the same as the original content
839+
clean_html = markdownify(value)
840+
841+
if html != clean_html:
842+
raise ValidationError(_('Data contains prohibited markdown content'))
843+
844+
return value
845+
846+
813847
def hash_barcode(barcode_data):
814848
"""Calculate a 'unique' hash for a barcode string.
815849

src/backend/InvenTree/InvenTree/mixins.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
from rest_framework.response import Response
77

88
from InvenTree.fields import InvenTreeNotesField
9-
from InvenTree.helpers import remove_non_printable_characters, strip_html_tags
9+
from InvenTree.helpers import (
10+
clean_markdown,
11+
remove_non_printable_characters,
12+
strip_html_tags,
13+
)
1014

1115

1216
class CleanMixin:
@@ -57,18 +61,20 @@ def clean_string(self, field: str, data: str) -> str:
5761

5862
# By default, newline characters are removed
5963
remove_newline = True
64+
is_markdown = False
6065

6166
try:
6267
if hasattr(self, 'serializer_class'):
6368
model = self.serializer_class.Meta.model
6469
field = model._meta.get_field(field)
6570

6671
# The following field types allow newline characters
67-
allow_newline = [InvenTreeNotesField]
72+
allow_newline = [(InvenTreeNotesField, True)]
6873

6974
for field_type in allow_newline:
70-
if issubclass(type(field), field_type):
75+
if issubclass(type(field), field_type[0]):
7176
remove_newline = False
77+
is_markdown = field_type[1]
7278
break
7379

7480
except AttributeError:
@@ -80,6 +86,9 @@ def clean_string(self, field: str, data: str) -> str:
8086
cleaned, remove_newline=remove_newline
8187
)
8288

89+
if is_markdown:
90+
cleaned = clean_markdown(cleaned)
91+
8392
return cleaned
8493

8594
def clean_data(self, data: dict) -> dict:

src/backend/InvenTree/InvenTree/settings.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1231,23 +1231,29 @@
12311231
'abbr',
12321232
'b',
12331233
'blockquote',
1234+
'code',
12341235
'em',
12351236
'h1',
12361237
'h2',
12371238
'h3',
1239+
'h4',
1240+
'h5',
1241+
'hr',
12381242
'i',
12391243
'img',
12401244
'li',
12411245
'ol',
12421246
'p',
1247+
'pre',
1248+
's',
12431249
'strong',
1244-
'ul',
12451250
'table',
12461251
'thead',
12471252
'tbody',
12481253
'th',
12491254
'tr',
12501255
'td',
1256+
'ul',
12511257
],
12521258
}
12531259
}

src/backend/InvenTree/company/test_api.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,47 @@ def test_company_active(self):
156156
len(self.get(url, data={'active': False}, expected_code=200).data), 1
157157
)
158158

159+
def test_company_notes(self):
160+
"""Test the markdown 'notes' field for the Company model."""
161+
pk = Company.objects.first().pk
162+
163+
# Attempt to inject malicious markdown into the "notes" field
164+
xss = [
165+
'[Click me](javascript:alert(123))',
166+
'![x](javascript:alert(123))',
167+
'![Uh oh...]("onerror="alert(\'XSS\'))',
168+
]
169+
170+
for note in xss:
171+
response = self.patch(
172+
reverse('api-company-detail', kwargs={'pk': pk}),
173+
{'notes': note},
174+
expected_code=400,
175+
)
176+
177+
self.assertIn(
178+
'Data contains prohibited markdown content', str(response.data)
179+
)
180+
181+
# The following markdown is safe, and should be accepted
182+
good = [
183+
'This is a **bold** statement',
184+
'This is a *italic* statement',
185+
'This is a [link](https://www.google.com)',
186+
'This is an ![image](https://www.google.com/test.jpg)',
187+
'This is a `code` block',
188+
'This text has ~~strikethrough~~ formatting',
189+
]
190+
191+
for note in good:
192+
response = self.patch(
193+
reverse('api-company-detail', kwargs={'pk': pk}),
194+
{'notes': note},
195+
expected_code=200,
196+
)
197+
198+
self.assertEqual(response.data['notes'], note)
199+
159200

160201
class ContactTest(InvenTreeAPITestCase):
161202
"""Tests for the Contact models."""

0 commit comments

Comments
 (0)