Skip to content

Commit 976fa6b

Browse files
authored
Fix rendering of max. quantifier in regexes (#457)
There are many different syntaxes for regular expressions. Some of them (for example, XSD patterns) require the minimum bound of a quantifier to be explicitly specified as 0 even if only the maximum bound is given. To comply with such syntaxes, we now render a quantifier that has only a maximum bound `m` as `{0,m}` instead of `{,m}`.
1 parent 61633f7 commit 976fa6b

File tree

5 files changed

+33
-26
lines changed

5 files changed

+33
-26
lines changed

aas_core_codegen/parse/retree/_render.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def transform_quantifier(
178178
if node.maximum == 1:
179179
quantifier = "?"
180180
else:
181-
quantifier = f"{{,{node.maximum}}}"
181+
quantifier = f"{{0,{node.maximum}}}"
182182
else:
183183
quantifier = f"{{{node.minimum},{node.maximum}}}"
184184
else:

aas_core_codegen/xsd/main.py

+9-20
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,12 @@ def _undo_escaping_backslash_x_u_and_U_in_pattern(pattern: str) -> str:
127127
class _AnchorRemover(parse_retree.BaseVisitor):
128128
"""
129129
Remove anchors from a regex in-place.
130+
131+
We need to remove the anchors (``^``, ``$``) since patterns in the XSD are always
132+
anchored.
133+
134+
This is necessary since otherwise the schema validation fails.
135+
See: https://stackoverflow.com/questions/4367914/regular-expression-in-xml-schema-definition-fails
130136
"""
131137

132138
def visit_concatenation(self, node: parse_retree.Concatenation) -> None:
@@ -146,15 +152,10 @@ def visit_concatenation(self, node: parse_retree.Concatenation) -> None:
146152

147153

148154
@ensure(lambda result: (result[0] is not None) ^ (result[1] is not None))
149-
def _remove_anchors_in_pattern(pattern: str) -> Tuple[Optional[str], Optional[str]]:
150-
"""
151-
We need to remove the anchors (``^``, ``$``) since schemas are always anchored.
152-
153-
This is necessary since otherwise the schema validation fails.
154-
See: https://stackoverflow.com/questions/4367914/regular-expression-in-xml-schema-definition-fails
155+
def _translate_pattern(pattern: str) -> Tuple[Optional[str], Optional[str]]:
156+
"""Translate the pattern to obtain the equivalent in XSD."""
157+
pattern = _undo_escaping_backslash_x_in_pattern(pattern)
155158

156-
Return pattern without anchors, or error message.
157-
"""
158159
parsed, error = parse_retree.parse(values=[pattern])
159160
if error is not None:
160161
regex_line, pointer_line = parse_retree.render_pointer(error.cursor)
@@ -176,18 +177,6 @@ def _remove_anchors_in_pattern(pattern: str) -> Tuple[Optional[str], Optional[st
176177
return "".join(parts), None
177178

178179

179-
@ensure(lambda result: (result[0] is not None) ^ (result[1] is not None))
180-
def _translate_pattern(pattern: str) -> Tuple[Optional[str], Optional[str]]:
181-
"""Translate the pattern to obtain the equivalent in XSD."""
182-
result, error = _remove_anchors_in_pattern(
183-
_undo_escaping_backslash_x_in_pattern(pattern)
184-
)
185-
if error is not None:
186-
return None, error
187-
188-
return result, None
189-
190-
191180
def _generate_xs_restriction(
192181
base_type: intermediate.PrimitiveType,
193182
len_constraint: Optional[infer_for_schema.LenConstraint],
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
a{,3}
1+
a{0,3}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
a{,3}?
1+
a{0,3}?

tests/xsd/test_main.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -69,18 +69,36 @@ def test_complex(self) -> None:
6969
)
7070

7171

72-
class Test_removing_anchors_in_patterns(unittest.TestCase):
72+
class Test_translate_pattern(unittest.TestCase):
7373
# NOTE (mristin, 2022-06-18):
7474
# This is relevant since XSD are always anchored.
7575
# See: https://stackoverflow.com/questions/4367914/regular-expression-in-xml-schema-definition-fails
7676

77-
def test_table(self) -> None:
77+
def test_table_for_removing_anchors(self) -> None:
7878
for pattern, expected, identifier in [
7979
("^$", "", "empty"),
8080
("^something$", "something", "simple_literal"),
8181
("(^.*$)", "(.*)", "within_a_group"),
8282
]:
83-
fixed, error = xsd_main._remove_anchors_in_pattern(pattern)
83+
fixed, error = xsd_main._translate_pattern(pattern)
84+
assert error is None, identifier
85+
assert fixed is not None, identifier
86+
87+
self.assertEqual(expected, fixed, identifier)
88+
89+
def test_table_for_rendering_quantifiers(self) -> None:
90+
# NOTE (mristin, 2024-03-22):
91+
# We explicitly test for quantifiers to make sure that they all comply with
92+
# XSD patterns. For example, when only the maximum quantifier is given,
93+
# the minimum quantifier of 0 must be indicated explicitly.
94+
for pattern, expected, identifier in [
95+
("a{1}", "a{1}", "exact repetition"),
96+
("a{1,}", "a+", "min 1 repetition"),
97+
("a{2,}", "a{2,}", "more than 1 min repetition"),
98+
("a{,2}", "a{0,2}", "only max repetition"),
99+
("a{1,2}", "a{1,2}", "min and max repetition"),
100+
]:
101+
fixed, error = xsd_main._translate_pattern(pattern)
84102
assert error is None, identifier
85103
assert fixed is not None, identifier
86104

0 commit comments

Comments
 (0)