Skip to content

Commit 2a17925

Browse files
committed
[GR-55113] TRegex: fix missing recursive back-reference guard on group escapes in OracleDBFlavor.
PullRequest: graal/18220
2 parents 8b88945 + 7c11f86 commit 2a17925

File tree

6 files changed

+76
-55
lines changed

6 files changed

+76
-55
lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java

Lines changed: 65 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -126,30 +126,30 @@ public void generatedTests() {
126126
expectSyntaxError("x{4294967296}", "", "", getTRegexEncoding(), "x{4294967296}", 0, "invalid interval in regular expression");
127127
expectSyntaxError("x{4294967297}", "", "", getTRegexEncoding(), "x{4294967297}", 0, "invalid interval in regular expression");
128128
test("x??", "", "x", 0, true, 0, 0);
129-
test("x{2}+", "", "x", 0, false);
130-
test("x{2}+", "", "xx", 0, true, 0, 2);
131-
test("x{2}+", "", "xxx", 0, true, 0, 2);
132-
test("x{2}+", "", "xxxx", 0, true, 0, 4);
133-
test("x{2}*", "", "xxxx", 0, true, 0, 4);
134-
test("x{2}*?", "", "xxxx", 0, true, 0, 0);
135-
test("x{2}*???", "", "xxxx", 0, true, 0, 0);
129+
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "x", 0, "nested quantifier in regular expression");
130+
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xx", 0, "nested quantifier in regular expression");
131+
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xxx", 0, "nested quantifier in regular expression");
132+
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
133+
expectSyntaxError("x{2}*", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
134+
expectSyntaxError("x{2}*?", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
135+
expectSyntaxError("x{2}*???", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
136136
test("\\A*x\\Z+", "", "x", 0, true, 0, 1);
137137
test("\\A*x\\Z+", "", "xx", 0, true, 1, 2);
138138
test("\\A+x\\Z+", "", "xx", 0, false);
139-
test("x????", "", "x?", 0, true, 0, 0);
140-
test("x????", "", "xx?", 0, true, 0, 0);
141-
test("x??????", "", "x?", 0, true, 0, 0);
142-
test("x??????", "", "xx?", 0, true, 0, 0);
139+
expectSyntaxError("x????", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
140+
expectSyntaxError("x????", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
141+
expectSyntaxError("x??????", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
142+
expectSyntaxError("x??????", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
143143
test("x{2}?", "", "xxxxx", 0, true, 0, 2);
144-
test("x{2}??", "", "xxxxx", 0, true, 0, 2);
145-
test("x{2}+", "", "xxxxx", 0, true, 0, 4);
146-
test("x{2}*", "", "xxxxx", 0, true, 0, 4);
147-
test("x???", "", "x", 0, true, 0, 0);
148-
test("x{2}*??", "", "xxxx", 0, true, 0, 0);
149-
test("x???", "", "x?", 0, true, 0, 0);
150-
test("x???", "", "xx?", 0, true, 0, 0);
151-
test("x?????", "", "x?", 0, true, 0, 0);
152-
test("x?????", "", "xx?", 0, true, 0, 0);
144+
expectSyntaxError("x{2}??", "", "", getTRegexEncoding(), "xxxxx", 0, "nested quantifier in regular expression");
145+
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xxxxx", 0, "nested quantifier in regular expression");
146+
expectSyntaxError("x{2}*", "", "", getTRegexEncoding(), "xxxxx", 0, "nested quantifier in regular expression");
147+
expectSyntaxError("x???", "", "", getTRegexEncoding(), "x", 0, "nested quantifier in regular expression");
148+
expectSyntaxError("x{2}*??", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
149+
expectSyntaxError("x???", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
150+
expectSyntaxError("x???", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
151+
expectSyntaxError("x?????", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
152+
expectSyntaxError("x?????", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
153153
test("(a{0,1})*b\\1", "", "aab", 0, true, 0, 3, 2, 2);
154154
test("(a{0,1})*b\\1", "", "aaba", 0, true, 0, 3, 2, 2);
155155
test("(a{0,1})*b\\1", "", "aabaa", 0, true, 0, 3, 2, 2);
@@ -988,7 +988,7 @@ public void generatedTests() {
988988
test("a(()|()|b|()|())*c", "", "abbc", 0, true, 0, 4, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1);
989989
test("a(()|()|()|b|())*c", "", "abbc", 0, true, 0, 4, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1);
990990
test("a(()|()|()|()|b)*c", "", "abbc", 0, true, 0, 4, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1);
991-
test("a??+", "", "aaa", 0, true, 0, 0);
991+
expectSyntaxError("a??+", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
992992
test("()??()??()??()??()??()??()??()??\\3\\5\\7", "", "a", 0, true, 0, 0, -1, -1, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1);
993993
test("()*", "", "a", 0, true, 0, 0, 0, 0);
994994
test("(a|)*", "", "a", 0, true, 0, 1, 1, 1);
@@ -1011,24 +1011,24 @@ public void generatedTests() {
10111011
expectSyntaxError("[y-\\{][y-\\{]", "", "", getTRegexEncoding(), "I", 0, "invalid range in regular expression");
10121012
test("a?", "", "aaa", 0, true, 0, 1);
10131013
test("a??", "", "aaa", 0, true, 0, 0);
1014-
test("a???", "", "aaa", 0, true, 0, 0);
1014+
expectSyntaxError("a???", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
10151015
test("a+?", "", "aaa", 0, true, 0, 1);
1016-
test("a+??", "", "aaa", 0, true, 0, 1);
1017-
test("a??+", "", "aaa", 0, true, 0, 0);
1018-
test("a?+", "", "aaa", 0, true, 0, 3);
1019-
test("a?+?", "", "aaa", 0, true, 0, 1);
1020-
test("a?+??", "", "aaa", 0, true, 0, 1);
1021-
test("a?*??", "", "aaa", 0, true, 0, 0);
1022-
test("(a?)*??", "", "aaa", 0, true, 0, 0, -1, -1);
1016+
expectSyntaxError("a+??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1017+
expectSyntaxError("a??+", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1018+
expectSyntaxError("a?+", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1019+
expectSyntaxError("a?+?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1020+
expectSyntaxError("a?+??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1021+
expectSyntaxError("a?*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1022+
expectSyntaxError("(a?)*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
10231023
test("((a?)*)??", "", "aaa", 0, true, 0, 0, -1, -1, -1, -1);
10241024
test("((a?)*?)?", "", "aaa", 0, true, 0, 0, 0, 0, -1, -1);
1025-
test("a?*?", "", "aaa", 0, true, 0, 0);
1026-
test("a*??", "", "aaa", 0, true, 0, 0);
1027-
test("a+*?", "", "aaa", 0, true, 0, 0);
1025+
expectSyntaxError("a?*?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1026+
expectSyntaxError("a*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1027+
expectSyntaxError("a+*?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
10281028
test("(a+)*?", "", "aaa", 0, true, 0, 0, -1, -1);
10291029
test("((a+)*)?", "", "aaa", 0, true, 0, 3, 0, 3, 0, 3);
1030-
test("a+*??", "", "aaa", 0, true, 0, 0);
1031-
test("a++?", "", "aaa", 0, true, 0, 3);
1030+
expectSyntaxError("a+*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1031+
expectSyntaxError("a++?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
10321032
expectSyntaxError("[[.\\a.]]", "", "", getTRegexEncoding(), ".", 0, "invalid collation class in regular expression");
10331033
test("[[...]]", "", ".", 0, true, 0, 1);
10341034
test("[[...]]", "", "[", 0, false);
@@ -1043,12 +1043,12 @@ public void generatedTests() {
10431043
test("[[...]a]a", "", "a", 0, false);
10441044
test("[[...]a]?a", "", "a", 0, true, 0, 1);
10451045
test("[[...]a]|a", "", "a", 0, true, 0, 1);
1046-
test("a++?", "", "aaa", 0, true, 0, 3);
1047-
test("\\D|++?", "", "9", 0, true, 0, 0);
1048-
test("\\D|++?^", "", "9", 0, true, 0, 0);
1049-
test("\\S|\\D|++?^(3)", "", "9", 0, true, 0, 1, -1, -1);
1050-
test("\\S|\\D|++?^((3)|[R-_\\(/])t[[:alnum:]]c", "", "9", 0, true, 0, 1, -1, -1, -1, -1);
1051-
test("(\\d)|5+*?|[[:lower:]][[=l=]]^%", "", "\u0169\u2113%", 0, true, 0, 0, -1, -1);
1046+
expectSyntaxError("a++?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1047+
expectSyntaxError("\\D|++?", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1048+
expectSyntaxError("\\D|++?^", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1049+
expectSyntaxError("\\S|\\D|++?^(3)", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1050+
expectSyntaxError("\\S|\\D|++?^((3)|[R-_\\(/])t[[:alnum:]]c", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1051+
expectSyntaxError("(\\d)|5+*?|[[:lower:]][[=l=]]^%", "", "", getTRegexEncoding(), "\u0169\u2113%", 0, "nested quantifier in regular expression");
10521052
test("[[===]]", "", "=", 0, true, 0, 1);
10531053
expectSyntaxError("[[=\\==]]", "", "", getTRegexEncoding(), "=", 0, "invalid equivalence class in regular expression");
10541054
expectSyntaxError("[[=\\==]]", "", "", getTRegexEncoding(), "\\", 0, "invalid equivalence class in regular expression");
@@ -1093,23 +1093,23 @@ public void generatedTests() {
10931093
test("\\[[b-b]", "", "[b-b]", 0, true, 0, 2);
10941094
test("\\[c-b]", "", "[c-b]", 0, true, 0, 5);
10951095
expectSyntaxError("\\[[c-b]", "", "", getTRegexEncoding(), "[c-b]", 0, "invalid range in regular expression");
1096-
test("()?*", "", "c", 0, true, 0, 0, 0, 0);
1097-
test("()?*|", "", "c", 0, true, 0, 0, 0, 0);
1098-
test("()?*||", "", "c", 0, true, 0, 0, 0, 0);
1099-
test("()?*||a", "", "b", 0, true, 0, 0, 0, 0);
1100-
test("()?*||^a\\Zb", "", "c", 0, true, 0, 0, 0, 0);
1096+
expectSyntaxError("()?*", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
1097+
expectSyntaxError("()?*|", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
1098+
expectSyntaxError("()?*||", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
1099+
expectSyntaxError("()?*||a", "", "", getTRegexEncoding(), "b", 0, "nested quantifier in regular expression");
1100+
expectSyntaxError("()?*||^a\\Zb", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
11011101
test("ac??bc?", "", "abc", 0, true, 0, 3);
11021102
test("ac??bc?", "", "acbc", 0, true, 0, 4);
11031103
test("a?", "", "a", 0, true, 0, 1);
11041104
test("a??", "", "a", 0, true, 0, 0);
1105-
test("a???", "", "a", 0, true, 0, 0);
1106-
test("(a)???", "", "a", 0, true, 0, 0, -1, -1);
1105+
expectSyntaxError("a???", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1106+
expectSyntaxError("(a)???", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
11071107
test("(a?)??", "", "a", 0, true, 0, 0, -1, -1);
11081108
test("(a??)?", "", "a", 0, true, 0, 0, 0, 0);
1109-
test("(a???)", "", "a", 0, true, 0, 0, 0, 0);
1110-
test("a{0,1}??", "", "a", 0, true, 0, 0);
1111-
test("a??{0,1}", "", "a", 0, true, 0, 0);
1112-
test("a{0,1}?{0,1}", "", "a", 0, true, 0, 0);
1109+
expectSyntaxError("(a???)", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1110+
expectSyntaxError("a{0,1}??", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1111+
expectSyntaxError("a??{0,1}", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1112+
expectSyntaxError("a{0,1}?{0,1}", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
11131113
test("(a{0,1})*", "", "aaaaaa", 0, true, 0, 6, 6, 6);
11141114
test("(a{0,2})*", "", "aaaaaa", 0, true, 0, 6, 6, 6);
11151115
test("(a{1,2})*", "", "aaaaaa", 0, true, 0, 6, 4, 6);
@@ -1373,6 +1373,20 @@ public void generatedTests() {
13731373
test("($)*\\s*", "m", "\n ", 0, true, 0, 0, 0, 0);
13741374
test("$*\\s*", "m", "\n ", 0, true, 0, 0);
13751375
test("(^|(|a))b\\z", "", "b", 0, true, 0, 1, 0, 0, -1, -1);
1376+
test("(a*()*)*", "", "aaa", 0, true, 0, 3, 3, 3, 3, 3);
1377+
test("(a*()+)+", "", "aaa", 0, true, 0, 3, 3, 3, 3, 3);
1378+
test("(a*()+?)+", "", "aaa", 0, true, 0, 3, 3, 3, 3, 3);
1379+
test("a((b|)+)+c", "", "abbbc", 0, true, 0, 5, 4, 4, 4, 4);
1380+
test("((a|)+)+(b)+c", "", "aaabc", 0, true, 0, 5, 3, 3, 3, 3, 3, 4);
1381+
test("(a*()+?b*?)+c", "", "aabaac", 0, true, 0, 6, 5, 5, 5, 5);
1382+
test("((\\w|)()+)+", "i", "empty", 0, true, 0, 5, 5, 5, 5, 5, 5, 5);
1383+
test("(a?()?){3,4}", "", "aa", 0, true, 0, 2, 2, 2, 2, 2);
1384+
test("(a?()+){3,4}", "", "aa", 0, true, 0, 2, 2, 2, 2, 2);
1385+
test("(a*()+?)+", "", "aaa", 0, true, 0, 3, 3, 3, 3, 3);
1386+
test("a(b\\1|)*?()+c", "", "abc", 0, false);
1387+
test("()(a*\\1+)*", "", "aaa", 0, true, 0, 3, 0, 0, 3, 3);
1388+
test("(a(\\2b|)?)+\\1c", "", "aaabaaac", 0, true, 0, 8, 5, 6, 6, 6);
1389+
test("((|ab)+?w\\Z|^c)de()d", "", "ffffff", 0, false);
13761390
test("(a{1100,1100})\\1", "i", "a".repeat(2400), 0, true, 0, 2200, 0, 1100);
13771391

13781392
/* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -506,7 +506,7 @@ public void testLazyLastGroup() {
506506
public void generatedTests() {
507507
/* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */
508508

509-
// Generated using sre from CPython 3.12.3
509+
// Generated using sre from CPython 3.12.4
510510
// re._casefix._EXTRA_CASES
511511
test("i", "i", "\u0131", 0, true, 0, 1);
512512
test("s", "i", "\u017f", 0, true, 0, 1);

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/errors/OracleDBErrorMessages.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,5 +51,6 @@ public interface OracleDBErrorMessages {
5151
String INVALID_RANGE = "invalid range in regular expression";
5252
String INVALID_COLLATION_ELEMENT = "invalid collation class in regular expression";
5353
String INVALID_EQUIVALENCE_CLASS = "invalid equivalence class in regular expression";
54+
String NESTED_QUANTIFIER = "nested quantifier in regular expression";
5455
String KNOWN_BUG = "known bug";
5556
}

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/dfa/DFAGenerator.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -719,8 +719,11 @@ private void tryInnerLiteralOptimization() {
719719
}
720720
bfsSwapLists();
721721
}
722-
assert literalFirstDFAState != null;
723-
assert literalLastDFAState != null;
722+
723+
if (literalFirstDFAState == null || literalLastDFAState == null) {
724+
// may happen when transitions to the literal have been pruned during DFA generation
725+
return;
726+
}
724727

725728
if (literalStart > 0) {
726729
/*

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/visitors/NFATraversalRegexASTVisitor.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1092,7 +1092,6 @@ private void calculateTransitionGuards() {
10921092
pushTransitionGuard(TransitionGuard.createExitZeroWidth(quantifier));
10931093
}
10941094
}
1095-
pushRecursiveBackrefUpdates(group);
10961095
} else if (pathIsGroupEscape(element)) {
10971096
if (group.hasQuantifier()) {
10981097
Quantifier quantifier = group.getQuantifier();
@@ -1105,6 +1104,7 @@ private void calculateTransitionGuards() {
11051104
}
11061105
}
11071106
}
1107+
pushRecursiveBackrefUpdates(group);
11081108
if (needsUpdateCGStepByStep(group) && !captureGroupUpdates.get(getBoundaryIndexEnd(group))) {
11091109
pushTransitionGuard(TransitionGuard.createUpdateCG(getBoundaryIndexEnd(group)));
11101110
}

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,9 @@ public RegexAST parse() throws RegexSyntaxException {
167167
astBuilder.addBackReference((Token.BackReference) token, flags.isIgnoreCase());
168168
break;
169169
case quantifier:
170+
if (prevKind == Token.Kind.quantifier) {
171+
throw syntaxError(OracleDBErrorMessages.NESTED_QUANTIFIER);
172+
}
170173
if (astBuilder.getCurTerm() == null || prevKind == Token.Kind.captureGroupBegin) {
171174
// quantifiers without target are ignored
172175
break;

0 commit comments

Comments
 (0)