From a41f1080d63a159c92280b882d9fb23e1478ccd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Wed, 11 Dec 2024 12:48:29 +0100 Subject: [PATCH 1/2] `<regex>`: Allow initial ] to start character ranges in POSIX regular expressions --- stl/inc/regex | 19 ++++++++++--------- .../std/tests/VSO_0000000_regex_use/test.cpp | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/stl/inc/regex b/stl/inc/regex index 17fd2ee72c..8a0d9abf9d 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1734,7 +1734,7 @@ private: void _Do_ex_class(_Meta_type); bool _CharacterClassEscape(bool); _Prs_ret _ClassEscape2(); - _Prs_ret _ClassAtom(); + _Prs_ret _ClassAtom(bool); void _ClassRanges(); void _CharacterClass(); bool _IdentityEscape(); @@ -4111,7 +4111,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class } template -_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class atom +_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom if (_Mchar == _Meta_esc) { // check for valid escape sequence _Next(); if (_L_flags & _L_grp_esc) { @@ -4134,7 +4134,10 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class at _Val = _Meta_lsq; return _Prs_chr; } - } else if (_Mchar == _Meta_rsq || _Mchar == _Meta_eos) { + } else if ((_Mchar == _Meta_rsq + && (!(_L_flags & _L_brk_rstr) + || !_Initial)) // initial ] does not close the class when it is not special + || _Mchar == _Meta_eos) { return _Prs_none; } else { // handle ordinary character _Val = _Char; @@ -4147,10 +4150,12 @@ template void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges _Prs_ret _Ret; + bool _Initial = true; for (;;) { // process characters through end of bracket expression - if ((_Ret = _ClassAtom()) == _Prs_none) { + if ((_Ret = _ClassAtom(_Initial)) == _Prs_none) { return; } + _Initial = false; if (_Ret == _Prs_chr && _Val == 
0 && !(_L_flags & _L_bzr_chr)) { _Error(regex_constants::error_escape); @@ -4160,7 +4165,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas _Next(); _Elem _Chr1 = static_cast<_Elem>(_Val); const bool _Set_preceding = _Ret == _Prs_set; - if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character + if ((_Ret = _ClassAtom(false)) == _Prs_none) { // treat - as ordinary character if (!_Set_preceding) { _Nfa._Add_char_to_class(_Chr1); } @@ -4209,10 +4214,6 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expre _Next(); } - if ((_L_flags & _L_brk_rstr) && _Mchar == _Meta_rsq) { // insert initial ] when not special - _Nfa._Add_char_to_class(_Meta_rsq); - _Next(); - } _ClassRanges(); } diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index ba9c575940..8df8be409e 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -1171,6 +1171,23 @@ void test_gh_5253() { g_regexTester.should_not_match("a", "()*"); } +void test_gh_5364() { + // GH-5364 `<regex>`: Allow initial ] to start character ranges in basic regular expressions + for (syntax_option_type option : {basic, extended, grep, egrep}) { + g_regexTester.should_match("]", "[]-_]", option); + g_regexTester.should_match("^", "[]-_]", option); + g_regexTester.should_match("_", "[]-_]", option); + g_regexTester.should_not_match("-", "[]-_]", option); + + g_regexTester.should_match("]", "[]a]", option); + g_regexTester.should_match("a", "[]a]", option); + g_regexTester.should_not_match("a]", "[]a]", option); + g_regexTester.should_not_match("]a", "[]a]", option); + + g_regexTester.should_throw("[]", error_brack, option); + } +} + int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -1208,6 +1225,7 @@ int main() { test_gh_5192(); test_gh_5214(); test_gh_5253(); + 
test_gh_5364(); return g_regexTester.result(); } From b4dc1d2689bf265b43c758900509eafeef111e29 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 2 Apr 2025 13:38:59 -0700 Subject: [PATCH 2/2] Code review feedback. --- .../std/tests/VSO_0000000_regex_use/test.cpp | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index 8df8be409e..42a2180674 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -1172,20 +1172,43 @@ void test_gh_5253() { } void test_gh_5364() { - // GH-5364 `<regex>`: Allow initial ] to start character ranges in basic regular expressions - for (syntax_option_type option : {basic, extended, grep, egrep}) { + // GH-5364 <regex>: Allow initial ] to start character ranges in POSIX regular expressions + for (syntax_option_type option : {basic, extended, awk, grep, egrep}) { g_regexTester.should_match("]", "[]-_]", option); g_regexTester.should_match("^", "[]-_]", option); g_regexTester.should_match("_", "[]-_]", option); g_regexTester.should_not_match("-", "[]-_]", option); + g_regexTester.should_not_match("]", "[^]-_]", option); + g_regexTester.should_not_match("^", "[^]-_]", option); + g_regexTester.should_not_match("_", "[^]-_]", option); + g_regexTester.should_match("-", "[^]-_]", option); + g_regexTester.should_match("]", "[]a]", option); g_regexTester.should_match("a", "[]a]", option); + g_regexTester.should_not_match("_", "[]a]", option); g_regexTester.should_not_match("a]", "[]a]", option); g_regexTester.should_not_match("]a", "[]a]", option); + g_regexTester.should_not_match("__", "[]a]", option); + + g_regexTester.should_not_match("]", "[^]a]", option); + g_regexTester.should_not_match("a", "[^]a]", option); + g_regexTester.should_match("_", "[^]a]", option); + g_regexTester.should_not_match("a]", "[^]a]", option); + g_regexTester.should_not_match("]a", 
"[^]a]", option); + g_regexTester.should_not_match("__", "[^]a]", option); g_regexTester.should_throw("[]", error_brack, option); + g_regexTester.should_throw("[^]", error_brack, option); } + + g_regexTester.should_throw("[]-_]", error_brack, ECMAScript); + g_regexTester.should_throw("[^]-_]", error_brack, ECMAScript); + g_regexTester.should_throw("[]a]", error_brack, ECMAScript); + g_regexTester.should_throw("[^]a]", error_brack, ECMAScript); + + g_regexTester.should_not_match("c", "[]", ECMAScript); + g_regexTester.should_match("c", "[^]", ECMAScript); } int main() {