Skip to content

<regex>: Allow initial ] to start character ranges in POSIX regular expressions #5364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ private:
void _Do_ex_class(_Meta_type);
bool _CharacterClassEscape(bool);
_Prs_ret _ClassEscape2();
_Prs_ret _ClassAtom();
_Prs_ret _ClassAtom(bool);
void _ClassRanges();
void _CharacterClass();
bool _IdentityEscape();
Expand Down Expand Up @@ -4111,7 +4111,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class
}

template <class _FwdIt, class _Elem, class _RxTraits>
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class atom
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom
if (_Mchar == _Meta_esc) { // check for valid escape sequence
_Next();
if (_L_flags & _L_grp_esc) {
Expand All @@ -4134,7 +4134,10 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class at
_Val = _Meta_lsq;
return _Prs_chr;
}
} else if (_Mchar == _Meta_rsq || _Mchar == _Meta_eos) {
} else if ((_Mchar == _Meta_rsq
&& (!(_L_flags & _L_brk_rstr)
|| !_Initial)) // initial ] does not close the class when it is not special
|| _Mchar == _Meta_eos) {
return _Prs_none;
} else { // handle ordinary character
_Val = _Char;
Expand All @@ -4147,10 +4150,12 @@ template <class _FwdIt, class _Elem, class _RxTraits>
void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges
_Prs_ret _Ret;

bool _Initial = true;
for (;;) { // process characters through end of bracket expression
if ((_Ret = _ClassAtom()) == _Prs_none) {
if ((_Ret = _ClassAtom(_Initial)) == _Prs_none) {
return;
}
_Initial = false;

if (_Ret == _Prs_chr && _Val == 0 && !(_L_flags & _L_bzr_chr)) {
_Error(regex_constants::error_escape);
Expand All @@ -4160,7 +4165,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas
_Next();
_Elem _Chr1 = static_cast<_Elem>(_Val);
const bool _Set_preceding = _Ret == _Prs_set;
if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character
if ((_Ret = _ClassAtom(false)) == _Prs_none) { // treat - as ordinary character
if (!_Set_preceding) {
_Nfa._Add_char_to_class(_Chr1);
}
Expand Down Expand Up @@ -4209,10 +4214,6 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expre
_Next();
}

if ((_L_flags & _L_brk_rstr) && _Mchar == _Meta_rsq) { // insert initial ] when not special
_Nfa._Add_char_to_class(_Meta_rsq);
_Next();
}
_ClassRanges();
}

Expand Down
18 changes: 18 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,23 @@ void test_gh_5253() {
g_regexTester.should_not_match("a", "()*");
}

void test_gh_5364() {
// GH-5364 `<regex>`: Allow initial ] to start character ranges in basic regular expressions
for (syntax_option_type option : {basic, extended, grep, egrep}) {
g_regexTester.should_match("]", "[]-_]", option);
g_regexTester.should_match("^", "[]-_]", option);
g_regexTester.should_match("_", "[]-_]", option);
g_regexTester.should_not_match("-", "[]-_]", option);

g_regexTester.should_match("]", "[]a]", option);
g_regexTester.should_match("a", "[]a]", option);
g_regexTester.should_not_match("a]", "[]a]", option);
g_regexTester.should_not_match("]a", "[]a]", option);

g_regexTester.should_throw("[]", error_brack, option);
}
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -1208,6 +1225,7 @@ int main() {
test_gh_5192();
test_gh_5214();
test_gh_5253();
test_gh_5364();

return g_regexTester.result();
}