+2014-11-13 Tim Shen <timshen@google.com>
+
+ PR libstdc++/63775
+ * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
+ _BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
+ like [z-a]. Change _M_expression_term interface.
+ * include/bits/regex_compiler.tcc (
+ _Compiler<>::_M_insert_bracket_matcher,
+ _Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
+ * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
+ Add testcases and move file out of extended.
+
2014-11-12 Jonathan Wakely <jwakely@redhat.com>
PR libstdc++/57250
template<bool __icase, bool __collate>
void
- _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
+ _M_expression_term(pair<bool, _CharT>& __last_char,
+ _BracketMatcher<_TraitsT, __icase, __collate>&
__matcher);
int
void
_M_make_range(_CharT __l, _CharT __r)
{
+ if (__l > __r)
+ __throw_regex_error(regex_constants::error_range);
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG
_M_insert_bracket_matcher(bool __neg)
{
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
+ pair<bool, _CharT> __last_char; // Optional<_CharT>
+ __last_char.first = false;
+ if (!(_M_flags & regex_constants::ECMAScript))
+ if (_M_try_char())
+ {
+ __matcher._M_add_char(_M_value[0]);
+ __last_char.first = true;
+ __last_char.second = _M_value[0];
+ }
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
- _M_expression_term(__matcher);
+ _M_expression_term(__last_char, __matcher);
__matcher._M_ready();
_M_stack.push(_StateSeqT(
*_M_nfa,
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
- _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
+ _M_expression_term(pair<bool, _CharT>& __last_char,
+ _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value);
__matcher._M_add_equivalence_class(_M_value);
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
__matcher._M_add_character_class(_M_value, false);
- else if (_M_try_char()) // [a
+ // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
+ // except when the '-' is the first character in the bracket expression
+ // ([--0]). ECMAScript treats all '-' after a range as a normal character.
+ // Also see above, where _M_expression_term gets called.
+ //
+ // As a result, POSIX rejects [-----], but ECMAScript doesn't.
+ // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
+ // Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
+ //
+ // It turns out that no one reads BNFs ;)
+ else if (_M_try_char())
{
- auto __ch = _M_value[0];
- if (_M_try_char())
+ if (!__last_char.first)
+ {
+ if (_M_value[0] == '-'
+ && !(_M_flags & regex_constants::ECMAScript))
+ __throw_regex_error(regex_constants::error_range);
+ __matcher._M_add_char(_M_value[0]);
+ __last_char.first = true;
+ __last_char.second = _M_value[0];
+ }
+ else
{
- if (_M_value[0] == '-') // [a-
+ if (_M_value[0] == '-')
{
- if (_M_try_char()) // [a-z]
+ if (_M_try_char())
{
- __matcher._M_make_range(__ch, _M_value[0]);
- return;
+ __matcher._M_make_range(__last_char.second , _M_value[0]);
+ __last_char.first = false;
+ }
+ else
+ {
+ if (_M_scanner._M_get_token()
+ != _ScannerT::_S_token_bracket_end)
+ __throw_regex_error(regex_constants::error_range);
+ __matcher._M_add_char(_M_value[0]);
}
- // If the dash is the last character in the bracket
- // expression, it is not special.
- if (_M_scanner._M_get_token()
- != _ScannerT::_S_token_bracket_end)
- __throw_regex_error(regex_constants::error_range);
}
- __matcher._M_add_char(_M_value[0]);
+ else
+ {
+ __matcher._M_add_char(_M_value[0]);
+ __last_char.second = _M_value[0];
+ }
}
- __matcher._M_add_char(__ch);
}
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
__matcher._M_add_character_class(_M_value,
--- /dev/null
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-01 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests bracket expressions (extended, basic and ECMAScript syntax) against a C-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <testsuite_regex.h>
+
+using namespace __gnu_test;
+using namespace std;
+
+void // Bracket-expression matching with std::regex::extended.
+test01()
+{
+ bool test __attribute__((unused)) = true;
+
+ { // A single character followed by a range.
+ std::regex re("pre/[za-x]", std::regex::extended);
+ VERIFY( regex_match_debug("pre/z", re) );
+ VERIFY( regex_match_debug("pre/a", re) );
+ VERIFY( !regex_match_debug("pre/y", re) ); // 'y' is neither 'z' nor within a-x
+ }
+ { // Character class whose name is spelled in mixed case, without icase.
+ std::regex re("pre/[[:uPPer:]]", std::regex::extended);
+ VERIFY( regex_match_debug("pre/Z", re) );
+ VERIFY( !regex_match_debug("pre/_", re) );
+ VERIFY( !regex_match_debug("pre/a", re) );
+ VERIFY( !regex_match_debug("pre/0", re) );
+ }
+ { // Same kind of class name, now with icase: both letter cases are expected to match.
+ std::regex re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);
+ VERIFY( regex_match_debug("pre/Z", re) );
+ VERIFY( regex_match_debug("pre/a", re) );
+ }
+ { // Character class [:w:] combined with the collating symbol [.tilde.].
+ std::regex re("pre/[[:w:][.tilde.]]", std::regex::extended);
+ VERIFY( regex_match_debug("pre/~", re) );
+ VERIFY( regex_match_debug("pre/_", re) );
+ VERIFY( regex_match_debug("pre/a", re) );
+ VERIFY( regex_match_debug("pre/0", re) );
+ }
+ { // Equivalence class [=a=]: expected to match both 'a' and 'A'.
+ std::regex re("pre/[[=a=]]", std::regex::extended);
+ VERIFY( regex_match_debug("pre/a", re) );
+ VERIFY( regex_match_debug("pre/A", re) );
+ }
+}
+
+void // "[-----]" must fail with error_range in extended syntax but compile in ECMAScript.
+test02()
+{
+ bool test __attribute__((unused)) = true;
+
+ try
+ {
+ std::regex re("[-----]", std::regex::extended);
+ VERIFY(false); // reached only if the constructor above did not throw
+ }
+ catch (const std::regex_error& e)
+ {
+ VERIFY(e.code() == std::regex_constants::error_range);
+ }
+ std::regex re("[-----]", std::regex::ECMAScript); // no handler: a throw here escapes test02
+}
+
+void // A reversed range such as "[z-a]" must be rejected with error_range.
+test03()
+{
+ bool test __attribute__((unused)) = true;
+
+ try
+ {
+ std::regex re("[z-a]", std::regex::extended);
+ VERIFY(false); // reached only if the constructor above did not throw
+ }
+ catch (const std::regex_error& e)
+ {
+ VERIFY(e.code() == std::regex_constants::error_range);
+ }
+}
+
+void // A leading '-' in a bracket expression is matched literally, ahead of the ranges.
+test04()
+{
+ bool test __attribute__((unused)) = true;
+
+ std::regex re("[-0-9a-z]"); // default syntax flags
+ VERIFY(regex_match_debug("-", re));
+ VERIFY(regex_match_debug("1", re));
+ VERIFY(regex_match_debug("w", re));
+ re.assign("[-0-9a-z]", regex_constants::basic); // same pattern, basic syntax
+ VERIFY(regex_match_debug("-", re));
+ VERIFY(regex_match_debug("1", re));
+ VERIFY(regex_match_debug("w", re));
+}
+
+int // Runs test01 through test04 in order and returns 0.
+main()
+{
+ test01();
+ test02();
+ test03();
+ test04();
+ return 0;
+}
+++ /dev/null
-// { dg-options "-std=gnu++11" }
-
-//
-// 2013-08-01 Tim Shen <timshen91@gmail.com>
-//
-// Copyright (C) 2013-2014 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING3. If not see
-// <http://www.gnu.org/licenses/>.
-
-// 28.11.2 regex_match
-// Tests Extended bracket expression against a C-string.
-
-#include <regex>
-#include <testsuite_hooks.h>
-#include <testsuite_regex.h>
-
-using namespace __gnu_test;
-using namespace std;
-
-void
-test01()
-{
- bool test __attribute__((unused)) = true;
-
- {
- std::regex re("pre/[za-x]", std::regex::extended);
- VERIFY( regex_match_debug("pre/z", re) );
- VERIFY( regex_match_debug("pre/a", re) );
- VERIFY( !regex_match_debug("pre/y", re) );
- }
- {
- std::regex re("pre/[[:uPPer:]]", std::regex::extended);
- VERIFY( regex_match_debug("pre/Z", re) );
- VERIFY( !regex_match_debug("pre/_", re) );
- VERIFY( !regex_match_debug("pre/a", re) );
- VERIFY( !regex_match_debug("pre/0", re) );
- }
- {
- std::regex re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);
- VERIFY( regex_match_debug("pre/Z", re) );
- VERIFY( regex_match_debug("pre/a", re) );
- }
- {
- std::regex re("pre/[[:w:][.tilde.]]", std::regex::extended);
- VERIFY( regex_match_debug("pre/~", re) );
- VERIFY( regex_match_debug("pre/_", re) );
- VERIFY( regex_match_debug("pre/a", re) );
- VERIFY( regex_match_debug("pre/0", re) );
- }
- {
- std::regex re("pre/[[=a=]]", std::regex::extended);
- VERIFY( regex_match_debug("pre/a", re) );
- VERIFY( regex_match_debug("pre/A", re) );
- }
-}
-
-int
-main()
-{
- test01();
- return 0;
-}