re PR libstdc++/63775 ([C++11] Regex range with leading dash (-) not working)
author Tim Shen <timshen@google.com>
Thu, 13 Nov 2014 07:40:01 +0000 (07:40 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Thu, 13 Nov 2014 07:40:01 +0000 (07:40 +0000)
PR libstdc++/63775
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
like [z-a]. Change _M_expression_term interface.
* include/bits/regex_compiler.tcc (
_Compiler<>::_M_insert_bracket_matcher,
_Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
Add testcases and move file out of extended.

From-SVN: r217461

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc [deleted file]

index d6339c9f6e9185b5c510c20abe4328daa5466d50..58563cd75b74ae03304a10564ac3d240df0cbfdb 100644 (file)
@@ -1,3 +1,15 @@
+2014-11-13  Tim Shen  <timshen@google.com>
+
+       PR libstdc++/63775
+       * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
+       _BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
+       like [z-a]. Change _M_expression_term interface.
+       * include/bits/regex_compiler.tcc (
+       _Compiler<>::_M_insert_bracket_matcher,
+       _Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
+       * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
+       Add testcases and move file out of extended.
+
 2014-11-12  Jonathan Wakely  <jwakely@redhat.com>
 
        PR libstdc++/57250
index 1bbc09dd9dcc109347a9ada817c3031b4813211e..d8880cc4794fab62df917fabdf8de07d746c07a2 100644 (file)
@@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       template<bool __icase, bool __collate>
        void
-       _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
+       _M_expression_term(pair<bool, _CharT>& __last_char,
+                          _BracketMatcher<_TraitsT, __icase, __collate>&
                           __matcher);
 
       int
@@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
+       if (__l > __r)
+         __throw_regex_error(regex_constants::error_range);
        _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
                                         _M_translator._M_transform(__r)));
 #ifdef _GLIBCXX_DEBUG
index 349d92a1200e8c572ba82b647032a967761fc871..f9598843751500ba35038c830c46da106d147481 100644 (file)
@@ -415,8 +415,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_insert_bracket_matcher(bool __neg)
     {
       _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
+      pair<bool, _CharT> __last_char; // Optional<_CharT>
+      __last_char.first = false;
+      if (!(_M_flags & regex_constants::ECMAScript))
+       if (_M_try_char())
+         {
+           __matcher._M_add_char(_M_value[0]);
+           __last_char.first = true;
+           __last_char.second = _M_value[0];
+         }
       while (!_M_match_token(_ScannerT::_S_token_bracket_end))
-       _M_expression_term(__matcher);
+       _M_expression_term(__last_char, __matcher);
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(
                      *_M_nfa,
@@ -427,7 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<bool __icase, bool __collate>
     void
     _Compiler<_TraitsT>::
-    _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
+    _M_expression_term(pair<bool, _CharT>& __last_char,
+                      _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
     {
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
        __matcher._M_add_collating_element(_M_value);
@@ -435,27 +445,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __matcher._M_add_equivalence_class(_M_value);
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
        __matcher._M_add_character_class(_M_value, false);
-      else if (_M_try_char()) // [a
+      // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
+      // except when the '-' is the first character in the bracket expression
+      // ([--0]). ECMAScript treats all '-' after a range as a normal character.
+      // Also see above, where _M_expression_term gets called.
+      //
+      // As a result, POSIX rejects [-----], but ECMAScript doesn't.
+      // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
+      // Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
+      //
+      // It turns out that no one reads BNFs ;)
+      else if (_M_try_char())
        {
-         auto __ch = _M_value[0];
-         if (_M_try_char())
+         if (!__last_char.first)
+           {
+             if (_M_value[0] == '-'
+                 && !(_M_flags & regex_constants::ECMAScript))
+               __throw_regex_error(regex_constants::error_range);
+             __matcher._M_add_char(_M_value[0]);
+             __last_char.first = true;
+             __last_char.second = _M_value[0];
+           }
+         else
            {
-             if (_M_value[0] == '-') // [a-
+             if (_M_value[0] == '-')
                {
-                 if (_M_try_char()) // [a-z]
+                 if (_M_try_char())
                    {
-                     __matcher._M_make_range(__ch, _M_value[0]);
-                     return;
+                     __matcher._M_make_range(__last_char.second , _M_value[0]);
+                     __last_char.first = false;
+                   }
+                 else
+                   {
+                     if (_M_scanner._M_get_token()
+                         != _ScannerT::_S_token_bracket_end)
+                       __throw_regex_error(regex_constants::error_range);
+                     __matcher._M_add_char(_M_value[0]);
                    }
-                 // If the dash is the last character in the bracket
-                 // expression, it is not special.
-                 if (_M_scanner._M_get_token()
-                     != _ScannerT::_S_token_bracket_end)
-                   __throw_regex_error(regex_constants::error_range);
                }
-             __matcher._M_add_char(_M_value[0]);
+             else
+               {
+                 __matcher._M_add_char(_M_value[0]);
+                 __last_char.second = _M_value[0];
+               }
            }
-         __matcher._M_add_char(__ch);
        }
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
        __matcher._M_add_character_class(_M_value,
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
new file mode 100644 (file)
index 0000000..e5cffc7
--- /dev/null
@@ -0,0 +1,126 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-01  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests bracket expressions (extended, basic, ECMAScript) against a C-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <testsuite_regex.h>
+
+using namespace __gnu_test;
+using namespace std;
+
+void
+test01()  // POSIX-extended bracket expressions: ranges, classes, collation
+{
+  bool test __attribute__((unused)) = true;
+
+  {
+    std::regex  re("pre/[za-x]", std::regex::extended);  // 'z' plus range a-x
+    VERIFY( regex_match_debug("pre/z", re) );
+    VERIFY( regex_match_debug("pre/a", re) );
+    VERIFY( !regex_match_debug("pre/y", re) );  // 'y' lies outside a-x
+  }
+  {
+    std::regex  re("pre/[[:uPPer:]]", std::regex::extended);  // mixed-case class name
+    VERIFY( regex_match_debug("pre/Z", re) );
+    VERIFY( !regex_match_debug("pre/_", re) );
+    VERIFY( !regex_match_debug("pre/a", re) );
+    VERIFY( !regex_match_debug("pre/0", re) );
+  }
+  {
+    std::regex  re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);  // icase: either case expected
+    VERIFY( regex_match_debug("pre/Z", re) );
+    VERIFY( regex_match_debug("pre/a", re) );
+  }
+  {
+    std::regex  re("pre/[[:w:][.tilde.]]", std::regex::extended);  // [:w:] class plus collating symbol
+    VERIFY( regex_match_debug("pre/~", re) );
+    VERIFY( regex_match_debug("pre/_", re) );
+    VERIFY( regex_match_debug("pre/a", re) );
+    VERIFY( regex_match_debug("pre/0", re) );
+  }
+  {
+    std::regex  re("pre/[[=a=]]", std::regex::extended);  // equivalence class of 'a'
+    VERIFY( regex_match_debug("pre/a", re) );
+    VERIFY( regex_match_debug("pre/A", re) );  // 'A' is expected to be equivalent
+  }
+}
+
+void
+test02()  // "[-----]": range error in POSIX extended, accepted in ECMAScript
+{
+  bool test __attribute__((unused)) = true;
+
+  try
+  {
+    std::regex re("[-----]", std::regex::extended);  // '-' after a range is rejected
+    VERIFY(false);  // reached only if the constructor failed to throw
+  }
+  catch (const std::regex_error& e)
+  {
+    VERIFY(e.code() == std::regex_constants::error_range);
+  }
+  std::regex re("[-----]", std::regex::ECMAScript);  // must not throw
+}
+
+void
+test03()  // a range whose endpoints are reversed must raise error_range
+{
+  bool test __attribute__((unused)) = true;
+
+  try
+  {
+    std::regex re("[z-a]", std::regex::extended);  // 'z' > 'a': invalid range
+    VERIFY(false);  // reached only if the constructor failed to throw
+  }
+  catch (const std::regex_error& e)
+  {
+    VERIFY(e.code() == std::regex_constants::error_range);
+  }
+}
+
+void
+test04()  // PR 63775: a leading '-' is a literal, later ranges still parse
+{
+  bool test __attribute__((unused)) = true;
+
+  std::regex re("[-0-9a-z]");  // no syntax flag passed: default grammar
+  VERIFY(regex_match_debug("-", re));
+  VERIFY(regex_match_debug("1", re));
+  VERIFY(regex_match_debug("w", re));
+  re.assign("[-0-9a-z]", regex_constants::basic);  // same pattern, POSIX basic
+  VERIFY(regex_match_debug("-", re));
+  VERIFY(regex_match_debug("1", re));
+  VERIFY(regex_match_debug("w", re));
+}
+
+int
+main()  // runs test01 through test04 in order
+{
+  test01();
+  test02();
+  test03();
+  test04();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc
deleted file mode 100644 (file)
index ca2a5f5..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-// { dg-options "-std=gnu++11" }
-
-//
-// 2013-08-01  Tim Shen <timshen91@gmail.com>
-//
-// Copyright (C) 2013-2014 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library.  This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING3.  If not see
-// <http://www.gnu.org/licenses/>.
-
-// 28.11.2 regex_match
-// Tests Extended bracket expression against a C-string.
-
-#include <regex>
-#include <testsuite_hooks.h>
-#include <testsuite_regex.h>
-
-using namespace __gnu_test;
-using namespace std;
-
-void
-test01()
-{
-  bool test __attribute__((unused)) = true;
-
-  {
-    std::regex  re("pre/[za-x]", std::regex::extended);
-    VERIFY( regex_match_debug("pre/z", re) );
-    VERIFY( regex_match_debug("pre/a", re) );
-    VERIFY( !regex_match_debug("pre/y", re) );
-  }
-  {
-    std::regex  re("pre/[[:uPPer:]]", std::regex::extended);
-    VERIFY( regex_match_debug("pre/Z", re) );
-    VERIFY( !regex_match_debug("pre/_", re) );
-    VERIFY( !regex_match_debug("pre/a", re) );
-    VERIFY( !regex_match_debug("pre/0", re) );
-  }
-  {
-    std::regex  re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);
-    VERIFY( regex_match_debug("pre/Z", re) );
-    VERIFY( regex_match_debug("pre/a", re) );
-  }
-  {
-    std::regex  re("pre/[[:w:][.tilde.]]", std::regex::extended);
-    VERIFY( regex_match_debug("pre/~", re) );
-    VERIFY( regex_match_debug("pre/_", re) );
-    VERIFY( regex_match_debug("pre/a", re) );
-    VERIFY( regex_match_debug("pre/0", re) );
-  }
-  {
-    std::regex  re("pre/[[=a=]]", std::regex::extended);
-    VERIFY( regex_match_debug("pre/a", re) );
-    VERIFY( regex_match_debug("pre/A", re) );
-  }
-}
-
-int
-main()
-{
-  test01();
-  return 0;
-}