re PR libstdc++/67015 ("^[a-z0-9][a-z0-9-]*$", std::regex::extended is miscompiled)
authorTim Shen <timshen@google.com>
Wed, 29 Jul 2015 03:45:35 +0000 (03:45 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Wed, 29 Jul 2015 03:45:35 +0000 (03:45 +0000)
PR libstdc++/67015
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_add_collating_element): Change signature
to make checking the end of bracket expression easier.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term):
Treat '-' as a valid literal if it's at the end of bracket expression.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
New testcases.

From-SVN: r226336

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc

index d7dbedd8143e4729274ca38e0312523086474e86..4447b3d9f4897802cc1845ee59cb50b68b6d125a 100644 (file)
@@ -1,3 +1,14 @@
+2015-07-29  Tim Shen  <timshen@google.com>
+
+       PR libstdc++/67015
+       * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
+       _BracketMatcher<>::_M_add_collating_element): Change signature
+       to make checking the end of bracket expression easier.
+       * include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term):
+       Treat '-' as a valid literal if it's at the end of bracket expression.
+       * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
+       New testcases.
+
 2015-07-24  Jonathan Wakely  <jwakely@redhat.com>
 
        * include/bits/atomic_futex.h [_GLIBCXX_HAVE_LINUX_FUTEX]
index 4472116227d09db0e1f6607a2b2afdfc46d6e872..0cb0c04b1ebe31e673321d4ccce2e78ce7401b6f 100644 (file)
@@ -116,8 +116,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        void
        _M_insert_bracket_matcher(bool __neg);
 
+      // Returns true if successfully matched one term and should continue.
+      // Returns false if the compiler should move on.
       template<bool __icase, bool __collate>
-       void
+       bool
        _M_expression_term(pair<bool, _CharT>& __last_char,
                           _BracketMatcher<_TraitsT, __icase, __collate>&
                           __matcher);
@@ -389,8 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
       }
 
-      void
-      _M_add_collating_element(const _StringT& __s)
+      _StringT
+      _M_add_collate_element(const _StringT& __s)
       {
        auto __st = _M_traits.lookup_collatename(__s.data(),
                                                 __s.data() + __s.size());
@@ -400,6 +402,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
+       return __st;
       }
 
       void
index 33d7118e024cfc51203876b9528a5dcee436a254..9a62311155526a4848720712004ecf82169cae9d 100644 (file)
@@ -424,8 +424,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            __last_char.first = true;
            __last_char.second = _M_value[0];
          }
-      while (!_M_match_token(_ScannerT::_S_token_bracket_end))
-       _M_expression_term(__last_char, __matcher);
+      while (_M_expression_term(__last_char, __matcher));
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(
                      *_M_nfa,
@@ -434,21 +433,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename _TraitsT>
   template<bool __icase, bool __collate>
-    void
+    bool
     _Compiler<_TraitsT>::
     _M_expression_term(pair<bool, _CharT>& __last_char,
                       _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
     {
+      if (_M_match_token(_ScannerT::_S_token_bracket_end))
+       return false;
+
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
-       __matcher._M_add_collating_element(_M_value);
+       {
+         auto __symbol = __matcher._M_add_collate_element(_M_value);
+         if (__symbol.size() == 1)
+           {
+             __last_char.first = true;
+             __last_char.second = __symbol[0];
+           }
+       }
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
        __matcher._M_add_equivalence_class(_M_value);
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
        __matcher._M_add_character_class(_M_value, false);
-      // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
-      // except when the '-' is the first character in the bracket expression
-      // ([--0]). ECMAScript treats all '-' after a range as a normal character.
-      // Also see above, where _M_expression_term gets called.
+      // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
+      // except when the '-' is the first or last character in the bracket
+      // expression ([--0]). ECMAScript treats all '-' after a range as a
+      // normal character. Also see above, where _M_expression_term gets called.
       //
       // As a result, POSIX rejects [-----], but ECMAScript doesn't.
       // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
@@ -459,10 +468,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        {
          if (!__last_char.first)
            {
+             __matcher._M_add_char(_M_value[0]);
              if (_M_value[0] == '-'
                  && !(_M_flags & regex_constants::ECMAScript))
-               __throw_regex_error(regex_constants::error_range);
-             __matcher._M_add_char(_M_value[0]);
+               {
+                 if (_M_match_token(_ScannerT::_S_token_bracket_end))
+                   return false;
+                 __throw_regex_error(regex_constants::error_range);
+               }
              __last_char.first = true;
              __last_char.second = _M_value[0];
            }
@@ -496,6 +509,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                                     _M_value[0]));
       else
        __throw_regex_error(regex_constants::error_brack);
+
+      return true;
     }
 
   template<typename _TraitsT>
index f7653c6dc9d67afae4d12191624e40b45c6c1ce2..62131a0bcc956fbe80d89a014942b34c577f2f10 100644 (file)
@@ -82,6 +82,22 @@ test02()
     VERIFY(e.code() == std::regex_constants::error_range);
   }
   std::regex re("[-----]", std::regex::ECMAScript);
+
+  VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended)));
+  VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended)));
+  VERIFY(regex_match("b", regex("[^-ac]", regex_constants::extended)));
+  VERIFY(regex_match("b", regex("[^ac-]", regex_constants::extended)));
+  VERIFY(regex_match("&", regex("[%--]", regex_constants::extended)));
+  VERIFY(regex_match(".", regex("[--@]", regex_constants::extended)));
+  try
+  {
+    regex("[a--@]", regex_constants::extended);
+    VERIFY(false);
+  }
+  catch (const std::regex_error& e)
+  {
+  }
+  VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended)));
 }
 
 void
@@ -115,6 +131,44 @@ test04()
   VERIFY(regex_match_debug("w", re));
 }
 
+// libstdc++/67015: '-' at the end of a bracket expression is a literal (extended grammar).
+void
+test05()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex lanana_namespace("^[a-z0-9]+$", regex::extended);
+  regex lsb_namespace("^_?([a-z0-9_.]+-)+[a-z0-9]+$", regex::extended);
+  regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp)$", regex::extended);
+  regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$", regex::extended); // trailing '-'
+  VERIFY(regex_match("test", debian_cron_namespace));
+  VERIFY(!regex_match("-a", debian_cron_namespace));
+  VERIFY(regex_match("a-", debian_cron_namespace));
+  regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$", regex::extended); // leading '-'
+  VERIFY(regex_match("test", debian_cron_namespace_ok));
+  VERIFY(!regex_match("-a", debian_cron_namespace_ok));
+  VERIFY(regex_match("a-", debian_cron_namespace_ok));
+}
+
+// libstdc++/67015: trailing and leading '-' in brackets, regexes built with default flags.
+void
+test06()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex lanana_namespace("^[a-z0-9]+$");
+  regex lsb_namespace("^_?([a-z0-9_.]+-)+[a-z0-9]+$");
+  regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp)$");
+  regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$");
+  VERIFY(regex_match("test", debian_cron_namespace));
+  VERIFY(!regex_match("-a", debian_cron_namespace));
+  VERIFY(regex_match("a-", debian_cron_namespace));
+  regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$");
+  VERIFY(regex_match("test", debian_cron_namespace_ok));
+  VERIFY(!regex_match("-a", debian_cron_namespace_ok));
+  VERIFY(regex_match("a-", debian_cron_namespace_ok));
+}
+
 int
 main()
 {
@@ -122,5 +176,8 @@ main()
   test02();
   test03();
   test04();
+  test05();
+  test06();
+
   return 0;
 }