re PR libstdc++/71500 (regex::icase only works on first character in a range)
author Tim Shen <timshen@google.com>
Thu, 1 Dec 2016 03:03:55 +0000 (03:03 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Thu, 1 Dec 2016 03:03:55 +0000 (03:03 +0000)
PR libstdc++/71500
* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
when the syntax is not specified.
* include/bits/regex_compiler.h (_RegexTranslator,
_RegexTranslatorBase): Partially support icase in ranges.
* include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
Refactor _M_apply to make the control flow easier to follow, and
call _M_translator._M_match_range as added previously.
* testsuite/28_regex/traits/char/icase.cc: Add new tests.
* testsuite/28_regex/traits/char/user_defined.cc: Add new tests.

From-SVN: r243093

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/testsuite/28_regex/traits/char/icase.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc

index 6d3bfcc542b10cf5b89e6a0ffe8dc4dacb531593..cd4f5aeecdbeb559a19a196791a5c910106bbecd 100644 (file)
@@ -1,3 +1,16 @@
+2016-11-30  Tim Shen  <timshen@google.com>
+
+       PR libstdc++/71500
+       * include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
+       when the syntax is not specified.
+       * include/bits/regex_compiler.h (_RegexTranslator,
+       _RegexTranslatorBase): Partially support icase in ranges.
+       * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
+       Refactor _M_apply to make the control flow easier to follow, and
+       call _M_translator._M_match_range as added previously.
+       * testsuite/28_regex/traits/char/icase.cc: Add new tests.
+       * testsuite/28_regex/traits/char/user_defined.cc: Add new tests.
+
 2016-11-30  Ville Voutilainen  <ville.voutilainen@gmail.com>
 
        Fix testsuite failures caused by the patch implementing LWG 2534.
index aadf312cfc0c5f03b95f5a4c72dbc06d7a048a1a..224d3dbb68f71440c57b9b9d0872cbb329718b88 100644 (file)
@@ -762,7 +762,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       template<typename _FwdIter>
        basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc,
                    flag_type __f)
-       : _M_flags(__f), _M_loc(std::move(__loc)),
+       : _M_flags((__f & (ECMAScript | basic | extended | awk | grep | egrep))
+                  ? __f : (__f | ECMAScript)),
+       _M_loc(std::move(__loc)),
        _M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>(
          std::move(__first), std::move(__last), _M_loc, _M_flags))
        { }
index 410d61b7da1e13b82a3b1b1262b3e9360da3c141..318cb3bd6a5e42f64a66a3e1a700a8ab30cc339f 100644 (file)
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+_GLIBCXX_BEGIN_NAMESPACE_CXX11
+
+  template<typename>
+    class regex_traits;
+
+_GLIBCXX_END_NAMESPACE_CXX11
+_GLIBCXX_END_NAMESPACE_VERSION
+
 namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -207,17 +216,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // [28.13.14]
   template<typename _TraitsT, bool __icase, bool __collate>
-    class _RegexTranslator
+    class _RegexTranslatorBase
     {
     public:
       typedef typename _TraitsT::char_type           _CharT;
       typedef typename _TraitsT::string_type         _StringT;
-      typedef typename std::conditional<__collate,
-                                       _StringT,
-                                       _CharT>::type _StrTransT;
+      typedef _StringT _StrTransT;
 
       explicit
-      _RegexTranslator(const _TraitsT& __traits)
+      _RegexTranslatorBase(const _TraitsT& __traits)
       : _M_traits(__traits)
       { }
 
@@ -235,23 +242,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StrTransT
       _M_transform(_CharT __ch) const
       {
-       return _M_transform_impl(__ch, typename integral_constant<bool,
-                                __collate>::type());
+       _StrTransT __str(1, __ch);
+       return _M_traits.transform(__str.begin(), __str.end());
       }
 
-    private:
+      // See LWG 523. It's not efficiently implementable when _TraitsT is not
+      // std::regex_traits<>, and __collate is true. See specializations for
+      // implementations of other cases.
+      bool
+      _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
+                    const _StrTransT& __s) const
+      { return __first <= __s && __s <= __last; }
+
+    protected:
+      bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
+      {
+       typedef std::ctype<_CharT> __ctype_type;
+       const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc());
+       auto __lower = __fctyp.tolower(__ch);
+       auto __upper = __fctyp.toupper(__ch);
+       return (__first <= __lower && __lower <= __last)
+         || (__first <= __upper && __upper <= __last);
+      }
+
+      const _TraitsT& _M_traits;
+    };
+
+  template<typename _TraitsT, bool __icase, bool __collate>
+    class _RegexTranslator
+    : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
+    {
+    public:
+      typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
+      using _Base::_Base;
+    };
+
+  template<typename _TraitsT, bool __icase>
+    class _RegexTranslator<_TraitsT, __icase, false>
+    : public _RegexTranslatorBase<_TraitsT, __icase, false>
+    {
+    public:
+      typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
+      typedef typename _Base::_CharT _CharT;
+      typedef _CharT _StrTransT;
+
+      using _Base::_Base;
+
       _StrTransT
-      _M_transform_impl(_CharT __ch, false_type) const
+      _M_transform(_CharT __ch) const
       { return __ch; }
 
-      _StrTransT
-      _M_transform_impl(_CharT __ch, true_type) const
+      bool
+      _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
       {
-       _StrTransT __str = _StrTransT(1, _M_translate(__ch));
-       return _M_traits.transform(__str.begin(), __str.end());
+       if (!__icase)
+         return __first <= __ch && __ch <= __last;
+       return this->_M_in_range_icase(__first, __last, __ch);
       }
+    };
 
-      const _TraitsT& _M_traits;
+  template<typename _CharType>
+    class _RegexTranslator<std::regex_traits<_CharType>, true, true>
+    : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
+    {
+    public:
+      typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
+       _Base;
+      typedef typename _Base::_CharT _CharT;
+      typedef typename _Base::_StrTransT _StrTransT;
+
+      using _Base::_Base;
+
+      bool
+      _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
+                    const _StrTransT& __str) const
+      {
+       __glibcxx_assert(__first.size() == 1);
+       __glibcxx_assert(__last.size() == 1);
+       __glibcxx_assert(__str.size() == 1);
+       return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
+      }
     };
 
   template<typename _TraitsT>
@@ -272,6 +342,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StrTransT
       _M_transform(_CharT __ch) const
       { return __ch; }
+
+      bool
+      _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
+      { return __first <= __ch && __ch <= __last; }
     };
 
   template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
index ef6ebdd2ca065117ed781391f12f7e626832e7b7..07e56d50c50775f604ba93702699f2533a8b3f67 100644 (file)
@@ -612,37 +612,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _BracketMatcher<_TraitsT, __icase, __collate>::
     _M_apply(_CharT __ch, false_type) const
     {
-      bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(),
-                                     _M_translator._M_translate(__ch));
-      if (!__ret)
-       {
-         auto __s = _M_translator._M_transform(__ch);
-         for (auto& __it : _M_range_set)
-           if (__it.first <= __s && __s <= __it.second)
-             {
-               __ret = true;
-               break;
-             }
-         if (_M_traits.isctype(__ch, _M_class_set))
-           __ret = true;
-         else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
-                            _M_traits.transform_primary(&__ch, &__ch+1))
-                  != _M_equiv_set.end())
-           __ret = true;
-         else
-           {
-             for (auto& __it : _M_neg_class_set)
-               if (!_M_traits.isctype(__ch, __it))
-                 {
-                   __ret = true;
-                   break;
-                 }
-           }
-       }
-      if (_M_is_non_matching)
-       return !__ret;
-      else
-       return __ret;
+      return [this, __ch]
+      {
+       if (std::binary_search(_M_char_set.begin(), _M_char_set.end(),
+                              _M_translator._M_translate(__ch)))
+         return true;
+       auto __s = _M_translator._M_transform(__ch);
+       for (auto& __it : _M_range_set)
+         if (_M_translator._M_match_range(__it.first, __it.second, __s))
+           return true;
+       if (_M_traits.isctype(__ch, _M_class_set))
+         return true;
+       if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
+                     _M_traits.transform_primary(&__ch, &__ch+1))
+           != _M_equiv_set.end())
+         return true;
+       for (auto& __it : _M_neg_class_set)
+         if (!_M_traits.isctype(__ch, __it))
+           return true;
+       return false;
+      }() ^ _M_is_non_matching;
     }
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc b/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc
new file mode 100644 (file)
index 0000000..97bbd07
--- /dev/null
@@ -0,0 +1,74 @@
+// { dg-do run { target c++11 } }
+
+//
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.7 Class template regex_traits [re.traits]
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  {
+    regex re("[T-f]", regex::icase);
+
+    VERIFY(regex_match("A", re));
+    VERIFY(regex_match("F", re));
+    VERIFY(regex_match("a", re));
+    VERIFY(regex_match("f", re));
+
+    VERIFY(!regex_match("G", re));
+    VERIFY(!regex_match("S", re));
+    VERIFY(!regex_match("g", re));
+    VERIFY(!regex_match("s", re));
+
+    VERIFY(regex_match("T", re));
+    VERIFY(regex_match("Z", re));
+    VERIFY(regex_match("t", re));
+    VERIFY(regex_match("z", re));
+  }
+  // icase works with std::regex_traits<>, because we know how it's implemented.
+  {
+    regex re("[T-f]", regex::icase | regex::collate);
+
+    VERIFY(regex_match("A", re));
+    VERIFY(regex_match("F", re));
+    VERIFY(regex_match("a", re));
+    VERIFY(regex_match("f", re));
+
+    VERIFY(!regex_match("G", re));
+    VERIFY(!regex_match("S", re));
+    VERIFY(!regex_match("g", re));
+    VERIFY(!regex_match("s", re));
+
+    VERIFY(regex_match("T", re));
+    VERIFY(regex_match("Z", re));
+    VERIFY(regex_match("t", re));
+    VERIFY(regex_match("z", re));
+  }
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
index 4af05634b2391fa7fd62b11a6c6b2384d94473c9..5888ce1b4a04330f69aeac58060d968ddd10c643 100644 (file)
@@ -30,6 +30,9 @@
 
 using namespace std;
 
+bool called_transform = false;
+bool called_nocase = false;
+
 template<typename CharT>
   class MyRegexTraits
   : public regex_traits<CharT>
@@ -40,14 +43,71 @@ template<typename CharT>
     {
       return c+1;
     }
+
+    CharT
+    translate_nocase(CharT c) const
+    {
+      called_nocase = true;
+      return regex_traits<CharT>::translate_nocase(c);
+    }
+
+    template<typename FwdIt>
+      basic_string<CharT>
+      transform(FwdIt begin, FwdIt end) const
+      {
+       called_transform = true;
+       return regex_traits<CharT>::transform(begin, end);
+      }
   };
 
 void
 test01()
 {
-  basic_regex<char, MyRegexTraits<char>> re(".");
-  VERIFY(!regex_match("\n", re));
-  VERIFY(!regex_match("\r", re));
+  {
+    basic_regex<char, MyRegexTraits<char>> re(".");
+    VERIFY(!regex_match("\n", re));
+    VERIFY(!regex_match("\r", re));
+  }
+  {
+    VERIFY(!called_transform);
+    basic_regex<char, MyRegexTraits<char>> re("[a]", regex::collate);
+    VERIFY(regex_match("a", re));
+    VERIFY(exchange(called_transform, false));
+  }
+  {
+    VERIFY(!called_nocase);
+    basic_regex<char, MyRegexTraits<char>> re("[a]", regex::icase);
+    VERIFY(regex_match("A", re));
+    VERIFY(exchange(called_nocase, false));
+  }
+  {
+    basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase);
+    VERIFY(regex_match("A", re));
+    VERIFY(regex_match("F", re));
+    VERIFY(regex_match("a", re));
+    VERIFY(regex_match("f", re));
+
+    VERIFY(!regex_match("G", re));
+    VERIFY(!regex_match("S", re));
+    VERIFY(!regex_match("g", re));
+    VERIFY(!regex_match("s", re));
+
+    VERIFY(regex_match("T", re));
+    VERIFY(regex_match("Z", re));
+    VERIFY(regex_match("t", re));
+    VERIFY(regex_match("z", re));
+  }
+  // icase is not applied to ranges when collate is combined with user-defined traits.
+  {
+    basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase | regex::collate);
+    VERIFY(!regex_match("A", re));
+    VERIFY(!regex_match("S", re));
+    VERIFY(regex_match("T", re));
+    VERIFY(regex_match("Z", re));
+    VERIFY(regex_match("a", re));
+    VERIFY(regex_match("f", re));
+    VERIFY(!regex_match("g", re));
+  }
 }
 
 int main()