From 974afa584b18a3713eb4ada1b5d657ca0f94f498 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Thu, 1 Dec 2016 03:03:55 +0000 Subject: [PATCH] re PR libstdc++/71500 (regex::icase only works on first character in a range) PR libstdc++/71500 * include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript when the syntax is not specified. * include/bits/regex_compiler.h (_RegexTranslator, _RegexTranslatorBase): Partially support icase in ranges. * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply): Refactor _M_apply to make the control flow easier to follow, and call _M_translator._M_match_range as added previously. * testsuite/28_regex/traits/char/icase.cc: Add new tests. * testsuite/28_regex/traits/char/user_defined.cc: Add new tests. From-SVN: r243093 --- libstdc++-v3/ChangeLog | 13 +++ libstdc++-v3/include/bits/regex.h | 4 +- libstdc++-v3/include/bits/regex_compiler.h | 102 +++++++++++++++--- libstdc++-v3/include/bits/regex_compiler.tcc | 51 ++++----- .../testsuite/28_regex/traits/char/icase.cc | 74 +++++++++++++ .../28_regex/traits/char/user_defined.cc | 66 +++++++++++- 6 files changed, 261 insertions(+), 49 deletions(-) create mode 100644 libstdc++-v3/testsuite/28_regex/traits/char/icase.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 6d3bfcc542b..cd4f5aeecdb 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,16 @@ +2016-11-30 Tim Shen + + PR libstdc++/71500 + * include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript + when the syntax is not specified. + * include/bits/regex_compiler.h (_RegexTranslator, + _RegexTranslatorBase): Partially support icase in ranges. + * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply): + Refactor _M_apply to make the control flow easier to follow, and + call _M_translator._M_match_range as added previously. + * testsuite/28_regex/traits/char/icase.cc: Add new tests. + * testsuite/28_regex/traits/char/user_defined.cc: Add new tests. 
+ 2016-11-30 Ville Voutilainen Fix testsuite failures caused by the patch implementing LWG 2534. diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index aadf312cfc0..224d3dbb68f 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -762,7 +762,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 template basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc, flag_type __f) - : _M_flags(__f), _M_loc(std::move(__loc)), + : _M_flags((__f & (ECMAScript | basic | extended | awk | grep | egrep)) + ? __f : (__f | ECMAScript)), + _M_loc(std::move(__loc)), _M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>( std::move(__first), std::move(__last), _M_loc, _M_flags)) { } diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 410d61b7da1..318cb3bd6a5 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -30,6 +30,15 @@ namespace std _GLIBCXX_VISIBILITY(default) { +_GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_CXX11 + + template + class regex_traits; + +_GLIBCXX_END_NAMESPACE_CXX11 +_GLIBCXX_END_NAMESPACE_VERSION + namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -207,17 +216,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // [28.13.14] template - class _RegexTranslator + class _RegexTranslatorBase { public: typedef typename _TraitsT::char_type _CharT; typedef typename _TraitsT::string_type _StringT; - typedef typename std::conditional<__collate, - _StringT, - _CharT>::type _StrTransT; + typedef _StringT _StrTransT; explicit - _RegexTranslator(const _TraitsT& __traits) + _RegexTranslatorBase(const _TraitsT& __traits) : _M_traits(__traits) { } @@ -235,23 +242,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StrTransT _M_transform(_CharT __ch) const { - return _M_transform_impl(__ch, typename integral_constant::type()); + _StrTransT __str(1, __ch); + return _M_traits.transform(__str.begin(), 
__str.end()); } - private: + // See LWG 523. It's not efficiently implementable when _TraitsT is not + // std::regex_traits<>, and __collate is true. See specializations for + // implementations of other cases. + bool + _M_match_range(const _StrTransT& __first, const _StrTransT& __last, + const _StrTransT& __s) const + { return __first <= __s && __s <= __last; } + + protected: + bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const + { + typedef std::ctype<_CharT> __ctype_type; + const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc()); + auto __lower = __fctyp.tolower(__ch); + auto __upper = __fctyp.toupper(__ch); + return (__first <= __lower && __lower <= __last) + || (__first <= __upper && __upper <= __last); + } + + const _TraitsT& _M_traits; + }; + + template + class _RegexTranslator + : public _RegexTranslatorBase<_TraitsT, __icase, __collate> + { + public: + typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base; + using _Base::_Base; + }; + + template + class _RegexTranslator<_TraitsT, __icase, false> + : public _RegexTranslatorBase<_TraitsT, __icase, false> + { + public: + typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base; + typedef typename _Base::_CharT _CharT; + typedef _CharT _StrTransT; + + using _Base::_Base; + _StrTransT - _M_transform_impl(_CharT __ch, false_type) const + _M_transform(_CharT __ch) const { return __ch; } - _StrTransT - _M_transform_impl(_CharT __ch, true_type) const + bool + _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const { - _StrTransT __str = _StrTransT(1, _M_translate(__ch)); - return _M_traits.transform(__str.begin(), __str.end()); + if (!__icase) + return __first <= __ch && __ch <= __last; + return this->_M_in_range_icase(__first, __last, __ch); } + }; - const _TraitsT& _M_traits; + template + class _RegexTranslator, true, true> + : public _RegexTranslatorBase, true, true> + { + public: + typedef _RegexTranslatorBase, true, true> + _Base; + typedef typename 
_Base::_CharT _CharT; + typedef typename _Base::_StrTransT _StrTransT; + + using _Base::_Base; + + bool + _M_match_range(const _StrTransT& __first, const _StrTransT& __last, + const _StrTransT& __str) const + { + __glibcxx_assert(__first.size() == 1); + __glibcxx_assert(__last.size() == 1); + __glibcxx_assert(__str.size() == 1); + return this->_M_in_range_icase(__first[0], __last[0], __str[0]); + } }; template @@ -272,6 +342,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StrTransT _M_transform(_CharT __ch) const { return __ch; } + + bool + _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const + { return __first <= __ch && __ch <= __last; } }; template diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index ef6ebdd2ca0..07e56d50c50 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -612,37 +612,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _BracketMatcher<_TraitsT, __icase, __collate>:: _M_apply(_CharT __ch, false_type) const { - bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(), - _M_translator._M_translate(__ch)); - if (!__ret) - { - auto __s = _M_translator._M_transform(__ch); - for (auto& __it : _M_range_set) - if (__it.first <= __s && __s <= __it.second) - { - __ret = true; - break; - } - if (_M_traits.isctype(__ch, _M_class_set)) - __ret = true; - else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), - _M_traits.transform_primary(&__ch, &__ch+1)) - != _M_equiv_set.end()) - __ret = true; - else - { - for (auto& __it : _M_neg_class_set) - if (!_M_traits.isctype(__ch, __it)) - { - __ret = true; - break; - } - } - } - if (_M_is_non_matching) - return !__ret; - else - return __ret; + return [this, __ch] + { + if (std::binary_search(_M_char_set.begin(), _M_char_set.end(), + _M_translator._M_translate(__ch))) + return true; + auto __s = _M_translator._M_transform(__ch); + for (auto& __it : _M_range_set) + if 
(_M_translator._M_match_range(__it.first, __it.second, __s)) + return true; + if (_M_traits.isctype(__ch, _M_class_set)) + return true; + if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), + _M_traits.transform_primary(&__ch, &__ch+1)) + != _M_equiv_set.end()) + return true; + for (auto& __it : _M_neg_class_set) + if (!_M_traits.isctype(__ch, __it)) + return true; + return false; + }() ^ _M_is_non_matching; } _GLIBCXX_END_NAMESPACE_VERSION diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc b/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc new file mode 100644 index 00000000000..97bbd079f51 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc @@ -0,0 +1,74 @@ +// { dg-do run { target c++11 } } + +// +// Copyright (C) 2016 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . 
+ +// 28.7 Class template regex_traits [re.traits] + +#include +#include + +using namespace std; + +void +test01() +{ + { + regex re("[T-f]", regex::icase); + + VERIFY(regex_match("A", re)); + VERIFY(regex_match("F", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + + VERIFY(!regex_match("G", re)); + VERIFY(!regex_match("S", re)); + VERIFY(!regex_match("g", re)); + VERIFY(!regex_match("s", re)); + + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("t", re)); + VERIFY(regex_match("z", re)); + } + // icase works with std::regex_traits<>, because we know how it's implemented. + { + regex re("[T-f]", regex::icase | regex::collate); + + VERIFY(regex_match("A", re)); + VERIFY(regex_match("F", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + + VERIFY(!regex_match("G", re)); + VERIFY(!regex_match("S", re)); + VERIFY(!regex_match("g", re)); + VERIFY(!regex_match("s", re)); + + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("t", re)); + VERIFY(regex_match("z", re)); + } +} + +int main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc b/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc index 4af05634b23..5888ce1b4a0 100644 --- a/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc +++ b/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc @@ -30,6 +30,9 @@ using namespace std; +bool called_transform = false; +bool called_nocase = false; + template class MyRegexTraits : public regex_traits @@ -40,14 +43,71 @@ template { return c+1; } + + CharT + translate_nocase(CharT c) const + { + called_nocase = true; + return regex_traits::translate_nocase(c); + } + + template + basic_string + transform(FwdIt begin, FwdIt end) const + { + called_transform = true; + return regex_traits::transform(begin, end); + } }; void test01() { - basic_regex> re("."); - VERIFY(!regex_match("\n", re)); - 
VERIFY(!regex_match("\r", re)); + { + basic_regex> re("."); + VERIFY(!regex_match("\n", re)); + VERIFY(!regex_match("\r", re)); + } + { + VERIFY(!called_transform); + basic_regex> re("[a]", regex::collate); + VERIFY(regex_match("a", re)); + VERIFY(exchange(called_transform, false)); + } + { + VERIFY(!called_nocase); + basic_regex> re("[a]", regex::icase); + VERIFY(regex_match("A", re)); + VERIFY(exchange(called_nocase, false)); + } + { + basic_regex> re("[T-f]", regex::icase); + VERIFY(regex_match("A", re)); + VERIFY(regex_match("F", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + + VERIFY(!regex_match("G", re)); + VERIFY(!regex_match("S", re)); + VERIFY(!regex_match("g", re)); + VERIFY(!regex_match("s", re)); + + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("t", re)); + VERIFY(regex_match("z", re)); + } + // icase doesn't participate with the presence of collate and user-defined traits. + { + basic_regex> re("[T-f]", regex::icase | regex::collate); + VERIFY(!regex_match("A", re)); + VERIFY(!regex_match("S", re)); + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + VERIFY(!regex_match("g", re)); + } } int main() -- 2.30.2