From 2bde8cac3724cd02c8114275f5c4688f25558859 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Thu, 25 Sep 2014 04:43:19 +0000 Subject: [PATCH] re PR libstdc++/63199 (Inserting std::wregex to std::vector loses some std::wregex values) PR libstdc++/63199 * include/bits/regex.h (basic_regex::basic_regex, basic_regex::assign, basic_regex::imbue, basic_regex::getloc, basic_regex::swap): Add _M_loc for basic_regex. * include/bits/regex_automaton.h: Add _M_traits for _NFA. * include/bits/regex_compiler.h (_Compiler::_M_get_nfa, __compile_nfa): Make _Compiler::_M_nfa heap allocated. * include/bits/regex_compiler.tcc (_Compiler::_Compiler): Make _Compiler::_M_nfa heap allocated. * include/bits/regex_executor.h (_Executor::_M_is_word): Fix accessing _M_traits. * include/bits/regex_executor.tcc (_Executor::_M_dfs): Fix accessing _M_traits. * testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/63199.cc: New testcase. From-SVN: r215578 --- libstdc++-v3/ChangeLog | 18 +++ libstdc++-v3/include/bits/regex.h | 44 ++++---- libstdc++-v3/include/bits/regex_automaton.h | 6 +- libstdc++-v3/include/bits/regex_compiler.h | 22 ++-- libstdc++-v3/include/bits/regex_compiler.tcc | 105 +++++++++--------- libstdc++-v3/include/bits/regex_executor.h | 4 +- libstdc++-v3/include/bits/regex_executor.tcc | 10 +- .../regex_match/ecma/wchar_t/63199.cc | 69 ++++++++++++ 8 files changed, 187 insertions(+), 91 deletions(-) create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/63199.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 34a3d003eb1..eb1348c3107 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,21 @@ +2014-09-25 Tim Shen + + PR libstdc++/63199 + * include/bits/regex.h (basic_regex::basic_regex, basic_regex::assign, + basic_regex::imbue, basic_regex::getloc, basic_regex::swap): Add + _M_loc for basic_regex. + * include/bits/regex_automaton.h: Add _M_traits for _NFA. 
+ * include/bits/regex_compiler.h (_Compiler::_M_get_nfa, __compile_nfa): + Make _Compiler::_M_nfa heap allocated. + * include/bits/regex_compiler.tcc (_Compiler::_Compiler): Make + _Compiler::_M_nfa heap allocated. + * include/bits/regex_executor.h (_Executor::_M_is_word): + Fix accessing _M_traits. + * include/bits/regex_executor.tcc (_Executor::_M_dfs): + Fix accessing _M_traits. + * testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/63199.cc: + New testcase. + 2014-09-24 Jonathan Wakely * config/abi/pre/gnu.ver: Make GLIBCXX_3.4 patterns stricter so the diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 52050890c9a..4ec20d74901 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -64,7 +64,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION inline std::shared_ptr<_NFA<_TraitsT>> __compile_nfa(const typename _TraitsT::char_type* __first, const typename _TraitsT::char_type* __last, - const _TraitsT& __traits, + const typename _TraitsT::locale_type& __loc, regex_constants::syntax_option_type __flags); _GLIBCXX_END_NAMESPACE_VERSION @@ -433,7 +433,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * character sequence. */ basic_regex() - : _M_flags(ECMAScript), _M_automaton(nullptr) + : _M_flags(ECMAScript), _M_loc(), _M_original_str(), _M_automaton(nullptr) { } /** @@ -481,10 +481,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * @param __rhs A @p regex object. 
*/ - basic_regex(const basic_regex&& __rhs) noexcept - : _M_flags(__rhs._M_flags), _M_traits(__rhs._M_traits), - _M_automaton(std::move(__rhs._M_automaton)) - { } + basic_regex(basic_regex&& __rhs) noexcept = default; /** * @brief Constructs a basic regular expression from the string @@ -520,12 +517,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION basic_regex(_FwdIter __first, _FwdIter __last, flag_type __f = ECMAScript) : _M_flags(__f), + _M_loc(), _M_original_str(__first, __last), - _M_automaton(__detail::__compile_nfa(_M_original_str.c_str(), - _M_original_str.c_str() - + _M_original_str.size(), - _M_traits, - _M_flags)) + _M_automaton(__detail::__compile_nfa<_Rx_traits>( + _M_original_str.c_str(), + _M_original_str.c_str() + _M_original_str.size(), + _M_loc, + _M_flags)) { } /** @@ -662,9 +660,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_flags = __flags; _M_original_str.assign(__s.begin(), __s.end()); auto __p = _M_original_str.c_str(); - _M_automaton = __detail::__compile_nfa(__p, - __p + _M_original_str.size(), - _M_traits, _M_flags); + _M_automaton = __detail::__compile_nfa<_Rx_traits>( + __p, + __p + _M_original_str.size(), + _M_loc, + _M_flags); return *this; } @@ -728,9 +728,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION locale_type imbue(locale_type __loc) { - auto __ret = _M_traits.imbue(__loc); - this->assign(_M_original_str, _M_flags); - return __ret; + std::swap(__loc, _M_loc); + if (_M_automaton != nullptr) + this->assign(_M_original_str, _M_flags); + return __loc; } /** @@ -739,7 +740,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ locale_type getloc() const - { return _M_traits.getloc(); } + { return _M_loc; } // [7.8.6] swap /** @@ -751,7 +752,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION swap(basic_regex& __rhs) { std::swap(_M_flags, __rhs._M_flags); - std::swap(_M_traits, __rhs._M_traits); + std::swap(_M_loc, __rhs._M_loc); + std::swap(_M_original_str, __rhs._M_original_str); std::swap(_M_automaton, __rhs._M_automaton); } @@ -761,7 +763,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { 
_M_automaton->_M_dot(__ostr); } #endif - protected: + private: typedef std::shared_ptr<__detail::_NFA<_Rx_traits>> _AutomatonPtr; template _M_original_str; _AutomatonPtr _M_automaton; }; diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index 27ec671d86d..1395dab3476 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -160,7 +160,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef _State<_TraitsT> _StateT; typedef _Matcher _MatcherT; - using _NFA_base::_NFA_base; + _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags) + : _NFA_base(__flags) + { _M_traits.imbue(__loc); } // for performance reasons _NFA objects should only be moved not copied _NFA(const _NFA&) = delete; @@ -272,6 +274,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::ostream& _M_dot(std::ostream& __ostr) const; #endif + public: + _TraitsT _M_traits; }; /// Describes a sequence of one or more %_State, its current start diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 1193a5aea5a..1bbc09dd9dc 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -57,11 +57,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef regex_constants::syntax_option_type _FlagT; _Compiler(_IterT __b, _IterT __e, - const _TraitsT& __traits, _FlagT __flags); + const typename _TraitsT::locale_type& __traits, _FlagT __flags); std::shared_ptr<_RegexT> _M_get_nfa() - { return make_shared<_RegexT>(std::move(_M_nfa)); } + { return std::move(_M_nfa); } private: typedef _Scanner<_CharT> _ScannerT; @@ -135,24 +135,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return ret; } - _FlagT _M_flags; - const _TraitsT& _M_traits; - const _CtypeT& _M_ctype; - _ScannerT _M_scanner; - _RegexT _M_nfa; - _StringT _M_value; - _StackT _M_stack; + _FlagT _M_flags; + _ScannerT _M_scanner; + shared_ptr<_RegexT> _M_nfa; + _StringT _M_value; + _StackT 
_M_stack; + const _TraitsT& _M_traits; + const _CtypeT& _M_ctype; }; template inline std::shared_ptr<_NFA<_TraitsT>> __compile_nfa(const typename _TraitsT::char_type* __first, const typename _TraitsT::char_type* __last, - const _TraitsT& __traits, + const typename _TraitsT::locale_type& __loc, regex_constants::syntax_option_type __flags) { using _Cmplr = _Compiler<_TraitsT>; - return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa(); + return _Cmplr(__first, __last, __loc, __flags)._M_get_nfa(); } // [28.13.14] diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index f15f7dd0f7b..349d92a1200 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -62,7 +62,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template _Compiler<_TraitsT>:: _Compiler(_IterT __b, _IterT __e, - const _TraitsT& __traits, _FlagT __flags) + const typename _TraitsT::locale_type& __loc, _FlagT __flags) : _M_flags((__flags & (regex_constants::ECMAScript | regex_constants::basic @@ -72,21 +72,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | regex_constants::awk)) ? 
__flags : __flags | regex_constants::ECMAScript), - _M_traits(__traits), - _M_ctype(std::use_facet<_CtypeT>(_M_traits.getloc())), - _M_scanner(__b, __e, _M_flags, _M_traits.getloc()), - _M_nfa(_M_flags) + _M_scanner(__b, __e, _M_flags, __loc), + _M_nfa(make_shared<_RegexT>(__loc, _M_flags)), + _M_traits(_M_nfa->_M_traits), + _M_ctype(std::use_facet<_CtypeT>(__loc)) { - _StateSeqT __r(_M_nfa, _M_nfa._M_start()); - __r._M_append(_M_nfa._M_insert_subexpr_begin()); + _StateSeqT __r(*_M_nfa, _M_nfa->_M_start()); + __r._M_append(_M_nfa->_M_insert_subexpr_begin()); this->_M_disjunction(); if (!_M_match_token(_ScannerT::_S_token_eof)) __throw_regex_error(regex_constants::error_paren); __r._M_append(_M_pop()); _GLIBCXX_DEBUG_ASSERT(_M_stack.empty()); - __r._M_append(_M_nfa._M_insert_subexpr_end()); - __r._M_append(_M_nfa._M_insert_accept()); - _M_nfa._M_eliminate_dummy(); + __r._M_append(_M_nfa->_M_insert_subexpr_end()); + __r._M_append(_M_nfa->_M_insert_accept()); + _M_nfa->_M_eliminate_dummy(); } template @@ -100,15 +100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateSeqT __alt1 = _M_pop(); this->_M_alternative(); _StateSeqT __alt2 = _M_pop(); - auto __end = _M_nfa._M_insert_dummy(); + auto __end = _M_nfa->_M_insert_dummy(); __alt1._M_append(__end); __alt2._M_append(__end); // __alt2 is state._M_next, __alt1 is state._M_alt. The executor // executes _M_alt before _M_next, as well as executing left // alternative before right one. 
- _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_alt(__alt2._M_start, - __alt1._M_start, false), + _M_stack.push(_StateSeqT(*_M_nfa, + _M_nfa->_M_insert_alt( + __alt2._M_start, __alt1._M_start, false), __end)); } } @@ -126,7 +126,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_stack.push(__re); } else - _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); + _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_dummy())); } template @@ -150,12 +150,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_assertion() { if (_M_match_token(_ScannerT::_S_token_line_begin)) - _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_line_begin())); + _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_begin())); else if (_M_match_token(_ScannerT::_S_token_line_end)) - _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_line_end())); + _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_end())); else if (_M_match_token(_ScannerT::_S_token_word_bound)) // _M_value[0] == 'n' means it's negative, say "not word boundary". - _M_stack.push(_StateSeqT(_M_nfa, _M_nfa. 
+ _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa-> _M_insert_word_bound(_M_value[0] == 'n'))); else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin)) { @@ -164,11 +164,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) __throw_regex_error(regex_constants::error_paren); auto __tmp = _M_pop(); - __tmp._M_append(_M_nfa._M_insert_accept()); + __tmp._M_append(_M_nfa->_M_insert_accept()); _M_stack.push( _StateSeqT( - _M_nfa, - _M_nfa._M_insert_lookahead(__tmp._M_start, __neg))); + *_M_nfa, + _M_nfa->_M_insert_lookahead(__tmp._M_start, __neg))); } else return false; @@ -191,8 +191,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __init(); auto __e = _M_pop(); - _StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id, - __e._M_start, __neg)); + _StateSeqT __r(*_M_nfa, + _M_nfa->_M_insert_repeat(_S_invalid_state_id, + __e._M_start, __neg)); __e._M_append(__r); _M_stack.push(__r); } @@ -200,17 +201,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __init(); auto __e = _M_pop(); - __e._M_append(_M_nfa._M_insert_repeat(_S_invalid_state_id, - __e._M_start, __neg)); + __e._M_append(_M_nfa->_M_insert_repeat(_S_invalid_state_id, + __e._M_start, __neg)); _M_stack.push(__e); } else if (_M_match_token(_ScannerT::_S_token_opt)) { __init(); auto __e = _M_pop(); - auto __end = _M_nfa._M_insert_dummy(); - _StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id, - __e._M_start, __neg)); + auto __end = _M_nfa->_M_insert_dummy(); + _StateSeqT __r(*_M_nfa, + _M_nfa->_M_insert_repeat(_S_invalid_state_id, + __e._M_start, __neg)); __e._M_append(__end); __r._M_append(__end); _M_stack.push(__r); @@ -222,7 +224,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (!_M_match_token(_ScannerT::_S_token_dup_count)) __throw_regex_error(regex_constants::error_badbrace); _StateSeqT __r(_M_pop()); - _StateSeqT __e(_M_nfa, _M_nfa._M_insert_dummy()); + _StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy()); long __min_rep = _M_cur_int_value(10); bool __infi = false; 
long __n; @@ -246,9 +248,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__infi) { auto __tmp = __r._M_clone(); - _StateSeqT __s(_M_nfa, - _M_nfa._M_insert_repeat(_S_invalid_state_id, - __tmp._M_start, __neg)); + _StateSeqT __s(*_M_nfa, + _M_nfa->_M_insert_repeat(_S_invalid_state_id, + __tmp._M_start, __neg)); __tmp._M_append(__s); __e._M_append(__s); } @@ -256,7 +258,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (__n < 0) __throw_regex_error(regex_constants::error_badbrace); - auto __end = _M_nfa._M_insert_dummy(); + auto __end = _M_nfa->_M_insert_dummy(); // _M_alt is the "match more" branch, and _M_next is the // "match less" one. Switch _M_alt and _M_next of all created // nodes. This is a hack but IMO works well. @@ -264,15 +266,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (long __i = 0; __i < __n; ++__i) { auto __tmp = __r._M_clone(); - auto __alt = _M_nfa._M_insert_repeat(__tmp._M_start, - __end, __neg); + auto __alt = _M_nfa->_M_insert_repeat(__tmp._M_start, + __end, __neg); __stack.push(__alt); - __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end)); + __e._M_append(_StateSeqT(*_M_nfa, __alt, __tmp._M_end)); } __e._M_append(__end); while (!__stack.empty()) { - auto& __tmp = _M_nfa[__stack.top()]; + auto& __tmp = (*_M_nfa)[__stack.top()]; __stack.pop(); swap(__tmp._M_next, __tmp._M_alt); } @@ -313,13 +315,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION else if (_M_try_char()) __INSERT_REGEX_MATCHER(_M_insert_char_matcher); else if (_M_match_token(_ScannerT::_S_token_backref)) - _M_stack.push(_StateSeqT(_M_nfa, _M_nfa. 
+ _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa-> _M_insert_backref(_M_cur_int_value(10)))); else if (_M_match_token(_ScannerT::_S_token_quoted_class)) __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher); else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin)) { - _StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy()); + _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy()); this->_M_disjunction(); if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) __throw_regex_error(regex_constants::error_paren); @@ -328,12 +330,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) { - _StateSeqT __r(_M_nfa, _M_nfa._M_insert_subexpr_begin()); + _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin()); this->_M_disjunction(); if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) __throw_regex_error(regex_constants::error_paren); __r._M_append(_M_pop()); - __r._M_append(_M_nfa._M_insert_subexpr_end()); + __r._M_append(_M_nfa->_M_insert_subexpr_end()); _M_stack.push(__r); } else if (!_M_bracket_expression()) @@ -361,8 +363,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler<_TraitsT>:: _M_insert_any_matcher_ecma() { - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher + _M_stack.push(_StateSeqT(*_M_nfa, + _M_nfa->_M_insert_matcher (_AnyMatcher<_TraitsT, true, __icase, __collate> (_M_traits)))); } @@ -373,8 +375,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler<_TraitsT>:: _M_insert_any_matcher_posix() { - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher + _M_stack.push(_StateSeqT(*_M_nfa, + _M_nfa->_M_insert_matcher (_AnyMatcher<_TraitsT, false, __icase, __collate> (_M_traits)))); } @@ -385,8 +387,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler<_TraitsT>:: _M_insert_char_matcher() { - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher + _M_stack.push(_StateSeqT(*_M_nfa, + _M_nfa->_M_insert_matcher (_CharMatcher<_TraitsT, __icase, __collate> (_M_value[0], _M_traits)))); } @@ -402,8 +404,8 @@ 
_GLIBCXX_BEGIN_NAMESPACE_VERSION (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); __matcher._M_add_character_class(_M_value, false); __matcher._M_ready(); - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher(std::move(__matcher)))); + _M_stack.push(_StateSeqT(*_M_nfa, + _M_nfa->_M_insert_matcher(std::move(__matcher)))); } template @@ -416,8 +418,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION while (!_M_match_token(_ScannerT::_S_token_bracket_end)) _M_expression_term(__matcher); __matcher._M_ready(); - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher(std::move(__matcher)))); + _M_stack.push(_StateSeqT( + *_M_nfa, + _M_nfa->_M_insert_matcher(std::move(__matcher)))); } template diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 130bc742b21..cd9e55d9689 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -125,8 +125,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_is_word(_CharT __ch) const { static const _CharT __s[2] = { 'w' }; - return _M_re._M_traits.isctype - (__ch, _M_re._M_traits.lookup_classname(__s, __s+1)); + return _M_re._M_automaton->_M_traits.isctype + (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1)); } bool diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 3ca7de3d7fd..5eab85265a5 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -312,9 +312,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __last != _M_end && __tmp != __submatch.second; ++__tmp) ++__last; - if (_M_re._M_traits.transform(__submatch.first, - __submatch.second) - == _M_re._M_traits.transform(_M_current, __last)) + if (_M_re._M_automaton->_M_traits.transform(__submatch.first, + __submatch.second) + == _M_re._M_automaton->_M_traits.transform(_M_current, __last)) { if (__last != _M_current) { @@ -380,8 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION 
case _S_opcode_alternative: if (_M_nfa._M_flags & regex_constants::ECMAScript) { - // TODO: Let DFS support ECMAScript's alternative operation. - _GLIBCXX_DEBUG_ASSERT(!__dfs_mode); + // TODO: Let BFS support ECMAScript's alternative operation. + _GLIBCXX_DEBUG_ASSERT(__dfs_mode); _M_dfs(__match_mode, __state._M_alt); // Pick lhs if it matches. Only try rhs if it doesn't. if (!_M_has_sol) diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/63199.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/63199.cc new file mode 100644 index 00000000000..cbb23f7de53 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/63199.cc @@ -0,0 +1,69 @@ +// { dg-options "-std=gnu++11" } + +// +// Copyright (C) 2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . 
+ +#include <regex> +#include <testsuite_hooks.h> +#include <testsuite_regex.h> + +using namespace __gnu_test; +using namespace std; + +// libstdc++/63199 +void +test01() +{ + bool test __attribute__((unused)) = true; + + std::setlocale(LC_ALL, ""); + + std::wstring current_token(L"II."); + + std::vector<std::wregex> regex_vector; + + for (int i = 0; i < 4; ++i) + { + std::regex_constants::syntax_option_type flag; + flag = std::regex_constants::ECMAScript | std::regex_constants::icase; + + std::wregex reg; + reg.imbue(std::locale("")); + reg.assign(L"^(M*(?:CM|DC{1,3}|D|CD|C{1,3}){0,1}(?:XC|LX{1,3}|L|XL|X{1,3}){0,1}(?:IX|VI{0,3}|IV|I{1,3}){0,1}\\.)$", flag); + + regex_vector.emplace_back(reg); + } + + for (auto cit = regex_vector.cbegin(); cit != regex_vector.cend(); ++cit) + { + std::wstring::const_iterator it1 = current_token.begin(); + std::wstring::const_iterator it2 = current_token.end(); + std::wsmatch current_token_match; + + regex_match_debug(it1, it2, current_token_match, *cit); + VERIFY(current_token_match[0] == current_token); + VERIFY(current_token_match[1] == current_token); + } +} + +int +main() +{ + test01(); + return 0; +} -- 2.30.2