From 4dae67e09042d48c5f330d540d5fdde5f51db656 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 20 May 2014 04:31:54 +0000 Subject: [PATCH] re PR libstdc++/61227 ([C++11] Regex [\w] does not work) 2014-05-20 Tim Shen PR libstdc++/61227 * include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_character_class): Add negative character class support. * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply): Likewise. * testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc: Add more testcases. From-SVN: r210630 --- libstdc++-v3/ChangeLog | 11 +++++++++++ libstdc++-v3/include/bits/regex_compiler.h | 11 ++++++++--- libstdc++-v3/include/bits/regex_compiler.tcc | 17 +++++++++++++++-- .../regex_match/ecma/char/quoted_char.cc | 10 ++++++++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 0b2bf8924df..247af1264e2 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,14 @@ +2014-05-20 Tim Shen + + PR libstdc++/61227 + * include/bits/regex_compiler.h + (_BracketMatcher<>::_M_add_character_class): Add negative character + class support. + * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply): + Likewise. + * testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc: + Add more testcases. + 2014-05-19 Jonathan Wakely * python/libstdcxx/v6/printers.py: Use Python3 raise syntax. diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index d7e21624e37..52f7235c8db 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -369,15 +369,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif } + // __neg should be true for \D, \S and \W only. 
void - _M_add_character_class(const _StringT& __s) + _M_add_character_class(const _StringT& __s, bool __neg) { auto __mask = _M_traits.lookup_classname(__s.data(), __s.data() + __s.size(), __icase); if (__mask == 0) __throw_regex_error(regex_constants::error_ctype); - _M_class_set |= __mask; + if (!__neg) + _M_class_set |= __mask; + else + _M_neg_class_set.push_back(__mask); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -387,7 +391,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_make_range(_CharT __l, _CharT __r) { _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), - _M_translator._M_transform(__r))); + _M_translator._M_transform(__r))); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -435,6 +439,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::vector<_CharT> _M_char_set; std::vector<_StringT> _M_equiv_set; std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; + std::vector<_CharClassT> _M_neg_class_set; _CharClassT _M_class_set; _TransT _M_translator; const _TraitsT& _M_traits; diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 3cf9e457ccd..472cf1ff49c 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -397,7 +397,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1); _BracketMatcher<_TraitsT, __icase, __collate> __matcher (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); - __matcher._M_add_character_class(_M_value); + __matcher._M_add_character_class(_M_value, false); __matcher._M_ready(); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_matcher(std::move(__matcher)))); @@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) __matcher._M_add_equivalence_class(_M_value); else if (_M_match_token(_ScannerT::_S_token_char_class_name)) - __matcher._M_add_character_class(_M_value); + __matcher._M_add_character_class(_M_value, false); else if (_M_try_char()) // [a { auto 
__ch = _M_value[0]; @@ -451,6 +451,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } __matcher._M_add_char(__ch); } + else if (_M_match_token(_ScannerT::_S_token_quoted_class)) + __matcher._M_add_character_class(_M_value, + _M_ctype.is(_CtypeT::upper, + _M_value[0])); else __throw_regex_error(regex_constants::error_brack); } @@ -527,6 +531,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_traits.transform_primary(&__ch, &__ch+1)) != _M_equiv_set.end()) __ret = true; + else + { + for (auto& __it : _M_neg_class_set) + if (!_M_traits.isctype(__ch, __it)) + { + __ret = true; + break; + } + } } if (_M_is_non_matching) return !__ret; diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc index e7280acbdbd..86417323516 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc @@ -44,6 +44,16 @@ test01() VERIFY(regex_match_debug("_az", regex("\\w*"))); VERIFY(regex_match_debug("!@#$%", regex("\\W*"))); VERIFY(!regex_match_debug("_01234", regex("\\W*"))); + + VERIFY(regex_match_debug("01", regex("[\\d]*"))); + VERIFY(regex_match_debug("asdfjkl", regex("[\\D]*"))); + VERIFY(!regex_match_debug("asdfjkl0", regex("[\\D]*"))); + VERIFY(regex_match_debug("\r\t\v\f ", regex("[\\s]*"))); + VERIFY(regex_match_debug("asdfjkl", regex("[\\S]*"))); + VERIFY(!regex_match_debug("asdfjkl\r", regex("[\\S]*"))); + VERIFY(regex_match_debug("_az", regex("[\\w]*"))); + VERIFY(regex_match_debug("!@#$%", regex("[\\W]*"))); + VERIFY(!regex_match_debug("_01234", regex("[\\W]*"))); } int -- 2.30.2