From: Tim Shen Date: Wed, 7 Aug 2013 16:27:36 +0000 (+0000) Subject: Makefile.am: Adjust to new files. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6cb784b639c3de7505674b56f542c8b714d7df8a;p=gcc.git Makefile.am: Adjust to new files. 2013-08-07 Tim Shen * include/Makefile.am: Adjust to new files. * include/Makefile.in: Regenerate. * include/bits/regex.h: Adjust to new interfaces. * include/bits/regex_automaton.h: New. * include/bits/regex_automaton.tcc: New. * include/bits/regex_compiler.h: Adjust to new files. * include/bits/regex_compiler.tcc: New. * include/bits/regex_constants.h: Tail spaces. * include/bits/regex_error.h: Likewise. * include/bits/regex_executor.h: New. * include/bits/regex_executor.tcc: New. * include/std/regex: Adjust to new files. * testsuite/28_regex/algorithms/regex_match/extended/ string_dispatch_01.cc: Adjust to new interfaces. From-SVN: r201573 --- diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 8b207a82661..025b482bc46 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,20 @@ +2013-08-07 Tim Shen + + * include/Makefile.am: Adjust to new files. + * include/Makefile.in: Regenerate. + * include/bits/regex.h: Adjust to new interfaces. + * include/bits/regex_automaton.h: New. + * include/bits/regex_automaton.tcc: New. + * include/bits/regex_compiler.h: Adjust to new files. + * include/bits/regex_compiler.tcc: New. + * include/bits/regex_constants.h: Tail spaces. + * include/bits/regex_error.h: Likewise. + * include/bits/regex_executor.h: New. + * include/bits/regex_executor.tcc: New. + * include/std/regex: Adjust to new files. + * testsuite/28_regex/algorithms/regex_match/extended/ + string_dispatch_01.cc: Adjust to new interfaces. + 2013-08-07 Paolo Carlini * include/ext/atomicity.h: Add #pragma GCC system_header. 
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index d08864cf7b8..5971af3edc1 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -126,14 +126,14 @@ bits_headers = \ ${bits_srcdir}/random.tcc \ ${bits_srcdir}/range_access.h \ ${bits_srcdir}/regex.h \ - ${bits_srcdir}/regex_compiler.h \ ${bits_srcdir}/regex_constants.h \ - ${bits_srcdir}/regex_cursor.h \ ${bits_srcdir}/regex_error.h \ - ${bits_srcdir}/regex_grep_matcher.h \ - ${bits_srcdir}/regex_grep_matcher.tcc \ - ${bits_srcdir}/regex_nfa.h \ - ${bits_srcdir}/regex_nfa.tcc \ + ${bits_srcdir}/regex_automaton.h \ + ${bits_srcdir}/regex_automaton.tcc \ + ${bits_srcdir}/regex_compiler.h \ + ${bits_srcdir}/regex_compiler.tcc \ + ${bits_srcdir}/regex_executor.h \ + ${bits_srcdir}/regex_executor.tcc \ ${bits_srcdir}/stream_iterator.h \ ${bits_srcdir}/streambuf_iterator.h \ ${bits_srcdir}/shared_ptr.h \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 95c5697f3fe..aa8ef43b224 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -393,14 +393,14 @@ bits_headers = \ ${bits_srcdir}/random.tcc \ ${bits_srcdir}/range_access.h \ ${bits_srcdir}/regex.h \ - ${bits_srcdir}/regex_compiler.h \ ${bits_srcdir}/regex_constants.h \ - ${bits_srcdir}/regex_cursor.h \ ${bits_srcdir}/regex_error.h \ - ${bits_srcdir}/regex_grep_matcher.h \ - ${bits_srcdir}/regex_grep_matcher.tcc \ - ${bits_srcdir}/regex_nfa.h \ - ${bits_srcdir}/regex_nfa.tcc \ + ${bits_srcdir}/regex_automaton.h \ + ${bits_srcdir}/regex_automaton.tcc \ + ${bits_srcdir}/regex_compiler.h \ + ${bits_srcdir}/regex_compiler.tcc \ + ${bits_srcdir}/regex_executor.h \ + ${bits_srcdir}/regex_executor.tcc \ ${bits_srcdir}/stream_iterator.h \ ${bits_srcdir}/streambuf_iterator.h \ ${bits_srcdir}/shared_ptr.h \ diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index fd8f1344ff2..ea32ade3f8e 100644 --- 
a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -40,7 +40,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Class regex_traits. Describes aspects of a regular expression. * - * A regular expression traits class that satisfies the requirements of + * A regular expression traits class that satisfies the requirements of * section [28.7]. * * The class %regex is parameterized around a set of related types and @@ -61,7 +61,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _BaseType _M_base; unsigned char _M_extended; static constexpr unsigned char _S_under = 1 << 0; - // FIXME: _S_blank should be removed in the future, when locale's complete. + // FIXME: _S_blank should be removed in the future, + // when locale's complete. static constexpr unsigned char _S_blank = 1 << 1; static constexpr unsigned char _S_valid_mask = 0x3; @@ -128,7 +129,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @brief Constructs a default traits object. */ regex_traits() { } - + /** * @brief Gives the length of a C-style string starting at @p __p. * @@ -153,7 +154,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION char_type translate(char_type __c) const { return __c; } - + /** * @brief Translates a character into a case-insensitive equivalent. * @@ -165,12 +166,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ char_type translate_nocase(char_type __c) const - { + { typedef std::ctype __ctype_type; const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - return __fctyp.tolower(__c); + return __fctyp.tolower(__c); } - + /** * @brief Gets a sort key for a character sequence. * @@ -209,7 +210,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * Effects: if typeid(use_facet >) == * typeid(collate_byname<_Ch_type>) and the form of the sort key - * returned by collate_byname<_Ch_type>::transform(__first, __last) + * returned by collate_byname<_Ch_type>::transform(__first, __last) * is known and can be converted into a primary sort key * then returns that key, otherwise returns an empty string. 
* @@ -239,7 +240,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * @param __first beginning of the collation element name. * @param __last one-past-the-end of the collation element name. - * + * * @returns a sequence of one or more characters that represents the * collating element consisting of the character sequence designated by * the iterator range [__first, __last). Returns an empty string if the @@ -312,13 +313,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __ch a character representing a digit. * @param __radix the radix if the numeric conversion (limited to 8, 10, * or 16). - * + * * @returns the value represented by the digit __ch in base radix if the * character __ch is a valid digit in base radix; otherwise returns -1. */ int value(_Ch_type __ch, int __radix) const; - + /** * @brief Imbues the regex_traits object with a copy of a new locale. * @@ -336,7 +337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::swap(_M_locale, __loc); return __loc; } - + /** * @brief Gets a copy of the current locale in use by the regex_traits * object. @@ -344,7 +345,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION locale_type getloc() const { return _M_locale; } - + protected: locale_type _M_locale; }; @@ -579,7 +580,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__s == __it->first) { if (__icase - && ((__it->second & (ctype_base::lower | ctype_base::upper)) != 0)) + && ((__it->second + & (ctype_base::lower | ctype_base::upper)) != 0)) return ctype_base::alpha; return __it->second; } @@ -594,7 +596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { typedef std::ctype __ctype_type; const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - + return __fctyp.is(__f._M_base, __c) // [[:w:]] || ((__f._M_extended & _RegexMask::_S_under) @@ -662,9 +664,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * character sequence. 
*/ basic_regex() - : _M_flags(ECMAScript), - _M_automaton(__detail::__compile(0, 0, - _M_traits, _M_flags)) + : _M_flags(ECMAScript), _M_automaton(nullptr) { } /** @@ -680,9 +680,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ explicit basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript) - : _M_flags(__f), - _M_automaton(__detail::__compile(__p, __p + _Rx_traits::length(__p), - _M_traits, _M_flags)) + : basic_regex(__p, __p + _Rx_traits::length(__p), __f) { } /** @@ -697,9 +695,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * @throws regex_error if @p __p is not a valid regular expression. */ - basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f) - : _M_flags(__f), - _M_automaton(__detail::__compile(__p, __p + __len, _M_traits, _M_flags)) + basic_regex(const _Ch_type* __p, + std::size_t __len, flag_type __f = ECMAScript) + : basic_regex(__p, __p + __len, __f) { } /** @@ -707,10 +705,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * @param __rhs A @p regex object. */ - basic_regex(const basic_regex& __rhs) - : _M_flags(__rhs._M_flags), _M_traits(__rhs._M_traits), - _M_automaton(__rhs._M_automaton) - { } + basic_regex(const basic_regex& __rhs) = default; /** * @brief Move-constructs a basic regular expression. @@ -733,12 +728,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ template explicit - basic_regex(const std::basic_string<_Ch_type, _Ch_traits, + basic_regex(const std::basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s, flag_type __f = ECMAScript) - : _M_flags(__f), - _M_automaton(__detail::__compile(__s.begin(), __s.end(), - _M_traits, _M_flags)) + : basic_regex(__s.begin(), __s.end(), __f) { } /** @@ -755,10 +748,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * expression. 
*/ template - basic_regex(_InputIterator __first, _InputIterator __last, + basic_regex(_InputIterator __first, _InputIterator __last, flag_type __f = ECMAScript) : _M_flags(__f), - _M_automaton(__detail::__compile(__first, __last, _M_traits, _M_flags)) + _M_automaton(__detail::_Compiler<_InputIterator, _Ch_type, _Rx_traits> + (__first, __last, _M_traits, _M_flags)._M_get_nfa()) { } /** @@ -771,9 +765,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ basic_regex(initializer_list<_Ch_type> __l, flag_type __f = ECMAScript) - : _M_flags(__f), - _M_automaton(__detail::__compile(__l.begin(), __l.end(), - _M_traits, _M_flags)) + : basic_regex(__l.begin(), __l.end(), __f) { } /** @@ -781,7 +773,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ ~basic_regex() { } - + /** * @brief Assigns one regular expression to another. */ @@ -806,7 +798,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION basic_regex& operator=(const _Ch_type* __p) { return this->assign(__p, flags()); } - + /** * @brief Replaces a regular expression with a new one constructed from * a string. @@ -831,7 +823,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION this->swap(__tmp); return *this; } - + /** * @brief The move-assignment operator. * @@ -880,7 +872,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return this->assign(string_type(__p, __len), __flags); } /** - * @brief Assigns a new regular expression to a regex object from a + * @brief Assigns a new regular expression to a regex object from a * string containing a regular expression pattern. * * @param __s A string containing a regular expression pattern. 
@@ -894,7 +886,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION basic_regex& assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s, flag_type __flags = ECMAScript) - { + { basic_regex __tmp(__s, __flags); this->swap(__tmp); return *this; @@ -942,7 +934,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION unsigned int mark_count() const { return _M_automaton->_M_sub_count() - 1; } - + /** * @brief Gets the flags used to construct the regular expression * or in the last call to assign(). @@ -950,7 +942,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION flag_type flags() const { return _M_flags; } - + // [7.8.5] locale /** * @brief Imbues the regular expression object with the given locale. @@ -960,7 +952,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION locale_type imbue(locale_type __loc) { return _M_traits.imbue(__loc); } - + /** * @brief Gets the locale currently imbued in the regular expression * object. @@ -968,7 +960,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION locale_type getloc() const { return _M_traits.getloc(); } - + // [7.8.6] swap /** * @brief Swaps the contents of two regular expression objects. 
@@ -988,17 +980,40 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_dot(std::ostream& __ostr) { _M_automaton->_M_dot(__ostr); } #endif - - const __detail::_AutomatonPtr& - _M_get_automaton() const - { return _M_automaton; } protected: - flag_type _M_flags; - _Rx_traits _M_traits; - __detail::_AutomatonPtr _M_automaton; + typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>> + _AutomatonPtr; + + template + friend std::unique_ptr< + __detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> + __detail::__get_executor(_BiIter, + _BiIter, + match_results<_BiIter, _Alloc>&, + const basic_regex<_CharT, _TraitsT>&, + regex_constants::match_flag_type); + + template + friend bool + regex_match(_B, _B, + match_results<_B, _A>&, + const basic_regex<_C, _R>&, + regex_constants::match_flag_type); + + template + friend bool + regex_search(_B, _B, + match_results<_B, _A>&, + const basic_regex<_C, _R>&, + regex_constants::match_flag_type); + + flag_type _M_flags; + _Rx_traits _M_traits; + _AutomatonPtr _M_automaton; }; - + /** @brief Standard regular expressions. */ typedef basic_regex regex; @@ -1046,7 +1061,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef std::basic_string string_type; bool matched; - + constexpr sub_match() : matched() { } /** @@ -1072,7 +1087,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ? string_type(this->first, this->second) : string_type(); } - + /** * @brief Gets the matching sequence as a string. * @@ -1085,7 +1100,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ? string_type(this->first, this->second) : string_type(); } - + /** * @brief Compares this and another matched sequence. * @@ -1111,7 +1126,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION int compare(const string_type& __s) const { return this->str().compare(__s); } - + /** * @brief Compares this sub_match to a C-style string. 
* @@ -1125,8 +1140,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION compare(const value_type* __s) const { return this->str().compare(__s); } }; - - + + /** @brief Standard regex submatch over a C-style null-terminated string. */ typedef sub_match csub_match; @@ -1142,7 +1157,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif // [7.9.2] sub_match non-member operators - + /** * @brief Tests the equivalence of two regular expression submatches. * @param __lhs First regular expression submatch. @@ -1760,7 +1775,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef std::basic_string string_type; //@} - + public: /** * @name 28.10.1 Construction, Copying, and Destruction @@ -1815,7 +1830,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ ~match_results() { } - + //@} // 28.10.2, state: @@ -1846,7 +1861,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION size_type __size = _Base_type::size(); return (__size && _Base_type::operator[](0).matched) ? __size - 2 : 0; } - + size_type max_size() const { return _Base_type::max_size(); } @@ -1859,7 +1874,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool empty() const { return size() == 0; } - + //@} /** @@ -1911,7 +1926,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION string_type str(size_type __sub = 0) const { return (*this)[__sub].str(); } - + /** * @brief Gets a %sub_match reference for the match or submatch. * @param __sub indicates the submatch. @@ -1925,7 +1940,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ const_reference operator[](size_type __sub) const - { + { _GLIBCXX_DEBUG_ASSERT( ready() ); return __sub < size() ? _Base_type::operator[](__sub) @@ -1972,7 +1987,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const_iterator begin() const { return _Base_type::begin(); } - + /** * @brief Gets an iterator to the start of the %sub_match collection. */ @@ -1986,7 +2001,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const_iterator end() const { return !empty() ? _Base_type::end() - 2 : _Base_type::end(); } - + /** * @brief Gets an iterator to one-past-the-end of the collection. 
*/ @@ -2047,21 +2062,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ string_type format(const char_type* __fmt, - match_flag_type __flags = regex_constants::format_default) const + match_flag_type __flags = regex_constants::format_default) const { string_type __result; format(std::back_inserter(__result), + __fmt, __fmt + char_traits::length(__fmt), __flags); return __result; } - //@} + //@} /** * @name 10.5 Allocator */ - //@{ + //@{ /** * @brief Gets a copy of the allocator. @@ -2069,13 +2085,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION allocator_type get_allocator() const { return _Base_type::get_allocator(); } - - //@} + + //@} /** * @name 10.6 Swap */ - //@{ + //@{ /** * @brief Swaps the contents of two match_results. @@ -2083,12 +2099,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void swap(match_results& __that) { _Base_type::swap(__that); } - //@} - + //@} + private: - friend class __detail::_SpecializedResults<_Bi_iter, _Alloc>; + template + friend class __detail::_Executor; + + template + friend class __detail::_DFSExecutor; + + template + friend class __detail::_BFSExecutor; + + template + friend bool + regex_match(_B, _B, match_results<_B, _A>&, + const basic_regex<_Ch_type, + _Rx_traits>&, + regex_constants::match_flag_type); + + template + friend bool + regex_search(_B, _B, match_results<_B, _A>&, + const basic_regex<_Ch_type, + _Rx_traits>&, + regex_constants::match_flag_type); }; - + typedef match_results cmatch; typedef match_results smatch; #ifdef _GLIBCXX_USE_WCHAR_T @@ -2179,11 +2216,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION regex_constants::match_flag_type __flags = regex_constants::match_default) { - __detail::_AutomatonPtr __a = __re._M_get_automaton(); - __detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); - __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); - __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); - return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match(); + if (__re._M_automaton == nullptr) + return false; + if 
(__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match()) + { + for (auto __it : __m) + if (!__it.matched) + __it.first = __it.second = __e; + __m.at(__m.size()).matched = false; + __m.at(__m.size()).first = __s; + __m.at(__m.size()).second = __s; + __m.at(__m.size()+1).matched = false; + __m.at(__m.size()+1).first = __e; + __m.at(__m.size()+1).second = __e; + return true; + } + return false; } /** @@ -2206,7 +2254,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const basic_regex<_Ch_type, _Rx_traits>& __re, regex_constants::match_flag_type __flags = regex_constants::match_default) - { + { match_results<_Bi_iter> __what; return regex_match(__first, __last, __what, __re, __flags); } @@ -2252,7 +2300,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _Alloc, typename _Ch_type, typename _Rx_traits> inline bool regex_match(const basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s, - match_results::const_iterator, _Alloc>& __m, const basic_regex<_Ch_type, _Rx_traits>& __re, regex_constants::match_flag_type __flags @@ -2327,29 +2375,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION regex_constants::match_flag_type __flags = regex_constants::match_default) { - __detail::_AutomatonPtr __a = __re._M_get_automaton(); - __detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); - __detail::_SpecializedCursor<_Bi_iter> __cs(__first, __last); - __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); + if (__re._M_automaton == nullptr) + return false; for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo? 
- { - __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last); - auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags); - if (__matcher->_M_search_from_first()) - { - __r._M_set_range(__m.size(), - __detail::_SpecializedCursor<_Bi_iter> - {__first, __m[0].first}); - __r._M_set_range(__m.size()+1, - __detail::_SpecializedCursor<_Bi_iter> - {__m[0].second, __last}); - __r._M_set_matched(__m.size(), - __m.prefix().first != __m.prefix().second); - __r._M_set_matched(__m.size()+1, - __m.suffix().first != __m.suffix().second); - return true; - } - } + if (__detail::__get_executor(__cur, __last, __m, __re, __flags) + ->_M_search_from_first()) + { + for (auto __it : __m) + if (!__it.matched) + __it.first = __it.second = __last; + __m.at(__m.size()).first = __first; + __m.at(__m.size()).second = __m[0].first; + __m.at(__m.size()+1).first = __m[0].second; + __m.at(__m.size()+1).second = __last; + __m.at(__m.size()).matched = + (__m.prefix().first != __m.prefix().second); + __m.at(__m.size()+1).matched = + (__m.suffix().first != __m.suffix().second); + return true; + } return false; } @@ -2513,7 +2557,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // std [28.12] Class template regex_iterator /** - * An iterator adaptor that will provide repeated calls of regex_search over + * An iterator adaptor that will provide repeated calls of regex_search over * a range until no more matches remain. */ template() const { return &_M_match; } - + /** * @brief Increments a %regex_iterator. */ regex_iterator& operator++(); - + /** * @brief Postincrements a %regex_iterator. 
*/ @@ -2604,7 +2648,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ++(*this); return __tmp; } - + private: _Bi_iter _M_begin; _Bi_iter _M_end; @@ -2665,7 +2709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } return *this; } - + typedef regex_iterator cregex_iterator; typedef regex_iterator sregex_iterator; #ifdef _GLIBCXX_USE_WCHAR_T @@ -2693,11 +2737,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef const value_type* pointer; typedef const value_type& reference; typedef std::forward_iterator_tag iterator_category; - + public: /** * @brief Default constructs a %regex_token_iterator. - * + * * A default-constructed %regex_token_iterator is a singular iterator * that will compare equal to the one-past-the-end value for any * iterator of the same type. @@ -2705,7 +2749,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION regex_token_iterator() : _M_position(), _M_result(nullptr), _M_suffix(), _M_n(0), _M_subs() { } - + /** * Constructs a %regex_token_iterator... * @param __a [IN] The start of the text to search. @@ -2987,7 +3031,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** @brief Token iterator for standard wide-character strings. */ typedef regex_token_iterator wsregex_token_iterator; #endif - + //@} // group regex _GLIBCXX_END_NAMESPACE_VERSION } // namespace diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h new file mode 100644 index 00000000000..5817156aadf --- /dev/null +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -0,0 +1,274 @@ +// class template regex -*- C++ -*- + +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. 
+ +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +/** + * @file bits/regex_automaton.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. @headername{regex} + */ + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __detail +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + /** + * @defgroup regex-detail Base and Implementation Classes + * @ingroup regex + * @{ + */ + + typedef int _StateIdT; + typedef std::set<_StateIdT> _StateSet; + static const _StateIdT _S_invalid_state_id = -1; + + template + using _Matcher = std::function; + + /// Operation codes that define the type of transitions within the base NFA + /// that represents the regular expression. 
+ enum _Opcode + { + _S_opcode_unknown = 0, + _S_opcode_alternative = 1, + _S_opcode_subexpr_begin = 4, + _S_opcode_subexpr_end = 5, + _S_opcode_match = 100, + _S_opcode_accept = 255 + }; + + template + class _State + { + public: + typedef int _OpcodeT; + typedef _Matcher<_CharT> _MatcherT; + + _OpcodeT _M_opcode; // type of outgoing transition + _StateIdT _M_next; // outgoing transition + _StateIdT _M_alt; // for _S_opcode_alternative + unsigned int _M_subexpr; // for _S_opcode_subexpr_* + _MatcherT _M_matches; // for _S_opcode_match + + explicit _State(_OpcodeT __opcode) + : _M_opcode(__opcode), _M_next(_S_invalid_state_id) + { } + + _State(const _MatcherT& __m) + : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), + _M_matches(__m) + { } + + _State(_OpcodeT __opcode, unsigned __index) + : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__index) + { } + + _State(_StateIdT __next, _StateIdT __alt) + : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt) + { } + +#ifdef _GLIBCXX_DEBUG + std::ostream& + _M_print(std::ostream& ostr) const; + + // Prints graphviz dot commands for state. + std::ostream& + _M_dot(std::ostream& __ostr, _StateIdT __id) const; +#endif + }; + + /// Base class for, um, automata. Could be an NFA or a DFA. Your choice. 
+ template + class _Automaton + { + public: + typedef unsigned int _SizeT; + + public: + virtual _SizeT + _M_sub_count() const = 0; + +#ifdef _GLIBCXX_DEBUG + virtual std::ostream& + _M_dot(std::ostream& __ostr) const = 0; +#endif + }; + + template + class _NFA + : public _Automaton<_CharT, _TraitsT>, + public std::vector<_State<_CharT, _TraitsT>> + { + public: + typedef _State<_CharT, _TraitsT> _StateT; + typedef const _Matcher<_CharT>& _MatcherT; + typedef unsigned int _SizeT; + typedef regex_constants::syntax_option_type _FlagT; + + _NFA(_FlagT __f) + : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), + _M_has_backref(false) + { } + + _FlagT + _M_options() const + { return _M_flags; } + + _StateIdT + _M_start() const + { return _M_start_state; } + + const _StateSet& + _M_final_states() const + { return _M_accepting_states; } + + _SizeT + _M_sub_count() const + { return _M_subexpr_count; } + + _StateIdT + _M_insert_accept() + { + this->push_back(_StateT(_S_opcode_accept)); + _M_accepting_states.insert(this->size()-1); + return this->size()-1; + } + + _StateIdT + _M_insert_alt(_StateIdT __next, _StateIdT __alt) + { + this->push_back(_StateT(__next, __alt)); + return this->size()-1; + } + + _StateIdT + _M_insert_matcher(_MatcherT __m) + { + this->push_back(_StateT(__m)); + return this->size()-1; + } + + _StateIdT + _M_insert_subexpr_begin() + { + auto __id = _M_subexpr_count++; + _M_paren_stack.push(__id); + this->push_back(_StateT(_S_opcode_subexpr_begin, __id)); + return this->size()-1; + } + + _StateIdT + _M_insert_subexpr_end() + { + this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.top())); + _M_paren_stack.pop(); + return this->size()-1; + } + + void + _M_set_backref(bool __b) + { _M_has_backref = __b; } + +#ifdef _GLIBCXX_DEBUG + std::ostream& + _M_dot(std::ostream& __ostr) const; +#endif + + _FlagT _M_flags; + _StateIdT _M_start_state; + _StateSet _M_accepting_states; + _SizeT _M_subexpr_count; + bool _M_has_backref; + std::stack 
_M_paren_stack; + }; + + /// Describes a sequence of one or more %_State, its current start + /// and end(s). This structure contains fragments of an NFA during + /// construction. + template + class _StateSeq + { + public: + typedef _NFA<_CharT, _TraitsT> _RegexT; + public: + // Constructs a single-node sequence + _StateSeq(_RegexT& __ss, _StateIdT __s, + _StateIdT __e = _S_invalid_state_id) + : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e) + { } + // Constructs a split sequence from two other sequences + _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2) + : _M_nfa(__e1._M_nfa), + _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)), + _M_end1(__e1._M_end1), _M_end2(__e2._M_end1) + { } + + // Constructs a split sequence from a single sequence + _StateSeq(const _StateSeq& __e, _StateIdT __id) + : _M_nfa(__e._M_nfa), + _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)), + _M_end1(__id), _M_end2(__e._M_end1) + { } + + // Constructs a copy of a %_StateSeq + _StateSeq(const _StateSeq& __rhs) + : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start), + _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2) + { } + + _StateSeq& operator=(const _StateSeq& __rhs); + + _StateIdT + _M_front() const + { return _M_start; } + + // Extends a sequence by one. + void + _M_push_back(_StateIdT __id); + + // Extends and maybe joins a sequence. + void + _M_append(_StateIdT __id); + + void + _M_append(_StateSeq& __rhs); + + // Clones an entire sequence. 
+ _StateIdT + _M_clone(); + + private: + _RegexT& _M_nfa; + _StateIdT _M_start; + _StateIdT _M_end1; + _StateIdT _M_end2; + }; + + //@} regex-detail +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace __detail +} // namespace std + +#include diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc new file mode 100644 index 00000000000..cf9c8eb3147 --- /dev/null +++ b/libstdc++-v3/include/bits/regex_automaton.tcc @@ -0,0 +1,181 @@ +// class template regex -*- C++ -*- + +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +/** + * @file bits/regex_automaton.tcc + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. 
@headername{regex} + */ + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __detail +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + +#ifdef _GLIBCXX_DEBUG + template + std::ostream& _State<_CharT, _TraitsT>:: + _M_print(std::ostream& ostr) const + { + switch (_M_opcode) + { + case _S_opcode_alternative: + ostr << "alt next=" << _M_next << " alt=" << _M_alt; + break; + case _S_opcode_subexpr_begin: + ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; + break; + case _S_opcode_subexpr_end: + ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; + break; + case _S_opcode_match: + ostr << "match next=" << _M_next; + break; + case _S_opcode_accept: + ostr << "accept next=" << _M_next; + break; + default: + ostr << "unknown next=" << _M_next; + break; + } + return ostr; + } + + // Prints graphviz dot commands for state. + template + std::ostream& _State<_CharT, _TraitsT>:: + _M_dot(std::ostream& __ostr, _StateIdT __id) const + { + switch (_M_opcode) + { + case _S_opcode_alternative: + __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" + << __id << " -> " << _M_next + << " [label=\"epsilon\", tailport=\"s\"];\n" + << __id << " -> " << _M_alt + << " [label=\"epsilon\", tailport=\"n\"];\n"; + break; + case _S_opcode_subexpr_begin: + __ostr << __id << " [label=\"" << __id << "\\nSBEGIN " + << _M_subexpr << "\"];\n" + << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; + break; + case _S_opcode_subexpr_end: + __ostr << __id << " [label=\"" << __id << "\\nSEND " + << _M_subexpr << "\"];\n" + << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; + break; + case _S_opcode_match: + __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" + << __id << " -> " << _M_next << " [label=\"\"];\n"; + break; + case _S_opcode_accept: + __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; + break; + default: + __ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n" + << __id << " -> " << _M_next << " [label=\"?\"];\n"; + break; + } + 
return __ostr; + } + + template + std::ostream& _NFA<_CharT, _TraitsT>:: + _M_dot(std::ostream& __ostr) const + { + __ostr << "digraph _Nfa {\n" + << " rankdir=LR;\n"; + for (unsigned int __i = 0; __i < this->size(); ++__i) + { this->at(__i)._M_dot(__ostr, __i); } + __ostr << "}\n"; + return __ostr; + } +#endif + + template + _StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>:: + operator=(const _StateSeq& __rhs) + { + _M_start = __rhs._M_start; + _M_end1 = __rhs._M_end1; + _M_end2 = __rhs._M_end2; + return *this; + } + + template + void _StateSeq<_CharT, _TraitsT>:: + _M_push_back(_StateIdT __id) + { + if (_M_end1 != _S_invalid_state_id) + _M_nfa[_M_end1]._M_next = __id; + _M_end1 = __id; + } + + template + void _StateSeq<_CharT, _TraitsT>:: + _M_append(_StateIdT __id) + { + if (_M_end2 != _S_invalid_state_id) + { + if (_M_end2 == _M_end1) + _M_nfa[_M_end2]._M_alt = __id; + else + _M_nfa[_M_end2]._M_next = __id; + _M_end2 = _S_invalid_state_id; + } + if (_M_end1 != _S_invalid_state_id) + _M_nfa[_M_end1]._M_next = __id; + _M_end1 = __id; + } + + template + void _StateSeq<_CharT, _TraitsT>:: + _M_append(_StateSeq& __rhs) + { + if (_M_end2 != _S_invalid_state_id) + { + if (_M_end2 == _M_end1) + _M_nfa[_M_end2]._M_alt = __rhs._M_start; + else + _M_nfa[_M_end2]._M_next = __rhs._M_start; + _M_end2 = _S_invalid_state_id; + } + if (__rhs._M_end2 != _S_invalid_state_id) + _M_end2 = __rhs._M_end2; + if (_M_end1 != _S_invalid_state_id) + _M_nfa[_M_end1]._M_next = __rhs._M_start; + _M_end1 = __rhs._M_end1; + } + + // @todo implement this function. 
+ template + _StateIdT _StateSeq<_CharT, _TraitsT>:: + _M_clone() + { return 0; } + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace __detail +} // namespace diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 2cc9da1a110..93147999528 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -39,16 +39,89 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ - /// Base class for scanner. - struct _Scanner_base - { - typedef unsigned int _StateT; + /// Matches a character range (bracket expression) + template + struct _BracketMatcher + { + typedef typename _TraitsT::char_class_type _CharClassT; + typedef typename _TraitsT::string_type _StringT; + typedef regex_constants::syntax_option_type _FlagT; + + explicit + _BracketMatcher(bool __is_non_matching, + const _TraitsT& __t, + _FlagT __flags) + : _M_is_non_matching(__is_non_matching), _M_traits(__t), + _M_flags(__flags), _M_class_set(0) + { } + + bool + operator()(_CharT) const; + + void + _M_add_char(_CharT __c) + { + if (_M_flags & regex_constants::collate) + if (_M_is_icase()) + _M_char_set.push_back(_M_traits.translate_nocase(__c)); + else + _M_char_set.push_back(_M_traits.translate(__c)); + else + _M_char_set.push_back(__c); + } + + void + _M_add_collating_element(const _StringT& __s) + { + auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end()); + if (__st.empty()) + __throw_regex_error(regex_constants::error_collate); + // TODO: digraph + _M_char_set.push_back(__st[0]); + } - static constexpr _StateT _S_state_in_brace = 1 << 0; - static constexpr _StateT _S_state_in_bracket = 1 << 1; + void + _M_add_equivalence_class(const _StringT& __s) + { + _M_add_character_class( + _M_traits.transform_primary(&*__s.begin(), &*__s.end())); + } - virtual ~_Scanner_base() { }; - }; + void + _M_add_character_class(const _StringT& __s) + { + auto __st = _M_traits. 
+ lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase()); + if (__st == 0) + __throw_regex_error(regex_constants::error_ctype); + _M_class_set |= __st; + } + + void + _M_make_range(_CharT __l, _CharT __r) + { _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); } + + bool + _M_is_icase() const + { return _M_flags & regex_constants::icase; } + + _StringT + _M_get_str(_CharT __c) const + { + auto __s = _StringT(1, + _M_is_icase() + ? _M_traits.translate_nocase(__c) + : _M_traits.translate(__c)); + return _M_traits.transform(__s.begin(), __s.end()); + } + + _TraitsT _M_traits; + _FlagT _M_flags; + bool _M_is_non_matching; + std::vector<_CharT> _M_char_set; + std::vector> _M_range_set; + _CharClassT _M_class_set; + }; /** * @brief struct _Scanner. Scans an input range for regex tokens. @@ -60,12 +133,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * constructor: different regular expression grammars will interpret * the same input pattern in syntactically different ways. */ - template - class _Scanner: public _Scanner_base + template + class _Scanner { public: - typedef _InputIterator _IteratorT; - typedef typename std::iterator_traits<_IteratorT>::value_type _CharT; + typedef unsigned int _StateT; + typedef typename std::iterator_traits<_InputIter>::value_type _CharT; typedef std::basic_string<_CharT> _StringT; typedef regex_constants::syntax_option_type _FlagT; typedef const std::ctype<_CharT> _CtypeT; @@ -103,8 +176,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _S_token_unknown }; - _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, - std::locale __loc) + _Scanner(_InputIter __begin, _InputIter __end, + _FlagT __flags, std::locale __loc) : _M_current(__begin) , _M_end(__end) , _M_flags(__flags), _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0) { _M_advance(); } @@ -144,8 +217,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_eat_collsymbol(); - _IteratorT _M_current; - _IteratorT _M_end; + static constexpr _StateT _S_state_in_brace = 1 << 0; + 
static constexpr _StateT _S_state_in_bracket = 1 << 1; + _InputIter _M_current; + _InputIter _M_end; _FlagT _M_flags; _CtypeT& _M_ctype; _TokenT _M_curToken; @@ -153,520 +228,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateT _M_state; }; - template - void - _Scanner<_InputIterator>:: - _M_advance() - { - if (_M_current == _M_end) - { - _M_curToken = _S_token_eof; - return; - } - - _CharT __c = *_M_current; - if (_M_state & _S_state_in_bracket) - { - _M_scan_in_bracket(); - return; - } - if (_M_state & _S_state_in_brace) - { - _M_scan_in_brace(); - return; - } -#if 0 - // TODO: re-enable line anchors when _M_assertion is implemented. - // See PR libstdc++/47724 - else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^')) - { - _M_curToken = _S_token_line_begin; - ++_M_current; - return; - } - else if (__c == _M_ctype.widen('$')) - { - _M_curToken = _S_token_line_end; - ++_M_current; - return; - } -#endif - else if (__c == _M_ctype.widen('.')) - { - _M_curToken = _S_token_anychar; - ++_M_current; - return; - } - else if (__c == _M_ctype.widen('*')) - { - _M_curToken = _S_token_closure0; - ++_M_current; - return; - } - else if (__c == _M_ctype.widen('+')) - { - _M_curToken = _S_token_closure1; - ++_M_current; - return; - } - else if (__c == _M_ctype.widen('|')) - { - _M_curToken = _S_token_or; - ++_M_current; - return; - } - else if (__c == _M_ctype.widen('[')) - { - if (*++_M_current == _M_ctype.widen('^')) - { - _M_curToken = _S_token_bracket_inverse_begin; - ++_M_current; - } - else - _M_curToken = _S_token_bracket_begin; - _M_state |= _S_state_in_bracket; - return; - } - else if (__c == _M_ctype.widen('\\')) - { - _M_eat_escape(); - return; - } - else if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) - { - if (__c == _M_ctype.widen('(')) - { - _M_curToken = _S_token_subexpr_begin; - ++_M_current; - return; - } - else if (__c == _M_ctype.widen(')')) - { - _M_curToken = _S_token_subexpr_end; - ++_M_current; - return; - } - else if (__c == 
_M_ctype.widen('{')) - { - _M_curToken = _S_token_interval_begin; - _M_state |= _S_state_in_brace; - ++_M_current; - return; - } - } - - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - ++_M_current; - } - - - template - void - _Scanner<_InputIterator>:: - _M_scan_in_brace() - { - if (_M_ctype.is(_CtypeT::digit, *_M_current)) - { - _M_curToken = _S_token_dup_count; - _M_curValue.assign(1, *_M_current); - ++_M_current; - while (_M_current != _M_end - && _M_ctype.is(_CtypeT::digit, *_M_current)) - { - _M_curValue += *_M_current; - ++_M_current; - } - return; - } - else if (*_M_current == _M_ctype.widen(',')) - { - _M_curToken = _S_token_comma; - ++_M_current; - return; - } - if (_M_flags & (regex_constants::basic | regex_constants::grep)) - { - if (*_M_current == _M_ctype.widen('\\')) - _M_eat_escape(); - } - else - { - if (*_M_current == _M_ctype.widen('}')) - { - _M_curToken = _S_token_interval_end; - _M_state &= ~_S_state_in_brace; - ++_M_current; - return; - } - } - } - - template - void - _Scanner<_InputIterator>:: - _M_scan_in_bracket() - { - if (*_M_current == _M_ctype.widen('[')) - { - ++_M_current; - if (_M_current == _M_end) - { - _M_curToken = _S_token_eof; - return; - } - - if (*_M_current == _M_ctype.widen('.')) - { - _M_curToken = _S_token_collsymbol; - _M_eat_collsymbol(); - return; - } - else if (*_M_current == _M_ctype.widen(':')) - { - _M_curToken = _S_token_char_class_name; - _M_eat_charclass(); - return; - } - else if (*_M_current == _M_ctype.widen('=')) - { - _M_curToken = _S_token_equiv_class_name; - _M_eat_equivclass(); - return; - } - } - else if (*_M_current == _M_ctype.widen('-')) - { - _M_curToken = _S_token_dash; - ++_M_current; - return; - } - else if (*_M_current == _M_ctype.widen(']')) - { - _M_curToken = _S_token_bracket_end; - _M_state &= ~_S_state_in_bracket; - ++_M_current; - return; - } - else if (*_M_current == _M_ctype.widen('\\')) - { - _M_eat_escape(); - return; - } - _M_curToken = _S_token_collelem_single; - 
_M_curValue.assign(1, *_M_current); - ++_M_current; - } - - // TODO implement it. - template - void - _Scanner<_InputIterator>:: - _M_eat_escape() - { - ++_M_current; - if (_M_current == _M_end) - { - _M_curToken = _S_token_eof; - return; - } - _CharT __c = *_M_current; - ++_M_current; - - if (__c == _M_ctype.widen('(')) - { - if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - } - else - _M_curToken = _S_token_subexpr_begin; - } - else if (__c == _M_ctype.widen(')')) - { - if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - } - else - _M_curToken = _S_token_subexpr_end; - } - else if (__c == _M_ctype.widen('{')) - { - if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - } - else - { - _M_curToken = _S_token_interval_begin; - _M_state |= _S_state_in_brace; - } - } - else if (__c == _M_ctype.widen('}')) - { - if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - } - else - { - if (!(_M_state && _S_state_in_brace)) - __throw_regex_error(regex_constants::error_badbrace); - _M_state &= ~_S_state_in_brace; - _M_curToken = _S_token_interval_end; - } - } - else if (__c == _M_ctype.widen('x')) - { - ++_M_current; - if (_M_current == _M_end) - { - _M_curToken = _S_token_eof; - return; - } - if (_M_ctype.is(_CtypeT::digit, *_M_current)) - { - _M_curValue.assign(1, *_M_current); - ++_M_current; - if (_M_current == _M_end) - { - _M_curToken = _S_token_eof; - return; - } - if (_M_ctype.is(_CtypeT::digit, *_M_current)) - { - _M_curValue += *_M_current; - ++_M_current; - return; - } - } - } - else if (__c == _M_ctype.widen('^') - || __c == _M_ctype.widen('.') - || __c == _M_ctype.widen('*') - || __c == _M_ctype.widen('$') - || __c == 
_M_ctype.widen('\\')) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - } - else if (_M_ctype.is(_CtypeT::digit, __c)) - { - _M_curToken = _S_token_backref; - _M_curValue.assign(1, __c); - } - else if (_M_state & _S_state_in_bracket) - { - if (__c == _M_ctype.widen('-') - || __c == _M_ctype.widen('[') - || __c == _M_ctype.widen(']')) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, __c); - } - else if ((_M_flags & regex_constants::ECMAScript) - && __c == _M_ctype.widen('b')) - { - _M_curToken = _S_token_ord_char; - _M_curValue.assign(1, _M_ctype.widen(' ')); - } - else - __throw_regex_error(regex_constants::error_escape); - } - else - __throw_regex_error(regex_constants::error_escape); - } - - // Eats a character class or throwns an exception. - // current point to ':' delimiter on entry, char after ']' on return - template - void - _Scanner<_InputIterator>:: - _M_eat_charclass() - { - ++_M_current; // skip ':' - if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_ctype); - for (_M_curValue.clear(); - _M_current != _M_end && *_M_current != _M_ctype.widen(':'); - ++_M_current) - _M_curValue += *_M_current; - if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_ctype); - ++_M_current; // skip ':' - if (*_M_current != _M_ctype.widen(']')) - __throw_regex_error(regex_constants::error_ctype); - ++_M_current; // skip ']' - } - - - template - void - _Scanner<_InputIterator>:: - _M_eat_equivclass() - { - ++_M_current; // skip '=' - if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_collate); - for (_M_curValue.clear(); - _M_current != _M_end && *_M_current != _M_ctype.widen('='); - ++_M_current) - _M_curValue += *_M_current; - if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_collate); - ++_M_current; // skip '=' - if (*_M_current != _M_ctype.widen(']')) - __throw_regex_error(regex_constants::error_collate); - ++_M_current; // skip ']' - } - - - template - void 
- _Scanner<_InputIterator>:: - _M_eat_collsymbol() - { - ++_M_current; // skip '.' - if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_collate); - for (_M_curValue.clear(); - _M_current != _M_end && *_M_current != _M_ctype.widen('.'); - ++_M_current) - _M_curValue += *_M_current; - if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_collate); - ++_M_current; // skip '.' - if (*_M_current != _M_ctype.widen(']')) - __throw_regex_error(regex_constants::error_collate); - ++_M_current; // skip ']' - } - -#ifdef _GLIBCXX_DEBUG - template - std::ostream& - _Scanner<_InputIterator>:: - _M_print(std::ostream& ostr) - { - switch (_M_curToken) - { - case _S_token_anychar: - ostr << "any-character\n"; - break; - case _S_token_backref: - ostr << "backref\n"; - break; - case _S_token_bracket_begin: - ostr << "bracket-begin\n"; - break; - case _S_token_bracket_inverse_begin: - ostr << "bracket-inverse-begin\n"; - break; - case _S_token_bracket_end: - ostr << "bracket-end\n"; - break; - case _S_token_char_class_name: - ostr << "char-class-name \"" << _M_curValue << "\"\n"; - break; - case _S_token_closure0: - ostr << "closure0\n"; - break; - case _S_token_closure1: - ostr << "closure1\n"; - break; - case _S_token_collelem_multi: - ostr << "coll-elem-multi \"" << _M_curValue << "\"\n"; - break; - case _S_token_collelem_single: - ostr << "coll-elem-single \"" << _M_curValue << "\"\n"; - break; - case _S_token_collsymbol: - ostr << "collsymbol \"" << _M_curValue << "\"\n"; - break; - case _S_token_comma: - ostr << "comma\n"; - break; - case _S_token_dash: - ostr << "dash\n"; - break; - case _S_token_dup_count: - ostr << "dup count: " << _M_curValue << "\n"; - break; - case _S_token_eof: - ostr << "EOF\n"; - break; - case _S_token_equiv_class_name: - ostr << "equiv-class-name \"" << _M_curValue << "\"\n"; - break; - case _S_token_interval_begin: - ostr << "interval begin\n"; - break; - case _S_token_interval_end: - ostr << "interval end\n"; - 
break; - case _S_token_line_begin: - ostr << "line begin\n"; - break; - case _S_token_line_end: - ostr << "line end\n"; - break; - case _S_token_opt: - ostr << "opt\n"; - break; - case _S_token_or: - ostr << "or\n"; - break; - case _S_token_ord_char: - ostr << "ordinary character: \"" << _M_value() << "\"\n"; - break; - case _S_token_subexpr_begin: - ostr << "subexpr begin\n"; - break; - case _S_token_subexpr_end: - ostr << "subexpr end\n"; - break; - case _S_token_word_begin: - ostr << "word begin\n"; - break; - case _S_token_word_end: - ostr << "word end\n"; - break; - case _S_token_unknown: - ostr << "-- unknown token --\n"; - break; - default: - _GLIBCXX_DEBUG_ASSERT(false); - } - return ostr; - } -#endif - /// Builds an NFA from an input iterator interval. - template + template class _Compiler { public: - typedef _InIter _IterT; - typedef typename std::iterator_traits<_InIter>::value_type _CharT; - typedef std::basic_string<_CharT> _StringT; - typedef regex_constants::syntax_option_type _FlagT; + typedef typename _TraitsT::string_type _StringT; + typedef _NFA<_CharT, _TraitsT> _RegexT; + typedef regex_constants::syntax_option_type _FlagT; - _Compiler(const _InIter& __b, const _InIter& __e, - _TraitsT& __traits, _FlagT __flags); + _Compiler(_InputIter __b, _InputIter __e, + const _TraitsT& __traits, _FlagT __flags); - const _Nfa& - _M_nfa() const - { return _M_state_store; } + std::shared_ptr<_RegexT> + _M_get_nfa() const + { return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); } private: - typedef _Scanner<_InIter> _ScannerT; - typedef typename _ScannerT::_TokenT _TokenT; - typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; - typedef _BracketMatcher<_InIter, _TraitsT> _BMatcherT; + typedef _Scanner<_InputIter> _ScannerT; + typedef typename _ScannerT::_TokenT _TokenT; + typedef _StateSeq<_CharT, _TraitsT> _StateSeqT; + typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT; + typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT; 
// accepts a specific token or returns false. bool @@ -720,345 +303,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION int _M_cur_int_value(int __radix); - _TraitsT& _M_traits; - _ScannerT _M_scanner; - _StringT _M_cur_value; - _Nfa _M_state_store; - _StackT _M_stack; - _FlagT _M_flags; + const _TraitsT& _M_traits; + _ScannerT _M_scanner; + _StringT _M_cur_value; + _RegexT _M_state_store; + _StackT _M_stack; + _FlagT _M_flags; }; - template - _Compiler<_InIter, _TraitsT>:: - _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits, - _Compiler<_InIter, _TraitsT>::_FlagT __flags) - : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), - _M_state_store(__flags), _M_flags(__flags) - { - typedef _StartTagger<_InIter, _TraitsT> _Start; - typedef _EndTagger<_InIter, _TraitsT> _End; - - _StateSeq __r(_M_state_store, - _M_state_store._M_insert_subexpr_begin(_Start(0))); - _M_disjunction(); - if (!_M_stack.empty()) - { - __r._M_append(_M_stack.top()); - _M_stack.pop(); - } - __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0))); - __r._M_append(_M_state_store._M_insert_accept()); - } - - template - bool - _Compiler<_InIter, _TraitsT>:: - _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token) - { - if (token == _M_scanner._M_token()) - { - _M_cur_value = _M_scanner._M_value(); - _M_scanner._M_advance(); - return true; - } - return false; - } - - template - void - _Compiler<_InIter, _TraitsT>:: - _M_disjunction() - { - this->_M_alternative(); - if (_M_match_token(_ScannerT::_S_token_or)) - { - _StateSeq __alt1 = _M_stack.top(); _M_stack.pop(); - this->_M_disjunction(); - _StateSeq __alt2 = _M_stack.top(); _M_stack.pop(); - _M_stack.push(_StateSeq(__alt1, __alt2)); - } - } - - template - void - _Compiler<_InIter, _TraitsT>:: - _M_alternative() - { - if (this->_M_term()) - { - _StateSeq __re = _M_stack.top(); _M_stack.pop(); - this->_M_alternative(); - if (!_M_stack.empty()) - { - __re._M_append(_M_stack.top()); - _M_stack.pop(); - } - 
_M_stack.push(__re); - } - } - - template - bool - _Compiler<_InIter, _TraitsT>:: - _M_term() - { - if (this->_M_assertion()) - return true; - if (this->_M_atom()) - { - this->_M_quantifier(); - return true; - } - return false; - } - - template - bool - _Compiler<_InIter, _TraitsT>:: - _M_assertion() - { - if (_M_match_token(_ScannerT::_S_token_line_begin)) - { - // __m.push(_Matcher::_S_opcode_line_begin); - return true; - } - if (_M_match_token(_ScannerT::_S_token_line_end)) - { - // __m.push(_Matcher::_S_opcode_line_end); - return true; - } - if (_M_match_token(_ScannerT::_S_token_word_begin)) - { - // __m.push(_Matcher::_S_opcode_word_begin); - return true; - } - if (_M_match_token(_ScannerT::_S_token_word_end)) - { - // __m.push(_Matcher::_S_opcode_word_end); - return true; - } - return false; - } - - template - void - _Compiler<_InIter, _TraitsT>:: - _M_quantifier() - { - if (_M_match_token(_ScannerT::_S_token_closure0)) - { - if (_M_stack.empty()) - __throw_regex_error(regex_constants::error_badrepeat); - _StateSeq __r(_M_stack.top(), -1); - __r._M_append(__r._M_front()); - _M_stack.pop(); - _M_stack.push(__r); - return; - } - if (_M_match_token(_ScannerT::_S_token_closure1)) - { - if (_M_stack.empty()) - __throw_regex_error(regex_constants::error_badrepeat); - _StateSeq __r(_M_state_store, - _M_state_store. 
- _M_insert_alt(_S_invalid_state_id, - _M_stack.top()._M_front())); - _M_stack.top()._M_append(__r); - return; - } - if (_M_match_token(_ScannerT::_S_token_opt)) - { - if (_M_stack.empty()) - __throw_regex_error(regex_constants::error_badrepeat); - _StateSeq __r(_M_stack.top(), -1); - _M_stack.pop(); - _M_stack.push(__r); - return; - } - if (_M_match_token(_ScannerT::_S_token_interval_begin)) - { - if (_M_stack.empty()) - __throw_regex_error(regex_constants::error_badrepeat); - if (!_M_match_token(_ScannerT::_S_token_dup_count)) - __throw_regex_error(regex_constants::error_badbrace); - _StateSeq __r(_M_stack.top()); - int __min_rep = _M_cur_int_value(10); - for (int __i = 1; __i < __min_rep; ++__i) - _M_stack.top()._M_append(__r._M_clone()); - if (_M_match_token(_ScannerT::_S_token_comma)) - if (_M_match_token(_ScannerT::_S_token_dup_count)) - { - int __n = _M_cur_int_value(10) - __min_rep; - if (__n < 0) - __throw_regex_error(regex_constants::error_badbrace); - for (int __i = 0; __i < __n; ++__i) - { - _StateSeq __r(_M_state_store, - _M_state_store. 
- _M_insert_alt(_S_invalid_state_id, - _M_stack.top()._M_front())); - _M_stack.top()._M_append(__r); - } - } - else - { - _StateSeq __r(_M_stack.top(), -1); - __r._M_push_back(__r._M_front()); - _M_stack.pop(); - _M_stack.push(__r); - } - if (!_M_match_token(_ScannerT::_S_token_interval_end)) - __throw_regex_error(regex_constants::error_brace); - return; - } - } - - template - bool - _Compiler<_InIter, _TraitsT>:: - _M_atom() - { - typedef _CharMatcher<_InIter, _TraitsT> _CMatcher; - typedef _StartTagger<_InIter, _TraitsT> _Start; - typedef _EndTagger<_InIter, _TraitsT> _End; - - if (_M_match_token(_ScannerT::_S_token_anychar)) - { - _M_stack.push(_StateSeq(_M_state_store, - _M_state_store._M_insert_matcher - (_AnyMatcher))); - return true; - } - if (_M_match_token(_ScannerT::_S_token_ord_char)) - { - _M_stack.push(_StateSeq(_M_state_store, - _M_state_store._M_insert_matcher - (_CMatcher(_M_cur_value[0], _M_flags, _M_traits)))); - return true; - } - if (_M_match_token(_ScannerT::_S_token_backref)) - { - // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); - _M_state_store._M_set_back_ref(true); - //return true; - } - if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) - { - int __mark = _M_state_store._M_sub_count(); - _StateSeq __r(_M_state_store, - _M_state_store. 
- _M_insert_subexpr_begin(_Start(__mark))); - this->_M_disjunction(); - if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) - __throw_regex_error(regex_constants::error_paren); - if (!_M_stack.empty()) - { - __r._M_append(_M_stack.top()); - _M_stack.pop(); - } - __r._M_append(_M_state_store._M_insert_subexpr_end - (__mark, _End(__mark))); - _M_stack.push(__r); - return true; - } - return _M_bracket_expression(); - } - - template - bool - _Compiler<_InIter, _TraitsT>:: - _M_bracket_expression() - { - bool __inverse = - _M_match_token(_ScannerT::_S_token_bracket_inverse_begin); - if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin))) - return false; - _BMatcherT __matcher( __inverse, _M_flags, _M_traits); - // special case: only if _not_ chr first after - // '[' or '[^' or if ECMAscript - if (!_M_bracket_list(__matcher) // list is empty - && !(_M_flags & regex_constants::ECMAScript)) - __throw_regex_error(regex_constants::error_brack); - _M_stack.push(_StateSeq(_M_state_store, - _M_state_store._M_insert_matcher(__matcher))); - return true; - } - - template - bool // list is non-empty - _Compiler<_InIter, _TraitsT>:: - _M_bracket_list(_BMatcherT& __matcher) - { - if (_M_match_token(_ScannerT::_S_token_bracket_end)) - return false; - _M_expression_term(__matcher); - _M_bracket_list(__matcher); - return true; - } - - template - void - _Compiler<_InIter, _TraitsT>:: - _M_expression_term(_BMatcherT& __matcher) - { - if (_M_match_token(_ScannerT::_S_token_collsymbol)) - { - __matcher._M_add_collating_element(_M_cur_value); - return; - } - if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) - { - __matcher._M_add_equivalence_class(_M_cur_value); - return; - } - if (_M_match_token(_ScannerT::_S_token_char_class_name)) - { - __matcher._M_add_character_class(_M_cur_value); - return; - } - if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a - { - auto __ch = _M_cur_value[0]; - if (_M_match_token(_ScannerT::_S_token_dash)) // [a- - { - // If 
the dash is the last character in the bracket expression, - // it is not special. - if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end) - __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-] - else // [a-z] - { - if (!_M_match_token(_ScannerT::_S_token_collelem_single)) - __throw_regex_error(regex_constants::error_range); - __matcher._M_make_range(__ch, _M_cur_value[0]); - } - } - else // [a] - __matcher._M_add_char(__ch); - return; - } - __throw_regex_error(regex_constants::error_brack); - } - - template - int - _Compiler<_InIter, _TraitsT>:: - _M_cur_int_value(int __radix) - { - int __v = 0; - for (typename _StringT::size_type __i = 0; - __i < _M_cur_value.length(); ++__i) - __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix); - return __v; - } - - template - _AutomatonPtr - __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t, - regex_constants::syntax_option_type __f) - { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t, - __f)._M_nfa())); } - //@} regex-detail _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail } // namespace std + +#include diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc new file mode 100644 index 00000000000..04301e49346 --- /dev/null +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -0,0 +1,896 @@ +// class template regex -*- C++ -*- + +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +/** + * @file bits/regex_compiler.tcc + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. @headername{regex} + */ + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __detail +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + template + void + _Scanner<_BiIter>:: + _M_advance() + { + if (_M_current == _M_end) + { + _M_curToken = _S_token_eof; + return; + } + + _CharT __c = *_M_current; + if (_M_state & _S_state_in_bracket) + { + _M_scan_in_bracket(); + return; + } + if (_M_state & _S_state_in_brace) + { + _M_scan_in_brace(); + return; + } +#if 0 + // TODO: re-enable line anchors when _M_assertion is implemented. 
+ // See PR libstdc++/47724 + else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^')) + { + _M_curToken = _S_token_line_begin; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen('$')) + { + _M_curToken = _S_token_line_end; + ++_M_current; + return; + } +#endif + else if (__c == _M_ctype.widen('.')) + { + _M_curToken = _S_token_anychar; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen('*')) + { + _M_curToken = _S_token_closure0; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen('+')) + { + _M_curToken = _S_token_closure1; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen('|')) + { + _M_curToken = _S_token_or; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen('[')) + { + if (*++_M_current == _M_ctype.widen('^')) + { + _M_curToken = _S_token_bracket_inverse_begin; + ++_M_current; + } + else + _M_curToken = _S_token_bracket_begin; + _M_state |= _S_state_in_bracket; + return; + } + else if (__c == _M_ctype.widen('\\')) + { + _M_eat_escape(); + return; + } + else if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) + { + if (__c == _M_ctype.widen('(')) + { + _M_curToken = _S_token_subexpr_begin; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen(')')) + { + _M_curToken = _S_token_subexpr_end; + ++_M_current; + return; + } + else if (__c == _M_ctype.widen('{')) + { + _M_curToken = _S_token_interval_begin; + _M_state |= _S_state_in_brace; + ++_M_current; + return; + } + } + + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + ++_M_current; + } + + template + void + _Scanner<_BiIter>:: + _M_scan_in_brace() + { + if (_M_ctype.is(_CtypeT::digit, *_M_current)) + { + _M_curToken = _S_token_dup_count; + _M_curValue.assign(1, *_M_current); + ++_M_current; + while (_M_current != _M_end + && _M_ctype.is(_CtypeT::digit, *_M_current)) + { + _M_curValue += *_M_current; + ++_M_current; + } + return; + } + else if (*_M_current == _M_ctype.widen(',')) + { + _M_curToken = 
_S_token_comma; + ++_M_current; + return; + } + if (_M_flags & (regex_constants::basic | regex_constants::grep)) + { + if (*_M_current == _M_ctype.widen('\\')) + _M_eat_escape(); + } + else + { + if (*_M_current == _M_ctype.widen('}')) + { + _M_curToken = _S_token_interval_end; + _M_state &= ~_S_state_in_brace; + ++_M_current; + return; + } + } + } + + template + void + _Scanner<_BiIter>:: + _M_scan_in_bracket() + { + if (*_M_current == _M_ctype.widen('[')) + { + ++_M_current; + if (_M_current == _M_end) + { + _M_curToken = _S_token_eof; + return; + } + + if (*_M_current == _M_ctype.widen('.')) + { + _M_curToken = _S_token_collsymbol; + _M_eat_collsymbol(); + return; + } + else if (*_M_current == _M_ctype.widen(':')) + { + _M_curToken = _S_token_char_class_name; + _M_eat_charclass(); + return; + } + else if (*_M_current == _M_ctype.widen('=')) + { + _M_curToken = _S_token_equiv_class_name; + _M_eat_equivclass(); + return; + } + } + else if (*_M_current == _M_ctype.widen('-')) + { + _M_curToken = _S_token_dash; + ++_M_current; + return; + } + else if (*_M_current == _M_ctype.widen(']')) + { + _M_curToken = _S_token_bracket_end; + _M_state &= ~_S_state_in_bracket; + ++_M_current; + return; + } + else if (*_M_current == _M_ctype.widen('\\')) + { + _M_eat_escape(); + return; + } + _M_curToken = _S_token_collelem_single; + _M_curValue.assign(1, *_M_current); + ++_M_current; + } + + // TODO Complete it. 
+ template + void + _Scanner<_BiIter>:: + _M_eat_escape() + { + ++_M_current; + if (_M_current == _M_end) + { + _M_curToken = _S_token_eof; + return; + } + _CharT __c = *_M_current; + ++_M_current; + + if (__c == _M_ctype.widen('(')) + { + if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + else + _M_curToken = _S_token_subexpr_begin; + } + else if (__c == _M_ctype.widen(')')) + { + if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + else + _M_curToken = _S_token_subexpr_end; + } + else if (__c == _M_ctype.widen('{')) + { + if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + else + { + _M_curToken = _S_token_interval_begin; + _M_state |= _S_state_in_brace; + } + } + else if (__c == _M_ctype.widen('}')) + { + if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + else + { + if (!(_M_state && _S_state_in_brace)) + __throw_regex_error(regex_constants::error_badbrace); + _M_state &= ~_S_state_in_brace; + _M_curToken = _S_token_interval_end; + } + } + else if (__c == _M_ctype.widen('x')) + { + ++_M_current; + if (_M_current == _M_end) + { + _M_curToken = _S_token_eof; + return; + } + if (_M_ctype.is(_CtypeT::digit, *_M_current)) + { + _M_curValue.assign(1, *_M_current); + ++_M_current; + if (_M_current == _M_end) + { + _M_curToken = _S_token_eof; + return; + } + if (_M_ctype.is(_CtypeT::digit, *_M_current)) + { + _M_curValue += *_M_current; + ++_M_current; + return; + } + } + } + else if (__c == _M_ctype.widen('^') + || __c == _M_ctype.widen('.') + || __c == _M_ctype.widen('*') + || __c == _M_ctype.widen('$') + || __c == _M_ctype.widen('\\')) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + 
else if (_M_ctype.is(_CtypeT::digit, __c)) + { + _M_curToken = _S_token_backref; + _M_curValue.assign(1, __c); + } + else if (_M_state & _S_state_in_bracket) + { + if (__c == _M_ctype.widen('-') + || __c == _M_ctype.widen('[') + || __c == _M_ctype.widen(']')) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + else if ((_M_flags & regex_constants::ECMAScript) + && __c == _M_ctype.widen('b')) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, _M_ctype.widen(' ')); + } + else + __throw_regex_error(regex_constants::error_escape); + } + else + __throw_regex_error(regex_constants::error_escape); + } + + // Eats a character class or throwns an exception. + // current point to ':' delimiter on entry, char after ']' on return + template + void + _Scanner<_BiIter>:: + _M_eat_charclass() + { + ++_M_current; // skip ':' + if (_M_current == _M_end) + __throw_regex_error(regex_constants::error_ctype); + for (_M_curValue.clear(); + _M_current != _M_end && *_M_current != _M_ctype.widen(':'); + ++_M_current) + _M_curValue += *_M_current; + if (_M_current == _M_end) + __throw_regex_error(regex_constants::error_ctype); + ++_M_current; // skip ':' + if (*_M_current != _M_ctype.widen(']')) + __throw_regex_error(regex_constants::error_ctype); + ++_M_current; // skip ']' + } + + + template + void + _Scanner<_BiIter>:: + _M_eat_equivclass() + { + ++_M_current; // skip '=' + if (_M_current == _M_end) + __throw_regex_error(regex_constants::error_collate); + for (_M_curValue.clear(); + _M_current != _M_end && *_M_current != _M_ctype.widen('='); + ++_M_current) + _M_curValue += *_M_current; + if (_M_current == _M_end) + __throw_regex_error(regex_constants::error_collate); + ++_M_current; // skip '=' + if (*_M_current != _M_ctype.widen(']')) + __throw_regex_error(regex_constants::error_collate); + ++_M_current; // skip ']' + } + + + template + void + _Scanner<_BiIter>:: + _M_eat_collsymbol() + { + ++_M_current; // skip '.' 
+ if (_M_current == _M_end) + __throw_regex_error(regex_constants::error_collate); + for (_M_curValue.clear(); + _M_current != _M_end && *_M_current != _M_ctype.widen('.'); + ++_M_current) + _M_curValue += *_M_current; + if (_M_current == _M_end) + __throw_regex_error(regex_constants::error_collate); + ++_M_current; // skip '.' + if (*_M_current != _M_ctype.widen(']')) + __throw_regex_error(regex_constants::error_collate); + ++_M_current; // skip ']' + } + +#ifdef _GLIBCXX_DEBUG + template + std::ostream& + _Scanner<_BiIter>:: + _M_print(std::ostream& ostr) + { + switch (_M_curToken) + { + case _S_token_anychar: + ostr << "any-character\n"; + break; + case _S_token_backref: + ostr << "backref\n"; + break; + case _S_token_bracket_begin: + ostr << "bracket-begin\n"; + break; + case _S_token_bracket_inverse_begin: + ostr << "bracket-inverse-begin\n"; + break; + case _S_token_bracket_end: + ostr << "bracket-end\n"; + break; + case _S_token_char_class_name: + ostr << "char-class-name \"" << _M_curValue << "\"\n"; + break; + case _S_token_closure0: + ostr << "closure0\n"; + break; + case _S_token_closure1: + ostr << "closure1\n"; + break; + case _S_token_collelem_multi: + ostr << "coll-elem-multi \"" << _M_curValue << "\"\n"; + break; + case _S_token_collelem_single: + ostr << "coll-elem-single \"" << _M_curValue << "\"\n"; + break; + case _S_token_collsymbol: + ostr << "collsymbol \"" << _M_curValue << "\"\n"; + break; + case _S_token_comma: + ostr << "comma\n"; + break; + case _S_token_dash: + ostr << "dash\n"; + break; + case _S_token_dup_count: + ostr << "dup count: " << _M_curValue << "\n"; + break; + case _S_token_eof: + ostr << "EOF\n"; + break; + case _S_token_equiv_class_name: + ostr << "equiv-class-name \"" << _M_curValue << "\"\n"; + break; + case _S_token_interval_begin: + ostr << "interval begin\n"; + break; + case _S_token_interval_end: + ostr << "interval end\n"; + break; + case _S_token_line_begin: + ostr << "line begin\n"; + break; + case 
_S_token_line_end: + ostr << "line end\n"; + break; + case _S_token_opt: + ostr << "opt\n"; + break; + case _S_token_or: + ostr << "or\n"; + break; + case _S_token_ord_char: + ostr << "ordinary character: \"" << _M_value() << "\"\n"; + break; + case _S_token_subexpr_begin: + ostr << "subexpr begin\n"; + break; + case _S_token_subexpr_end: + ostr << "subexpr end\n"; + break; + case _S_token_word_begin: + ostr << "word begin\n"; + break; + case _S_token_word_end: + ostr << "word end\n"; + break; + case _S_token_unknown: + ostr << "-- unknown token --\n"; + break; + default: + _GLIBCXX_DEBUG_ASSERT(false); + } + return ostr; + } +#endif + + template + _Compiler<_InputIter, _CharT, _TraitsT>:: + _Compiler(_InputIter __b, _InputIter __e, + const _TraitsT& __traits, _FlagT __flags) + : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), + _M_state_store(__flags), _M_flags(__flags) + { + _StateSeqT __r(_M_state_store, + _M_state_store._M_insert_subexpr_begin()); + _M_disjunction(); + if (!_M_stack.empty()) + { + __r._M_append(_M_stack.top()); + _M_stack.pop(); + } + __r._M_append(_M_state_store._M_insert_subexpr_end()); + __r._M_append(_M_state_store._M_insert_accept()); + } + + template + bool + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_match_token(_Compiler<_InputIter, _CharT, _TraitsT>::_TokenT token) + { + if (token == _M_scanner._M_token()) + { + _M_cur_value = _M_scanner._M_value(); + _M_scanner._M_advance(); + return true; + } + return false; + } + + template + void + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_disjunction() + { + this->_M_alternative(); + if (_M_match_token(_ScannerT::_S_token_or)) + { + _StateSeqT __alt1 = _M_stack.top(); _M_stack.pop(); + this->_M_disjunction(); + _StateSeqT __alt2 = _M_stack.top(); _M_stack.pop(); + _M_stack.push(_StateSeqT(__alt1, __alt2)); + } + } + + template + void + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_alternative() + { + if (this->_M_term()) + { + _StateSeqT __re = _M_stack.top(); 
_M_stack.pop(); + this->_M_alternative(); + if (!_M_stack.empty()) + { + __re._M_append(_M_stack.top()); + _M_stack.pop(); + } + _M_stack.push(__re); + } + } + + template + bool + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_term() + { + if (this->_M_assertion()) + return true; + if (this->_M_atom()) + { + this->_M_quantifier(); + return true; + } + return false; + } + + template + bool + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_assertion() + { + if (_M_match_token(_ScannerT::_S_token_line_begin)) + { + // __m.push(_Matcher::_S_opcode_line_begin); + return true; + } + if (_M_match_token(_ScannerT::_S_token_line_end)) + { + // __m.push(_Matcher::_S_opcode_line_end); + return true; + } + if (_M_match_token(_ScannerT::_S_token_word_begin)) + { + // __m.push(_Matcher::_S_opcode_word_begin); + return true; + } + if (_M_match_token(_ScannerT::_S_token_word_end)) + { + // __m.push(_Matcher::_S_opcode_word_end); + return true; + } + return false; + } + + template + void + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_quantifier() + { + if (_M_match_token(_ScannerT::_S_token_closure0)) + { + if (_M_stack.empty()) + __throw_regex_error(regex_constants::error_badrepeat); + _StateSeqT __r(_M_stack.top(), -1); + __r._M_append(__r._M_front()); + _M_stack.pop(); + _M_stack.push(__r); + return; + } + if (_M_match_token(_ScannerT::_S_token_closure1)) + { + if (_M_stack.empty()) + __throw_regex_error(regex_constants::error_badrepeat); + _StateSeqT __r(_M_state_store, + _M_state_store. 
+ _M_insert_alt(_S_invalid_state_id, + _M_stack.top()._M_front())); + _M_stack.top()._M_append(__r); + return; + } + if (_M_match_token(_ScannerT::_S_token_opt)) + { + if (_M_stack.empty()) + __throw_regex_error(regex_constants::error_badrepeat); + _StateSeqT __r(_M_stack.top(), -1); + _M_stack.pop(); + _M_stack.push(__r); + return; + } + if (_M_match_token(_ScannerT::_S_token_interval_begin)) + { + if (_M_stack.empty()) + __throw_regex_error(regex_constants::error_badrepeat); + if (!_M_match_token(_ScannerT::_S_token_dup_count)) + __throw_regex_error(regex_constants::error_badbrace); + _StateSeqT __r(_M_stack.top()); + int __min_rep = _M_cur_int_value(10); + for (int __i = 1; __i < __min_rep; ++__i) + _M_stack.top()._M_append(__r._M_clone()); + if (_M_match_token(_ScannerT::_S_token_comma)) + if (_M_match_token(_ScannerT::_S_token_dup_count)) + { + int __n = _M_cur_int_value(10) - __min_rep; + if (__n < 0) + __throw_regex_error(regex_constants::error_badbrace); + for (int __i = 0; __i < __n; ++__i) + { + _StateSeqT __r(_M_state_store, + _M_state_store. 
+ _M_insert_alt(_S_invalid_state_id, + _M_stack.top()._M_front())); + _M_stack.top()._M_append(__r); + } + } + else + { + _StateSeqT __r(_M_stack.top(), -1); + __r._M_push_back(__r._M_front()); + _M_stack.pop(); + _M_stack.push(__r); + } + if (!_M_match_token(_ScannerT::_S_token_interval_end)) + __throw_regex_error(regex_constants::error_brace); + return; + } + } + + template + bool + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_atom() + { + if (_M_match_token(_ScannerT::_S_token_anychar)) + { + const static auto& + __any_matcher = [](_CharT) -> bool + { return true; }; + + _M_stack.push(_StateSeqT(_M_state_store, + _M_state_store._M_insert_matcher + (__any_matcher))); + return true; + } + if (_M_match_token(_ScannerT::_S_token_ord_char)) + { + auto __c = _M_cur_value[0]; + __detail::_Matcher<_CharT> f; + if (_M_flags & regex_constants::icase) + { + auto __traits = this->_M_traits; + __c = __traits.translate_nocase(__c); + f = [__traits, __c](_CharT __ch) -> bool + { return __traits.translate_nocase(__ch) == __c; }; + } + else + f = [__c](_CharT __ch) -> bool + { return __ch == __c; }; + + _M_stack.push(_StateSeqT(_M_state_store, + _M_state_store._M_insert_matcher(f))); + return true; + } + if (_M_match_token(_ScannerT::_S_token_backref)) + { + // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); + _M_state_store._M_set_backref(true); + //return true; + } + if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) + { + int __mark = _M_state_store._M_sub_count(); + _StateSeqT __r(_M_state_store, + _M_state_store. 
+ _M_insert_subexpr_begin()); + this->_M_disjunction(); + if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) + __throw_regex_error(regex_constants::error_paren); + if (!_M_stack.empty()) + { + __r._M_append(_M_stack.top()); + _M_stack.pop(); + } + __r._M_append(_M_state_store._M_insert_subexpr_end()); + _M_stack.push(__r); + return true; + } + return _M_bracket_expression(); + } + + template + bool + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_bracket_expression() + { + bool __inverse = + _M_match_token(_ScannerT::_S_token_bracket_inverse_begin); + if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin))) + return false; + _BMatcherT __matcher( __inverse, _M_traits, _M_flags); + // special case: only if _not_ chr first after + // '[' or '[^' or if ECMAscript + if (!_M_bracket_list(__matcher) // list is empty + && !(_M_flags & regex_constants::ECMAScript)) + __throw_regex_error(regex_constants::error_brack); + _M_stack.push(_StateSeqT(_M_state_store, + _M_state_store._M_insert_matcher(__matcher))); + return true; + } + + template + bool // list is non-empty + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_bracket_list(_BMatcherT& __matcher) + { + if (_M_match_token(_ScannerT::_S_token_bracket_end)) + return false; + _M_expression_term(__matcher); + _M_bracket_list(__matcher); + return true; + } + + template + void + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_expression_term(_BMatcherT& __matcher) + { + if (_M_match_token(_ScannerT::_S_token_collsymbol)) + { + __matcher._M_add_collating_element(_M_cur_value); + return; + } + if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) + { + __matcher._M_add_equivalence_class(_M_cur_value); + return; + } + if (_M_match_token(_ScannerT::_S_token_char_class_name)) + { + __matcher._M_add_character_class(_M_cur_value); + return; + } + if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a + { + auto __ch = _M_cur_value[0]; + if (_M_match_token(_ScannerT::_S_token_dash)) // [a- + { + // If the 
dash is the last character in the bracket expression, + // it is not special. + if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end) + __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-] + else // [a-z] + { + if (!_M_match_token(_ScannerT::_S_token_collelem_single)) + __throw_regex_error(regex_constants::error_range); + __matcher._M_make_range(__ch, _M_cur_value[0]); + } + } + else // [a] + __matcher._M_add_char(__ch); + return; + } + __throw_regex_error(regex_constants::error_brack); + } + + template + int + _Compiler<_InputIter, _CharT, _TraitsT>:: + _M_cur_int_value(int __radix) + { + int __v = 0; + for (typename _StringT::size_type __i = 0; + __i < _M_cur_value.length(); ++__i) + __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix); + return __v; + } + + template + bool _BracketMatcher<_CharT, _TraitsT>:: + operator()(_CharT __ch) const + { + auto __oldch = __ch; + if (_M_flags & regex_constants::collate) + if (_M_is_icase()) + __ch = _M_traits.translate_nocase(__ch); + else + __ch = _M_traits.translate(__ch); + + bool __ret = false; + for (auto __c : _M_char_set) + if (__c == __ch) + { + __ret = true; + break; + } + if (!__ret && _M_traits.isctype(__oldch, _M_class_set)) + __ret = true; + else + { + _StringT __s = _M_get_str(__ch); + for (auto& __it : _M_range_set) + if (__it.first <= __s && __s <= __it.second) + { + __ret = true; + break; + } + } + if (_M_is_non_matching) + __ret = !__ret; + return __ret; + } + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace __detail +} // namespace diff --git a/libstdc++-v3/include/bits/regex_constants.h b/libstdc++-v3/include/bits/regex_constants.h index aea2a5bb2c0..8c163cc0b3e 100644 --- a/libstdc++-v3/include/bits/regex_constants.h +++ b/libstdc++-v3/include/bits/regex_constants.h @@ -79,7 +79,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ typedef unsigned int syntax_option_type; - /** + /** * Specifies that the matching of regular expressions against a character * sequence shall be performed without 
regard to case. */ @@ -139,7 +139,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * identical to syntax_option_type extended, except that C-style escape * sequences are supported. These sequences are: * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\', ', - * and \\ddd (where ddd is one, two, or three octal digits). + * and \\ddd (where ddd is one, two, or three octal digits). */ constexpr syntax_option_type awk = 1 << _S_awk; @@ -154,7 +154,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * Specifies that the grammar recognized by the regular expression engine is * that used by POSIX utility grep when given the -E option in - * IEEE Std 1003.1-2001. This option is identical to syntax_option_type + * IEEE Std 1003.1-2001. This option is identical to syntax_option_type * extended, except that newlines are treated as whitespace. */ constexpr syntax_option_type egrep = 1 << _S_egrep; @@ -215,35 +215,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * expression shall not match [last, last). */ constexpr match_flag_type match_not_eol = 1 << _S_not_eol; - + /** * The expression \\b is not matched against the sub-sequence * [first,first). */ constexpr match_flag_type match_not_bow = 1 << _S_not_bow; - + /** * The expression \\b should not be matched against the sub-sequence * [last,last). */ constexpr match_flag_type match_not_eow = 1 << _S_not_eow; - + /** * If more than one match is possible then any match is an acceptable * result. */ constexpr match_flag_type match_any = 1 << _S_any; - + /** * The expression does not match an empty sequence. */ constexpr match_flag_type match_not_null = 1 << _S_not_null; - + /** * The expression only matches a sub-sequence that begins at first . */ constexpr match_flag_type match_continuous = 1 << _S_continuous; - + /** * --first is a valid iterator position. 
When this flag is set then the * flags match_not_bol and match_not_bow are ignored by the regular @@ -260,7 +260,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * operations all non-overlapping occurrences of the regular expression * are located and replaced, and sections of the input that did not match * the expression are copied unchanged to the output string. - * + * * Format strings (from ECMA-262 [15.5.4.11]): * @li $$ The dollar-sign itself ($) * @li $& The matched substring. diff --git a/libstdc++-v3/include/bits/regex_cursor.h b/libstdc++-v3/include/bits/regex_cursor.h deleted file mode 100644 index 444d07ae263..00000000000 --- a/libstdc++-v3/include/bits/regex_cursor.h +++ /dev/null @@ -1,105 +0,0 @@ -// class template regex -*- C++ -*- - -// Copyright (C) 2010-2013 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. - -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// Under Section 7 of GPL version 3, you are granted additional -// permissions described in the GCC Runtime Library Exception, version -// 3.1, as published by the Free Software Foundation. - -// You should have received a copy of the GNU General Public License and -// a copy of the GCC Runtime Library Exception along with this program; -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -// . - -/** - * @file bits/regex_cursor.h - * This is an internal header file, included by other library headers. - * Do not attempt to use it directly. 
@headername{regex} - */ - -namespace std _GLIBCXX_VISIBILITY(default) -{ -namespace __detail -{ -_GLIBCXX_BEGIN_NAMESPACE_VERSION - - /** - * @defgroup regex-detail Base and Implementation Classes - * @ingroup regex - * @{ - */ - - /// ABC for pattern matching - struct _PatternCursor - { - virtual ~_PatternCursor() { }; - virtual void _M_next() = 0; - virtual void _M_prev() = 0; - virtual bool _M_at_end() const = 0; - }; - - /// Provides a cursor into the specific target string. - template - class _SpecializedCursor - : public _PatternCursor - { - public: - _SpecializedCursor(const _FwdIterT& __b, const _FwdIterT __e) - : _M_b(__b), _M_c(__b), _M_e(__e) - { } - - typename std::iterator_traits<_FwdIterT>::value_type - _M_current() const - { return *_M_c; } - - void - _M_next() - { ++_M_c; } - - void - _M_prev() - { --_M_c; } - - _FwdIterT - _M_pos() const - { return _M_c; } - - const _FwdIterT& - _M_begin() const - { return _M_b; } - - const _FwdIterT& - _M_end() const - { return _M_e; } - - bool - _M_at_end() const - { return _M_c == _M_e; } - - private: - _FwdIterT _M_b; - _FwdIterT _M_c; - _FwdIterT _M_e; - }; - - // Helper function to create a cursor specialized for an iterator class. 
- template - inline _SpecializedCursor<_FwdIterT> - __cursor(const _FwdIterT& __b, const _FwdIterT __e) - { return _SpecializedCursor<_FwdIterT>(__b, __e); } - - //@} regex-detail -_GLIBCXX_END_NAMESPACE_VERSION -} // namespace __detail -} // namespace diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h index 310b2026e60..7f06727bfae 100644 --- a/libstdc++-v3/include/bits/regex_error.h +++ b/libstdc++-v3/include/bits/regex_error.h @@ -45,7 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @name 5.3 Error Types */ //@{ - + enum error_type { _S_error_collate, diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h new file mode 100644 index 00000000000..afac8d03816 --- /dev/null +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -0,0 +1,199 @@ +// class template regex -*- C++ -*- + +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . 
+ +/** + * @file bits/regex_executor.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. @headername{regex} + */ + +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + template + class basic_regex; + + template + class match_results; +_GLIBCXX_END_NAMESPACE_VERSION + +namespace __detail +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + /** + * @addtogroup regex-detail + * @{ + */ + + template + class _Executor + { + public: + typedef match_results<_BiIter, _Alloc> _ResultsT; + typedef regex_constants::match_flag_type _FlagT; + + virtual + ~_Executor() + { } + + // Set matched when string exactly match the pattern. + virtual bool + _M_match() = 0; + + // Set matched when some prefix of the string matches the pattern. + virtual bool + _M_search_from_first() = 0; + + protected: + typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT; + _Executor(_BiIter __begin, + _BiIter __end, + _ResultsT& __results, + _FlagT __flags, + _SizeT __size) + : _M_current(__begin), _M_end(__end), + _M_results(__results), _M_flags(__flags) + { + __results.resize(__size + 2); + for (auto __it : __results) + __it.matched = false; + } + + _BiIter _M_current; + _BiIter _M_end; + _ResultsT& _M_results; + _FlagT _M_flags; + }; + + template + class _DFSExecutor + : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT> + { + public: + typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; + typedef _NFA<_CharT, _TraitsT> _RegexT; + typedef typename _BaseT::_ResultsT _ResultsT; + typedef regex_constants::match_flag_type _FlagT; + + _DFSExecutor(_BiIter __begin, + _BiIter __end, + _ResultsT& __results, + const _RegexT& __nfa, + _FlagT __flags) + : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()), + _M_nfa(__nfa) + { } + + bool + _M_match() + { return _M_dfs(_M_nfa._M_start()); } + + bool + _M_search_from_first() + { return _M_dfs(_M_nfa._M_start()); } + + private: + template + bool + 
_M_dfs(_StateIdT __i); + + const _RegexT& _M_nfa; + }; + + // It's essentially a variant of Single-Source-Shortest-Path problem, where, + // the matching results is the final distance and should be minimized. + // Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed + // (BFS-like) Bellman-Ford algorithm, + // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm). + // + // Every entry of _M_covered saves the solution(grouping status) for every + // matching head. When states transfer, solutions will be compared and + // deduplicated(based on which greedy mode we have). + // + // Time complexity: O(_M_str_cur.size() * _M_nfa.size()) + // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count()) + template + class _BFSExecutor + : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT> + { + public: + typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; + typedef _NFA<_CharT, _TraitsT> _RegexT; + typedef typename _BaseT::_ResultsT _ResultsT; + typedef std::unique_ptr<_ResultsT> _ResultsPtr; + typedef regex_constants::match_flag_type _FlagT; + + _BFSExecutor(_BiIter __begin, + _BiIter __end, + _ResultsT& __results, + const _RegexT& __nfa, + _FlagT __flags) + : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()), + _M_nfa(__nfa) + { + if (_M_nfa._M_start() != _S_invalid_state_id) + _M_covered[_M_nfa._M_start()] = + _ResultsPtr(new _ResultsT(this->_M_results)); + _M_e_closure(); + } + + bool + _M_match() + { return _M_main_loop(); } + + bool + _M_search_from_first() + { return _M_main_loop(); } + + private: + template + bool + _M_main_loop(); + + void + _M_e_closure(); + + void + _M_move(); + + bool + _M_match_less_than(_StateIdT __u, _StateIdT __v) const; + + bool + _M_includes_some() const; + + std::map<_StateIdT, _ResultsPtr> _M_covered; + const _RegexT& _M_nfa; + }; + + //@} regex-detail +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace __detail +} // namespace std + +#include diff --git 
a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc new file mode 100644 index 00000000000..32d153762e4 --- /dev/null +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -0,0 +1,252 @@ +// class template regex -*- C++ -*- + +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +/** + * @file bits/regex_executor.tcc + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. @headername{regex} + */ + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __detail +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + // TODO: This is too slow. Try to compile the NFA to a DFA. + template + template + bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_dfs(_StateIdT __i) + { + auto& __current = this->_M_current; + auto& __end = this->_M_end; + auto& __results = this->_M_results; + if (__i == _S_invalid_state_id) + // This is not that certain. Need deeper investigate. 
+ return false; + const auto& __state = _M_nfa[__i]; + bool __ret = false; + switch (__state._M_opcode) + { + case _S_opcode_alternative: + // Greedy mode by default. For non-greedy mode, + // swap _M_alt and _M_next. + // TODO: Add greedy mode option. + __ret = _M_dfs<__match_mode>(__state._M_alt) + || _M_dfs<__match_mode>(__state._M_next); + break; + case _S_opcode_subexpr_begin: + __results.at(__state._M_subexpr).first = __current; + __ret = _M_dfs<__match_mode>(__state._M_next); + break; + case _S_opcode_subexpr_end: + __ret = _M_dfs<__match_mode>(__state._M_next); + __results.at(__state._M_subexpr).second = __current; + __results.at(__state._M_subexpr).matched = __ret; + break; + case _S_opcode_match: + if (__current != __end && __state._M_matches(*__current)) + { + ++__current; + __ret = _M_dfs<__match_mode>(__state._M_next); + --__current; + } + break; + case _S_opcode_accept: + if (__match_mode) + __ret = __current == __end; + else + __ret = true; + break; + default: + _GLIBCXX_DEBUG_ASSERT(false); + } + return __ret; + } + + template + template + bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_main_loop() + { + while (this->_M_current != this->_M_end) + { + if (!__match_mode) + if (_M_includes_some()) + return true; + _M_move(); + ++this->_M_current; + _M_e_closure(); + } + return _M_includes_some(); + } + + // The SPFA approach. + template + void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_e_closure() + { + auto& __current = this->_M_current; + std::queue<_StateIdT> __q; + std::vector __in_q(_M_nfa.size(), false); + for (auto& __it : _M_covered) + { + __in_q[__it.first] = true; + __q.push(__it.first); + } + while (!__q.empty()) + { + auto __u = __q.front(); + __q.pop(); + __in_q[__u] = false; + const auto& __state = _M_nfa[__u]; + + // Can be implemented using method, but there're too much arguments. 
+ auto __add_visited_state = [&](_StateIdT __v) + { + if (__v == _S_invalid_state_id) + return; + if (_M_match_less_than(__u, __v)) + { + _M_covered[__v] = _ResultsPtr(new _ResultsT(*_M_covered[__u])); + // if a state is updated, it's outgoing neighbors should be + // reconsidered too. Push them to the queue. + if (!__in_q[__v]) + { + __in_q[__v] = true; + __q.push(__v); + } + } + }; + + switch (__state._M_opcode) + { + case _S_opcode_alternative: + __add_visited_state(__state._M_next); + __add_visited_state(__state._M_alt); + break; + case _S_opcode_subexpr_begin: + _M_covered[__u]->at(__state._M_subexpr).first = __current; + __add_visited_state(__state._M_next); + break; + case _S_opcode_subexpr_end: + _M_covered[__u]->at(__state._M_subexpr).second = __current; + _M_covered[__u]->at(__state._M_subexpr).matched = true; + __add_visited_state(__state._M_next); + break; + case _S_opcode_match: + break; + case _S_opcode_accept: + __add_visited_state(__state._M_next); + break; + default: + _GLIBCXX_DEBUG_ASSERT(false); + } + } + } + + template + void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_move() + { + decltype(_M_covered) __next; + for (auto& __it : _M_covered) + { + const auto& __state = _M_nfa[__it.first]; + if (__state._M_opcode == _S_opcode_match + && __state._M_matches(*this->_M_current)) + if (_M_match_less_than(__it.first, __state._M_next) + && __state._M_next != _S_invalid_state_id) + __next[__state._M_next] = move(__it.second); + } + _M_covered = move(__next); + } + + template + bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_match_less_than(_StateIdT __u, _StateIdT __v) const + { + if (_M_covered.count(__u) == 0) + return false; + if (_M_covered.count(__v) > 0) + return true; + // TODO: Greedy and Non-greedy support + return true; + } + + template + bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_includes_some() const + { + auto& __s = _M_nfa._M_final_states(); + auto& __t = _M_covered; + if (__s.size() > 0 && 
__t.size() > 0) + { + auto __first = __s.begin(); + auto __second = __t.begin(); + while (__first != __s.end() && __second != __t.end()) + { + if (*__first < __second->first) + ++__first; + else if (__second->first < *__first) + ++__second; + else + { + this->_M_results = *__second->second; + return true; + } + } + } + return false; + } + + template + std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> + __get_executor(_BiIter __b, + _BiIter __e, + match_results<_BiIter, _Alloc>& __m, + const basic_regex<_CharT, _TraitsT>& __re, + regex_constants::match_flag_type __flags) + { + typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> + _ExecutorPtr; + typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT; + auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>> + (__re._M_automaton); + if (__p->_M_has_backref) + return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags)); + return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags)); + } + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace __detail +} // namespace diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h deleted file mode 100644 index 474d8a29114..00000000000 --- a/libstdc++-v3/include/bits/regex_grep_matcher.h +++ /dev/null @@ -1,260 +0,0 @@ -// class template regex -*- C++ -*- - -// Copyright (C) 2010-2013 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. - -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
- -// Under Section 7 of GPL version 3, you are granted additional -// permissions described in the GCC Runtime Library Exception, version -// 3.1, as published by the Free Software Foundation. - -// You should have received a copy of the GNU General Public License and -// a copy of the GCC Runtime Library Exception along with this program; -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -// . - -/** - * @file bits/regex_grep_matcher.h - * This is an internal header file, included by other library headers. - * Do not attempt to use it directly. @headername{regex} - */ - -namespace std _GLIBCXX_VISIBILITY(default) -{ -_GLIBCXX_BEGIN_NAMESPACE_VERSION - - template - class sub_match; - - template - class match_results; - -_GLIBCXX_END_NAMESPACE_VERSION - -namespace __detail -{ -_GLIBCXX_BEGIN_NAMESPACE_VERSION - - /** - * @defgroup regex-detail Base and Implementation Classes - * @ingroup regex - * @{ - */ - - /// A _Results facade specialized for wrapping a templated match_results. 
- template - class _SpecializedResults - : public _Results - { - public: - _SpecializedResults(const _Automaton::_SizeT __size, - const _SpecializedCursor<_FwdIterT>& __cursor, - match_results<_FwdIterT, _Alloc>& __m); - - ~_SpecializedResults() - { - if (_M_managed) - delete &_M_results; - } - - private: - _SpecializedResults(const _SpecializedResults& __rhs) - : _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)), - _M_managed(true) - { } - - public: - void - _M_set_pos(int __i, int __j, const _PatternCursor& __pc); - - void - _M_set_range(int __i, const _PatternCursor& __pc) - { - typedef const _SpecializedCursor<_FwdIterT>& _CursorT; - _CursorT __c = static_cast<_CursorT>(__pc); - _M_results.at(__i).first = __c._M_begin(); - _M_results.at(__i).second = __c._M_end(); - } - - void - _M_set_matched(int __i, bool __is_matched) - { _M_results.at(__i).matched = __is_matched; } - - std::unique_ptr<_Results> - _M_clone() const - { return unique_ptr<_Results>(new _SpecializedResults(*this)); } - - void - _M_assign(const _Results& __rhs) - { - auto __r = static_cast(&__rhs); - _M_results = __r->_M_results; - } - - private: - match_results<_FwdIterT, _Alloc>& _M_results; - bool _M_managed; - }; - - template - _SpecializedResults<_FwdIterT, _Alloc>:: - _SpecializedResults(const _Automaton::_SizeT __size, - const _SpecializedCursor<_FwdIterT>& __cursor, - match_results<_FwdIterT, _Alloc>& __m) - : _M_results(__m), _M_managed(false) - { - _M_results.clear(); - _M_results.reserve(__size + 2); - _M_results.resize(__size); - typename match_results<_FwdIterT, _Alloc>::value_type __sm; - __sm.first = __sm.second = __cursor._M_begin(); - _M_results.push_back(__sm); - __sm.first = __sm.second = __cursor._M_end(); - _M_results.push_back(__sm); - } - - template - void - _SpecializedResults<_FwdIterT, _Alloc>:: - _M_set_pos(int __i, int __j, const _PatternCursor& __pc) - { - typedef const _SpecializedCursor<_FwdIterT>& _CursorT; - _CursorT __c = 
static_cast<_CursorT>(__pc); - if (__j == 0) - _M_results.at(__i).first = __c._M_pos(); - else - _M_results.at(__i).second = __c._M_pos(); - } - - /// Executes a regular expression NFA/DFA over a range using a - /// variant of the parallel execution algorithm featured in the grep - /// utility, modified to use Laurikari tags. - class _Grep_matcher - { - public: - _Grep_matcher(_PatternCursor& __p, - _Results& __r, - const _AutomatonPtr& __automaton, - regex_constants::match_flag_type __flags) - : _M_nfa(static_pointer_cast<_Nfa>(__automaton)), - _M_str_cur(__p), _M_results(__r) - { } - - virtual - ~_Grep_matcher() - { } - - // Set matched when string exactly match the pattern. - virtual bool - _M_match() = 0; - - // Set matched when some prefix of the string matches the pattern. - virtual bool - _M_search_from_first() = 0; - - protected: - const std::shared_ptr<_Nfa> _M_nfa; - _PatternCursor& _M_str_cur; - _Results& _M_results; - }; - - // Time complexity: exponential - // Space complexity: O(_M_str_cur.size()) - // _M_dfs() take a state, along with current string cursor(_M_str_cur), - // trying to match current state with current character. - // Only _S_opcode_match will consume a character. - class _DFSMatcher - : public _Grep_matcher - { - public: - _DFSMatcher(_PatternCursor& __p, - _Results& __r, - const _AutomatonPtr& __automaton, - regex_constants::match_flag_type __flags) - : _Grep_matcher(__p, __r, __automaton, __flags) - { } - - bool - _M_match() - { return _M_dfs(_M_nfa->_M_start()); } - - bool - _M_search_from_first() - { return _M_dfs(_M_nfa->_M_start()); } - - private: - template - bool - _M_dfs(_StateIdT __i); - }; - - // It's essentially a variant of Single-Source-Shortest-Path problem, where, - // the matching results is the final distance and should be minimized. - // Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed - // (BFS-like) Bellman-Ford algorithm, - // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm). 
- // - // Every entry of _M_current saves the solution(grouping status) for every - // matching head. When states transfer, solutions will be compared and - // deduplicated(based on which greedy mode we have). - // - // Time complexity: O(_M_str_cur.size() * _M_nfa.size()) - // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count()) - class _BFSMatcher - : public _Grep_matcher - { - public: - _BFSMatcher(_PatternCursor& __p, - _Results& __r, - const _AutomatonPtr& __automaton, - regex_constants::match_flag_type __flags) - : _Grep_matcher(__p, __r, __automaton, __flags) - { - if (_M_nfa->_M_start() != _S_invalid_state_id) - _M_current[_M_nfa->_M_start()] = _M_results._M_clone(); - _M_e_closure(); - } - - bool - _M_match() - { return _M_main_loop(); } - - bool - _M_search_from_first() - { return _M_main_loop(); } - - private: - template - bool - _M_main_loop(); - - void - _M_e_closure(); - - void - _M_move(); - - bool - _M_match_less_than(_StateIdT __u, _StateIdT __v) const; - - bool - _M_includes_some() const; - - std::map<_StateIdT, std::unique_ptr<_Results>> _M_current; - }; - - //@} regex-detail -_GLIBCXX_END_NAMESPACE_VERSION -} // namespace __detail -} // namespace std - -#include diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.tcc b/libstdc++-v3/include/bits/regex_grep_matcher.tcc deleted file mode 100644 index 8ea7998474d..00000000000 --- a/libstdc++-v3/include/bits/regex_grep_matcher.tcc +++ /dev/null @@ -1,243 +0,0 @@ -// class template regex -*- C++ -*- - -// Copyright (C) 2010-2013 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. 
- -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// Under Section 7 of GPL version 3, you are granted additional -// permissions described in the GCC Runtime Library Exception, version -// 3.1, as published by the Free Software Foundation. - -// You should have received a copy of the GNU General Public License and -// a copy of the GCC Runtime Library Exception along with this program; -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -// . - -/** - * @file bits/regex_grep_matcher.tcc - * This is an internal header file, included by other library headers. - * Do not attempt to use it directly. @headername{regex} - */ - -#include - -namespace std _GLIBCXX_VISIBILITY(default) -{ -namespace __detail -{ -_GLIBCXX_BEGIN_NAMESPACE_VERSION - - // TODO: This is too slow. Try to compile the NFA to a DFA. - template - bool _DFSMatcher:: - _M_dfs(_StateIdT __i) - { - if (__i == _S_invalid_state_id) - // This is not that certain. Need deeper investigate. - return false; - const auto& __state = (*_M_nfa)[__i]; - bool __ret = false; - switch (__state._M_opcode) - { - case _S_opcode_alternative: - // Greedy mode by default. For non-greedy mode, - // swap _M_alt and _M_next. - // TODO: Add greedy mode option. 
- __ret = _M_dfs<__match_mode>(__state._M_alt) - || _M_dfs<__match_mode>(__state._M_next); - break; - case _S_opcode_subexpr_begin: - __state._M_tagger(_M_str_cur, _M_results); - __ret = _M_dfs<__match_mode>(__state._M_next); - break; - case _S_opcode_subexpr_end: - __state._M_tagger(_M_str_cur, _M_results); - __ret = _M_dfs<__match_mode>(__state._M_next); - _M_results._M_set_matched(__state._M_subexpr, __ret); - break; - case _S_opcode_match: - if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur)) - { - _M_str_cur._M_next(); - __ret = _M_dfs<__match_mode>(__state._M_next); - _M_str_cur._M_prev(); - } - break; - case _S_opcode_accept: - if (__match_mode) - __ret = _M_str_cur._M_at_end(); - else - __ret = true; - break; - default: - _GLIBCXX_DEBUG_ASSERT(false); - } - return __ret; - } - - template - bool _BFSMatcher:: - _M_main_loop() - { - while (!_M_str_cur._M_at_end()) - { - if (!__match_mode) - if (_M_includes_some()) - return true; - _M_move(); - _M_str_cur._M_next(); - _M_e_closure(); - } - return _M_includes_some(); - } - - // The SPFA approach. - // FIXME: move it to src/c++11 when it's stable, and make it not inlined. - inline - void _BFSMatcher:: - _M_e_closure() - { - std::queue<_StateIdT> __q; - std::vector __in_q(_M_nfa->size(), false); - for (auto& __it : _M_current) - { - __in_q[__it.first] = true; - __q.push(__it.first); - } - while (!__q.empty()) - { - auto __u = __q.front(); - __q.pop(); - __in_q[__u] = false; - const auto& __state = (*_M_nfa)[__u]; - - // Can be implemented using method, but there're too much arguments. - auto __add_visited_state = [&](_StateIdT __v) - { - if (__v == _S_invalid_state_id) - return; - if (_M_match_less_than(__u, __v)) - { - _M_current[__v] = _M_current[__u]->_M_clone(); - // if a state is updated, it's outgoing neighbors should be - // reconsidered too. Push them to the queue. 
- if (!__in_q[__v]) - { - __in_q[__v] = true; - __q.push(__v); - } - } - }; - - switch (__state._M_opcode) - { - case _S_opcode_alternative: - __add_visited_state(__state._M_next); - __add_visited_state(__state._M_alt); - break; - case _S_opcode_subexpr_begin: - __state._M_tagger(_M_str_cur, *_M_current[__u]); - __add_visited_state(__state._M_next); - break; - case _S_opcode_subexpr_end: - __state._M_tagger(_M_str_cur, *_M_current[__u]); - _M_current[__u]->_M_set_matched(__state._M_subexpr, true); - __add_visited_state(__state._M_next); - break; - case _S_opcode_match: - break; - case _S_opcode_accept: - __add_visited_state(__state._M_next); - break; - default: - _GLIBCXX_DEBUG_ASSERT(false); - } - } - } - - // FIXME: move it to src/c++11 when it's stable, and make it not inlined. - inline - void _BFSMatcher:: - _M_move() - { - decltype(_M_current) __next; - for (auto& __it : _M_current) - { - const auto& __state = (*_M_nfa)[__it.first]; - if (__state._M_opcode == _S_opcode_match - && __state._M_matches(_M_str_cur)) - if (_M_match_less_than(__it.first, __state._M_next) - && __state._M_next != _S_invalid_state_id) - __next[__state._M_next] = __it.second->_M_clone(); - } - _M_current = move(__next); - } - - // FIXME: move it to src/c++11 when it's stable, and make it not inlined. - inline - bool _BFSMatcher:: - _M_match_less_than(_StateIdT __u, _StateIdT __v) const - { - if (_M_current.count(__u) == 0) - return false; - if (_M_current.count(__v) > 0) - return true; - // TODO: Greedy and Non-greedy support - return true; - } - - // FIXME: move it to src/c++11 when it's stable, and make it not inlined. 
- inline - bool _BFSMatcher:: - _M_includes_some() const - { - auto& __s = _M_nfa->_M_final_states(); - auto& __t = _M_current; - if (__s.size() > 0 && __t.size() > 0) - { - auto __first = __s.begin(); - auto __second = __t.begin(); - while (__first != __s.end() && __second != __t.end()) - { - if (*__first < __second->first) - ++__first; - else if (__second->first < *__first) - ++__second; - else - { - _M_results._M_assign(*__second->second); - return true; - } - } - } - return false; - } - - // FIXME: move it to src/c++11 when it's stable, and make it not inlined. - inline - std::unique_ptr<_Grep_matcher> _Nfa:: - _M_get_matcher(_PatternCursor& __p, - _Results& __r, - const _AutomatonPtr& __a, - regex_constants::match_flag_type __flags) - { - if (_M_has_back_ref) - return unique_ptr<_Grep_matcher>( - new _DFSMatcher(__p, __r, __a, __flags)); - else - return unique_ptr<_Grep_matcher>( - new _BFSMatcher(__p, __r, __a, __flags)); - } - -_GLIBCXX_END_NAMESPACE_VERSION -} // namespace __detail -} // namespace diff --git a/libstdc++-v3/include/bits/regex_nfa.h b/libstdc++-v3/include/bits/regex_nfa.h deleted file mode 100644 index b4ac452fbaa..00000000000 --- a/libstdc++-v3/include/bits/regex_nfa.h +++ /dev/null @@ -1,491 +0,0 @@ -// class template regex -*- C++ -*- - -// Copyright (C) 2010-2013 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. - -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
- -// Under Section 7 of GPL version 3, you are granted additional -// permissions described in the GCC Runtime Library Exception, version -// 3.1, as published by the Free Software Foundation. - -// You should have received a copy of the GNU General Public License and -// a copy of the GCC Runtime Library Exception along with this program; -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -// . - -/** - * @file bits/regex_nfa.h - * This is an internal header file, included by other library headers. - * Do not attempt to use it directly. @headername{regex} - */ - -namespace std _GLIBCXX_VISIBILITY(default) -{ -namespace __detail -{ -_GLIBCXX_BEGIN_NAMESPACE_VERSION - - /** - * @addtogroup regex-detail - * @{ - */ - - /// Provides a generic facade for a templated match_results. - struct _Results - { - virtual - ~_Results() - { } - virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0; - virtual void _M_set_matched(int __i, bool __is_matched) = 0; - virtual std::unique_ptr<_Results> _M_clone() const = 0; - virtual void _M_assign(const _Results& __rhs) = 0; - }; - - class _Grep_matcher; - class _Automaton; - - /// Generic shared pointer to an automaton. - typedef std::shared_ptr<_Automaton> _AutomatonPtr; - - /// Base class for, um, automata. Could be an NFA or a DFA. Your choice. - class _Automaton - { - public: - typedef unsigned int _SizeT; - - public: - virtual - ~_Automaton() { } - - virtual _SizeT - _M_sub_count() const = 0; - - virtual std::unique_ptr<_Grep_matcher> - _M_get_matcher(_PatternCursor& __p, - _Results& __r, - const _AutomatonPtr& __automaton, - regex_constants::match_flag_type __flags) = 0; - -#ifdef _GLIBCXX_DEBUG - virtual std::ostream& - _M_dot(std::ostream& __ostr) const = 0; -#endif - }; - - /// Operation codes that define the type of transitions within the base NFA - /// that represents the regular expression. 
- enum _Opcode - { - _S_opcode_unknown = 0, - _S_opcode_alternative = 1, - _S_opcode_subexpr_begin = 4, - _S_opcode_subexpr_end = 5, - _S_opcode_match = 100, - _S_opcode_accept = 255 - }; - - /// Tags current state (for subexpr begin/end). - typedef std::function _Tagger; - - /// Start state tag. - template - struct _StartTagger - { - explicit - _StartTagger(int __i) - : _M_index(__i) - { } - - void - operator()(const _PatternCursor& __pc, _Results& __r) - { __r._M_set_pos(_M_index, 0, __pc); } - - int _M_index; - }; - - /// End state tag. - template - struct _EndTagger - { - explicit - _EndTagger(int __i) - : _M_index(__i) - { } - - void - operator()(const _PatternCursor& __pc, _Results& __r) - { __r._M_set_pos(_M_index, 1, __pc); } - - int _M_index; - }; - - // TODO For now we use an all-in-one comparator. In the future there may be - // optimizations based on regex_traits::translate and regex_transform. - template - struct _Comparator - { - typedef regex_constants::syntax_option_type _FlagT; - typedef typename _TraitsT::char_type _CharT; - typedef std::basic_string<_CharT> _StringT; - - _Comparator(_FlagT __flags, const _TraitsT& __traits) - : _M_flags(__flags), _M_traits(__traits) - { } - - bool - _M_equ(_CharT __a, _CharT __b) const; - - bool - _M_le(_CharT __a, _CharT __b) const; - - _FlagT _M_flags; - _TraitsT _M_traits; - }; - - /// Indicates if current state matches cursor current. 
- typedef std::function _Matcher; - - /// Matches any character - inline bool - _AnyMatcher(const _PatternCursor&) - { return true; } - - /// Matches a single character - template - struct _CharMatcher - : public _Comparator<_InIterT, _TraitsT> - { - typedef _Comparator<_InIterT, _TraitsT> _BaseT; - typedef typename _TraitsT::char_type _CharT; - typedef regex_constants::syntax_option_type _FlagT; - - explicit - _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t) - : _BaseT(__flags, __t), _M_c(__c) - { } - - bool - operator()(const _PatternCursor& __pc) const - { - typedef const _SpecializedCursor<_InIterT>& _CursorT; - _CursorT __c = static_cast<_CursorT>(__pc); - return this->_M_equ(__c._M_current(), _M_c); - } - - _CharT _M_c; - }; - - /// Matches a character range (bracket expression) - template - struct _BracketMatcher - : public _Comparator<_InIterT, _TraitsT> - { - typedef _Comparator<_InIterT, _TraitsT> _BaseT; - typedef typename _TraitsT::char_class_type _CharClassT; - typedef regex_constants::syntax_option_type _FlagT; - typedef typename _TraitsT::char_type _CharT; - typedef std::basic_string<_CharT> _StringT; - - explicit - _BracketMatcher(bool __is_non_matching, - _FlagT __flags, - const _TraitsT& __t) - : _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t), - _M_is_non_matching(__is_non_matching), _M_class_set(0) - { } - - bool - operator()(const _PatternCursor& __pc) const; - - void - _M_add_char(_CharT __c) - { _M_char_set.push_back(__c); } - - void - _M_add_collating_element(const _StringT& __s) - { - auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end()); - if (__st.empty()) - __throw_regex_error(regex_constants::error_collate); - // TODO: digraph - _M_char_set.push_back(__st[0]); - } - - void - _M_add_equivalence_class(const _StringT& __s) - { - _M_add_character_class( - _M_traits.transform_primary(&*__s.begin(), &*__s.end())); - } - - void - _M_add_character_class(const _StringT& __s) - { - auto __st = 
_M_traits.lookup_classname( - &*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase)); - if (__st == 0) - __throw_regex_error(regex_constants::error_ctype); - _M_class_set |= __st; - } - - void - _M_make_range(_CharT __l, _CharT __r) - { - if (!this->_M_le(__l, __r)) - __throw_regex_error(regex_constants::error_range); - _M_range_set.push_back(make_pair(__l, __r)); - } - - _FlagT _M_flags; - _TraitsT _M_traits; - bool _M_is_non_matching; - std::vector<_CharT> _M_char_set; - std::vector> _M_range_set; - _CharClassT _M_class_set; - }; - - /// Identifies a state in the NFA. - typedef int _StateIdT; - - /// The special case in which a state identifier is not an index. - static const _StateIdT _S_invalid_state_id = -1; - - - /** - * @brief struct _State - * - * An individual state in an NFA - * - * In this case a "state" is an entry in the NFA definition coupled - * with its outgoing transition(s). All states have a single outgoing - * transition, except for accepting states (which have no outgoing - * transitions) and alt states, which have two outgoing transitions. 
- */ - struct _State - { - typedef int _OpcodeT; - - _OpcodeT _M_opcode; // type of outgoing transition - _StateIdT _M_next; // outgoing transition - _StateIdT _M_alt; // for _S_opcode_alternative - unsigned int _M_subexpr; // for _S_opcode_subexpr_* - _Tagger _M_tagger; // for _S_opcode_subexpr_* - _Matcher _M_matches; // for _S_opcode_match - - explicit _State(_OpcodeT __opcode) - : _M_opcode(__opcode), _M_next(_S_invalid_state_id) - { } - - _State(const _Matcher& __m) - : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m) - { } - - _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t) - : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s), - _M_tagger(__t) - { } - - _State(_StateIdT __next, _StateIdT __alt) - : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt) - { } - -#ifdef _GLIBCXX_DEBUG - std::ostream& - _M_print(std::ostream& ostr) const; - - // Prints graphviz dot commands for state. - std::ostream& - _M_dot(std::ostream& __ostr, _StateIdT __id) const; -#endif - }; - - - /// The Grep Matcher works on sets of states. Here are sets of states. - typedef std::set<_StateIdT> _StateSet; - - /** - * @brief struct _Nfa - * - * A collection of all states making up an NFA. - * - * An NFA is a 4-tuple M = (K, S, s, F), where - * K is a finite set of states, - * S is the alphabet of the NFA, - * s is the initial state, - * F is a set of final (accepting) states. - * - * This NFA class is templated on S, a type that will hold values of the - * underlying alphabet (without regard to semantics of that alphabet). The - * other elements of the tuple are generated during construction of the NFA - * and are available through accessor member functions. 
- */ - class _Nfa - : public _Automaton, public std::vector<_State> - { - public: - typedef _State _StateT; - typedef unsigned int _SizeT; - typedef regex_constants::syntax_option_type _FlagT; - - _Nfa(_FlagT __f) - : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), - // TODO: BFS by default. Your choice. Need to be set by the compiler. - _M_has_back_ref(false) - { } - - ~_Nfa() - { } - - _FlagT - _M_options() const - { return _M_flags; } - - _StateIdT - _M_start() const - { return _M_start_state; } - - const _StateSet& - _M_final_states() const - { return _M_accepting_states; } - - _SizeT - _M_sub_count() const - { return _M_subexpr_count; } - - _StateIdT - _M_insert_accept() - { - this->push_back(_StateT(_S_opcode_accept)); - _M_accepting_states.insert(this->size()-1); - return this->size()-1; - } - - _StateIdT - _M_insert_alt(_StateIdT __next, _StateIdT __alt) - { - this->push_back(_StateT(__next, __alt)); - return this->size()-1; - } - - _StateIdT - _M_insert_matcher(_Matcher __m) - { - this->push_back(_StateT(__m)); - return this->size()-1; - } - - _StateIdT - _M_insert_subexpr_begin(const _Tagger& __t) - { - this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, - __t)); - return this->size()-1; - } - - _StateIdT - _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t) - { - this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t)); - return this->size()-1; - } - - void - _M_set_back_ref(bool __b) - { _M_has_back_ref = __b; } - - std::unique_ptr<_Grep_matcher> - _M_get_matcher(_PatternCursor& __p, - _Results& __r, - const _AutomatonPtr& __automaton, - regex_constants::match_flag_type __flags); - -#ifdef _GLIBCXX_DEBUG - std::ostream& - _M_dot(std::ostream& __ostr) const; -#endif - - private: - _FlagT _M_flags; - _StateIdT _M_start_state; - _StateSet _M_accepting_states; - _SizeT _M_subexpr_count; - bool _M_has_back_ref; - }; - - /// Describes a sequence of one or more %_State, its current start - /// and end(s). 
This structure contains fragments of an NFA during - /// construction. - class _StateSeq - { - public: - // Constructs a single-node sequence - _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id) - : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e) - { } - // Constructs a split sequence from two other sequencces - _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2) - : _M_nfa(__e1._M_nfa), - _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)), - _M_end1(__e1._M_end1), _M_end2(__e2._M_end1) - { } - - // Constructs a split sequence from a single sequence - _StateSeq(const _StateSeq& __e, _StateIdT __id) - : _M_nfa(__e._M_nfa), - _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)), - _M_end1(__id), _M_end2(__e._M_end1) - { } - - // Constructs a copy of a %_StateSeq - _StateSeq(const _StateSeq& __rhs) - : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start), - _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2) - { } - - - _StateSeq& operator=(const _StateSeq& __rhs); - - _StateIdT - _M_front() const - { return _M_start; } - - // Extends a sequence by one. - void - _M_push_back(_StateIdT __id); - - // Extends and maybe joins a sequence. - void - _M_append(_StateIdT __id); - - void - _M_append(_StateSeq& __rhs); - - // Clones an entire sequence. - _StateIdT - _M_clone(); - - private: - _Nfa& _M_nfa; - _StateIdT _M_start; - _StateIdT _M_end1; - _StateIdT _M_end2; - - }; - - //@} regex-detail -_GLIBCXX_END_NAMESPACE_VERSION -} // namespace __detail -} // namespace std - -#include - diff --git a/libstdc++-v3/include/bits/regex_nfa.tcc b/libstdc++-v3/include/bits/regex_nfa.tcc deleted file mode 100644 index 12f6fe825a5..00000000000 --- a/libstdc++-v3/include/bits/regex_nfa.tcc +++ /dev/null @@ -1,232 +0,0 @@ -// class template regex -*- C++ -*- - -// Copyright (C) 2010-2013 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. 
This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. - -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// Under Section 7 of GPL version 3, you are granted additional -// permissions described in the GCC Runtime Library Exception, version -// 3.1, as published by the Free Software Foundation. - -// You should have received a copy of the GNU General Public License and -// a copy of the GCC Runtime Library Exception along with this program; -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -// . - -/** - * @file bits/regex_nfa.tcc - * This is an internal header file, included by other library headers. - * Do not attempt to use it directly. 
@headername{regex} - */ -#include - -namespace std _GLIBCXX_VISIBILITY(default) -{ -namespace __detail -{ -_GLIBCXX_BEGIN_NAMESPACE_VERSION - - template - bool _BracketMatcher<_InIterT, _TraitsT>:: - operator()(const _PatternCursor& __pc) const - { - typedef const _SpecializedCursor<_InIterT>& _CursorT; - _CursorT __c = static_cast<_CursorT>(__pc); - _CharT __ch = __c._M_current(); - bool __ret = false; - for (auto __c : _M_char_set) - if (this->_M_equ(__c, __ch)) - { - __ret = true; - break; - } - if (!__ret && _M_traits.isctype(__ch, _M_class_set)) - __ret = true; - else - { - for (auto& __it : _M_range_set) - if (this->_M_le(__it.first, __ch) && this->_M_le(__ch, __it.second)) - { - __ret = true; - break; - } - } - if (_M_is_non_matching) - __ret = !__ret; - return __ret; - } - - template - bool _Comparator<_InIterT, _TraitsT>:: - _M_equ(_CharT __a, _CharT __b) const - { - if (_M_flags & regex_constants::icase) - return _M_traits.translate_nocase(__a) - == _M_traits.translate_nocase(__b); - if (_M_flags & regex_constants::collate) - return _M_traits.translate(__a) == _M_traits.translate(__b); - return __a == __b; - } - - template - bool _Comparator<_InIterT, _TraitsT>:: - _M_le(_CharT __a, _CharT __b) const - { - _StringT __str1 = _StringT(1, - _M_flags & regex_constants::icase - ? _M_traits.translate_nocase(__a) - : _M_traits.translate(__a)); - _StringT __str2 = _StringT(1, - _M_flags & regex_constants::icase - ? 
_M_traits.translate_nocase(__b) - : _M_traits.translate(__b)); - return _M_traits.transform(__str1.begin(), __str1.end()) - <= _M_traits.transform(__str2.begin(), __str2.end()); - } - -#ifdef _GLIBCXX_DEBUG -inline std::ostream& _State:: -_M_print(std::ostream& ostr) const -{ - switch (_M_opcode) - { - case _S_opcode_alternative: - ostr << "alt next=" << _M_next << " alt=" << _M_alt; - break; - case _S_opcode_subexpr_begin: - ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; - break; - case _S_opcode_subexpr_end: - ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; - break; - case _S_opcode_match: - ostr << "match next=" << _M_next; - break; - case _S_opcode_accept: - ostr << "accept next=" << _M_next; - break; - default: - ostr << "unknown next=" << _M_next; - break; - } - return ostr; -} - -// Prints graphviz dot commands for state. -inline std::ostream& _State:: -_M_dot(std::ostream& __ostr, _StateIdT __id) const -{ - switch (_M_opcode) - { - case _S_opcode_alternative: - __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" - << __id << " -> " << _M_next - << " [label=\"epsilon\", tailport=\"s\"];\n" - << __id << " -> " << _M_alt - << " [label=\"epsilon\", tailport=\"n\"];\n"; - break; - case _S_opcode_subexpr_begin: - __ostr << __id << " [label=\"" << __id << "\\nSBEGIN " - << _M_subexpr << "\"];\n" - << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; - break; - case _S_opcode_subexpr_end: - __ostr << __id << " [label=\"" << __id << "\\nSEND " - << _M_subexpr << "\"];\n" - << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; - break; - case _S_opcode_match: - __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" - << __id << " -> " << _M_next << " [label=\"\"];\n"; - break; - case _S_opcode_accept: - __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; - break; - default: - __ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n" - << __id << " -> " << _M_next << " [label=\"?\"];\n"; - break; - } - 
return __ostr; -} - -inline std::ostream& _Nfa:: -_M_dot(std::ostream& __ostr) const -{ - __ostr << "digraph _Nfa {\n" - << " rankdir=LR;\n"; - for (unsigned int __i = 0; __i < this->size(); ++__i) - { this->at(__i)._M_dot(__ostr, __i); } - __ostr << "}\n"; - return __ostr; -} -#endif - -inline _StateSeq& _StateSeq:: -operator=(const _StateSeq& __rhs) -{ - _M_start = __rhs._M_start; - _M_end1 = __rhs._M_end1; - _M_end2 = __rhs._M_end2; - return *this; -} - -inline void _StateSeq:: -_M_push_back(_StateIdT __id) -{ - if (_M_end1 != _S_invalid_state_id) - _M_nfa[_M_end1]._M_next = __id; - _M_end1 = __id; -} - -inline void _StateSeq:: -_M_append(_StateIdT __id) -{ - if (_M_end2 != _S_invalid_state_id) - { - if (_M_end2 == _M_end1) - _M_nfa[_M_end2]._M_alt = __id; - else - _M_nfa[_M_end2]._M_next = __id; - _M_end2 = _S_invalid_state_id; - } - if (_M_end1 != _S_invalid_state_id) - _M_nfa[_M_end1]._M_next = __id; - _M_end1 = __id; -} - -inline void _StateSeq:: -_M_append(_StateSeq& __rhs) -{ - if (_M_end2 != _S_invalid_state_id) - { - if (_M_end2 == _M_end1) - _M_nfa[_M_end2]._M_alt = __rhs._M_start; - else - _M_nfa[_M_end2]._M_next = __rhs._M_start; - _M_end2 = _S_invalid_state_id; - } - if (__rhs._M_end2 != _S_invalid_state_id) - _M_end2 = __rhs._M_end2; - if (_M_end1 != _S_invalid_state_id) - _M_nfa[_M_end1]._M_next = __rhs._M_start; - _M_end1 = __rhs._M_end1; -} - -// @todo implement this function. 
-inline _StateIdT _StateSeq:: -_M_clone() -{ return 0; } - -_GLIBCXX_END_NAMESPACE_VERSION -} // namespace __detail -} // namespace diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index b0918ed4afe..ac9a2a85b9b 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -54,13 +54,11 @@ #include #include -#include #include #include -#include -#include +#include #include -#include +#include #include #endif // C++11 diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc index 86fab85a434..cb502eadfb4 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc @@ -38,12 +38,10 @@ template_M_sub_count(); - __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); - __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); - VERIFY( dynamic_cast<__detail::_DFSMatcher *>( - &*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr ); + VERIFY( (dynamic_cast + <__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*> + (&*__detail::__get_executor(__s, __e, __m, __re, __flags)) + != nullptr) ); } void