From 37c74e2816535f654390781de97cc01206b0ff31 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Wed, 24 Jul 2013 14:39:54 +0000 Subject: [PATCH] Reimplment matcher using Depth-first search(backtracking). 2013-07-24 Tim Shen Reimplment matcher using Depth-first search(backtracking). PR libstdc++/53622 PR libstdc++/57173 * include/bits/regex.h: regex_match() and regex_search(). * include/bits/regex_cursor.h: Fix _M_set_pos(). * include/bits/regex_grep_matcher.h: add _M_dfs_match(). * include/bits/regex_grep_matcher.tcc: Implement it. * testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc: New. * testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc: New. * testsuite/28_regex/algorithms/regex_search/basic/string_01.cc: Remove xfail. From-SVN: r201213 --- libstdc++-v3/ChangeLog | 16 ++++++ libstdc++-v3/include/bits/regex.h | 6 +-- libstdc++-v3/include/bits/regex_cursor.h | 5 ++ .../include/bits/regex_grep_matcher.h | 20 +++++-- .../include/bits/regex_grep_matcher.tcc | 50 ++++++++++++++++++ .../algorithms/regex_match/extended/53622.cc | 52 +++++++++++++++++++ .../algorithms/regex_match/extended/57173.cc | 50 ++++++++++++++++++ .../regex_search/basic/string_01.cc | 3 +- 8 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index ab21abefed5..45dc9452afb 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,19 @@ +2013-07-24 Tim Shen + + Reimplment matcher using Depth-first search(backtracking). + PR libstdc++/53622 + PR libstdc++/57173 + * include/bits/regex.h: regex_match() and regex_search(). + * include/bits/regex_cursor.h: Fix _M_set_pos(). + * include/bits/regex_grep_matcher.h: add _M_dfs_match(). + * include/bits/regex_grep_matcher.tcc: Implement it. + * testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc: + New. + * testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc: + New. + * testsuite/28_regex/algorithms/regex_search/basic/string_01.cc: + Remove xfail. + 2013-07-23 Tim Shen Implement regex_iterator and regex_token_iterator. diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 9848f717fc6..64a1807033b 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -2185,8 +2185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags); - __matcher._M_match(); - return __m[0].matched; + return __matcher._M_dfs_match(); } /** @@ -2338,8 +2337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last); __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags); - __matcher._M_search_from_first(); - if (__m[0].matched) + if (__matcher._M_dfs_search_from_first()) { __r._M_set_range(__m.size(), __detail::_SpecializedCursor<_Bi_iter> diff --git a/libstdc++-v3/include/bits/regex_cursor.h b/libstdc++-v3/include/bits/regex_cursor.h index 44e9d1d9239..444d07ae263 100644 --- a/libstdc++-v3/include/bits/regex_cursor.h +++ b/libstdc++-v3/include/bits/regex_cursor.h @@ -45,6 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { virtual ~_PatternCursor() { }; virtual void _M_next() = 0; + virtual void _M_prev() = 0; virtual bool _M_at_end() const = 0; }; @@ -66,6 +67,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_next() { ++_M_c; } + void + _M_prev() + { --_M_c; } + _FwdIterT _M_pos() const { return _M_c; } diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h index da9264d5c17..c402c780f1c 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.h +++ b/libstdc++-v3/include/bits/regex_grep_matcher.h @@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__j == 0) _M_results.at(__i).first = __c._M_pos(); else - _M_results.at(__i).second = __c._M_pos()+1; + _M_results.at(__i).second = __c._M_pos(); } /// A stack of states used in evaluating the NFA. @@ -127,9 +127,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_pattern(__p), _M_results(__r) { } - void _M_match(); + void + _M_match(); + + void + _M_search_from_first(); - void _M_search_from_first(); + bool + _M_dfs_match() + { return _M_dfs(_M_nfa->_M_start()); } + + bool + _M_dfs_search_from_first() + { return _M_dfs(_M_nfa->_M_start()); } private: _StateSet @@ -141,6 +151,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateSet _M_e_closure(_StateStack& __stack, const _StateSet& __s); + template + bool + _M_dfs(_StateIdT __i); + const std::shared_ptr<_Nfa> _M_nfa; _PatternCursor& _M_pattern; _Results& _M_results; diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.tcc b/libstdc++-v3/include/bits/regex_grep_matcher.tcc index 2d4a9a60faa..46b98174888 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.tcc +++ b/libstdc++-v3/include/bits/regex_grep_matcher.tcc @@ -103,6 +103,56 @@ namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION + // _M_dfs() take a state, along with current string cursor(_M_pattern), + // trying to match current state with current character. + // Only _S_opcode_match will consume a character. + // TODO: This is too slow. Try to compile the NFA to a DFA. + template + bool _Grep_matcher:: + _M_dfs(_StateIdT __i) + { + if (__i == _S_invalid_state_id) + // This is not that certain. Need deeper investigate. + return false; + const auto& __state = (*_M_nfa)[__i]; + bool __ret = false; + switch (__state._M_opcode) + { + case _S_opcode_alternative: + // Greedy mode by default. For non-greedy mode, + // swap _M_alt and _M_next. + __ret = _M_dfs<__match_mode>(__state._M_alt) + || _M_dfs<__match_mode>(__state._M_next); + break; + case _S_opcode_subexpr_begin: + __state._M_tagger(_M_pattern, _M_results); + __ret = _M_dfs<__match_mode>(__state._M_next); + break; + case _S_opcode_subexpr_end: + __state._M_tagger(_M_pattern, _M_results); + __ret = _M_dfs<__match_mode>(__state._M_next); + _M_results._M_set_matched(__state._M_subexpr, __ret); + break; + case _S_opcode_match: + if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern)) + { + _M_pattern._M_next(); + __ret = _M_dfs<__match_mode>(__state._M_next); + _M_pattern._M_prev(); + } + break; + case _S_opcode_accept: + if (__match_mode) + __ret = _M_pattern._M_at_end(); + else + __ret = true; + break; + default: + _GLIBCXX_DEBUG_ASSERT( false ); + } + return __ret; + } + inline void _Grep_matcher:: _M_match() { diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc new file mode 100644 index 00000000000..383ed054a90 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc @@ -0,0 +1,52 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-07-23 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.11.2 regex_match +// Tests Extended grouping against a std::string target. + +#include +#include + +// libstdc++/53622 +void +test01() +{ + bool test __attribute__((unused)) = true; + + std::regex re("zxcv/(one.*)abc", std::regex::extended); + std::string target("zxcv/onetwoabc"); + std::smatch m; + + VERIFY( std::regex_search(target, m, re) ); + VERIFY( m.size() == 2 ); + VERIFY( m[0].matched == true ); + VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); + VERIFY( m[1].matched == true ); + VERIFY( std::string(m[1].first, m[1].second) == "onetwo" ); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc new file mode 100644 index 00000000000..3031c43d188 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc @@ -0,0 +1,50 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-07-23 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.11.3 regex_search +// Tests Extended against a std::string target. + +#include +#include +#include + +// libstdc++/57173 +void +test01() +{ + bool test __attribute__((unused)) = true; + + std::regex re("/asdf(/.*)", std::regex::extended); + std::string target("/asdf/qwerty"); + std::smatch m; + + VERIFY( std::regex_match(target, m, re) ); + VERIFY( m.size() == 2 ); + VERIFY( std::string(m[1].first, m[1].second) == "/qwerty"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc index f2a7f2104c1..ee487f19836 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc @@ -1,5 +1,4 @@ // { dg-options "-std=gnu++11" } -// { dg-do run { xfail *-*-* } } // // 2013-07-17 Tim Shen @@ -32,7 +31,7 @@ test01() { bool test __attribute__((unused)) = true; - std::regex re("as(df)", std::regex::basic); + std::regex re("as\\(df\\)", std::regex::basic); std::string target("xxasdfyy"); std::smatch m; -- 2.30.2