Reimplement matcher using depth-first search (backtracking).
[gcc.git] / libstdc++-v3 / include / bits / regex_grep_matcher.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_grep_matcher.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 #include <regex>
32
33 namespace std _GLIBCXX_VISIBILITY(default)
34 {
35 namespace
36 {
// Work stack of NFA state ids (backed by a std::vector) used while
// evaluating the NFA.
using _StateStack = std::stack<std::__detail::_StateIdT,
                               std::vector<std::__detail::_StateIdT>>;
41
42 // Obtains the next state set given the current state set __s and the current
43 // input character.
44 inline std::__detail::_StateSet
45 __move(const std::__detail::_PatternCursor& __p,
46 const std::__detail::_Nfa& __nfa,
47 const std::__detail::_StateSet& __s)
48 {
49 std::__detail::_StateSet __m;
50 for (std::__detail::_StateSet::const_iterator __i = __s.begin();
51 __i != __s.end(); ++__i)
52 {
53 if (*__i == std::__detail::_S_invalid_state_id)
54 continue;
55
56 const std::__detail::_State& __state = __nfa[*__i];
57 if (__state._M_opcode == std::__detail::_S_opcode_match
58 && __state._M_matches(__p))
59 __m.insert(__state._M_next);
60 }
61 return __m;
62 }
63
64 // returns true if (__s intersect __t) is not empty
65 inline bool
66 __includes_some(const std::__detail::_StateSet& __s,
67 const std::__detail::_StateSet& __t)
68 {
69 if (__s.size() > 0 && __t.size() > 0)
70 {
71 std::__detail::_StateSet::const_iterator __first = __s.begin();
72 std::__detail::_StateSet::const_iterator __second = __t.begin();
73 while (__first != __s.end() && __second != __t.end())
74 {
75 if (*__first < *__second)
76 ++__first;
77 else if (*__second < *__first)
78 ++__second;
79 else
80 return true;
81 }
82 }
83 return false;
84 }
85
86 // If an identified state __u is not already in the current state set __e,
87 // insert it and push it on the current state stack __s.
88 inline void
89 __add_visited_state(const std::__detail::_StateIdT __u,
90 _StateStack& __s,
91 std::__detail::_StateSet& __e)
92 {
93 if (__e.count(__u) == 0)
94 {
95 __e.insert(__u);
96 __s.push(__u);
97 }
98 }
99
100 } // anonymous namespace
101
102 namespace __detail
103 {
104 _GLIBCXX_BEGIN_NAMESPACE_VERSION
105
106 // _M_dfs() take a state, along with current string cursor(_M_pattern),
107 // trying to match current state with current character.
108 // Only _S_opcode_match will consume a character.
109 // TODO: This is too slow. Try to compile the NFA to a DFA.
110 template<bool __match_mode>
111 bool _Grep_matcher::
112 _M_dfs(_StateIdT __i)
113 {
114 if (__i == _S_invalid_state_id)
115 // This is not that certain. Need deeper investigate.
116 return false;
117 const auto& __state = (*_M_nfa)[__i];
118 bool __ret = false;
119 switch (__state._M_opcode)
120 {
121 case _S_opcode_alternative:
122 // Greedy mode by default. For non-greedy mode,
123 // swap _M_alt and _M_next.
124 __ret = _M_dfs<__match_mode>(__state._M_alt)
125 || _M_dfs<__match_mode>(__state._M_next);
126 break;
127 case _S_opcode_subexpr_begin:
128 __state._M_tagger(_M_pattern, _M_results);
129 __ret = _M_dfs<__match_mode>(__state._M_next);
130 break;
131 case _S_opcode_subexpr_end:
132 __state._M_tagger(_M_pattern, _M_results);
133 __ret = _M_dfs<__match_mode>(__state._M_next);
134 _M_results._M_set_matched(__state._M_subexpr, __ret);
135 break;
136 case _S_opcode_match:
137 if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
138 {
139 _M_pattern._M_next();
140 __ret = _M_dfs<__match_mode>(__state._M_next);
141 _M_pattern._M_prev();
142 }
143 break;
144 case _S_opcode_accept:
145 if (__match_mode)
146 __ret = _M_pattern._M_at_end();
147 else
148 __ret = true;
149 break;
150 default:
151 _GLIBCXX_DEBUG_ASSERT( false );
152 }
153 return __ret;
154 }
155
156 inline void _Grep_matcher::
157 _M_match()
158 {
159 __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
160 for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
161 __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
162
163 _M_results._M_set_matched(0,
164 __includes_some(_M_nfa->_M_final_states(), __t));
165 }
166
167 inline void _Grep_matcher::
168 _M_search_from_first()
169 {
170 __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
171 for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
172 {
173 if (__includes_some(_M_nfa->_M_final_states(), __t)) // KISS
174 {
175 _M_results._M_set_matched(0, true);
176 return;
177 }
178 __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
179 }
180 _M_results._M_set_matched(0, false);
181 }
182
183 // Creates the e-closure set for the initial state __i.
184 inline _StateSet _Grep_matcher::
185 _M_e_closure(_StateIdT __i)
186 {
187 _StateSet __s;
188 __s.insert(__i);
189 _StateStack __stack;
190 __stack.push(__i);
191 return this->_M_e_closure(__stack, __s);
192 }
193
194 // Creates the e-closure set for an arbitrary state set __s.
195 inline _StateSet _Grep_matcher::
196 _M_e_closure(const _StateSet& __s)
197 {
198 _StateStack __stack;
199 for (_StateSet::const_iterator __i = __s.begin(); __i != __s.end(); ++__i)
200 __stack.push(*__i);
201 return this->_M_e_closure(__stack, __s);
202 }
203
204 inline _StateSet _Grep_matcher::
205 _M_e_closure(_StateStack& __stack, const _StateSet& __s)
206 {
207 _StateSet __e = __s;
208 while (!__stack.empty())
209 {
210 _StateIdT __t = __stack.top(); __stack.pop();
211 if (__t == _S_invalid_state_id)
212 continue;
213 // for each __u with edge from __t to __u labeled e do ...
214 const _State& __state = _M_nfa->operator[](__t);
215 switch (__state._M_opcode)
216 {
217 case _S_opcode_alternative:
218 __add_visited_state(__state._M_next, __stack, __e);
219 __add_visited_state(__state._M_alt, __stack, __e);
220 break;
221 case _S_opcode_subexpr_begin:
222 __add_visited_state(__state._M_next, __stack, __e);
223 __state._M_tagger(_M_pattern, _M_results);
224 break;
225 case _S_opcode_subexpr_end:
226 __add_visited_state(__state._M_next, __stack, __e);
227 __state._M_tagger(_M_pattern, _M_results);
228 _M_results._M_set_matched(__state._M_subexpr, true);
229 break;
230 case _S_opcode_accept:
231 __add_visited_state(__state._M_next, __stack, __e);
232 break;
233 default:
234 break;
235 }
236 }
237 return __e;
238 }
239
240 _GLIBCXX_END_NAMESPACE_VERSION
241 } // namespace __detail
242 } // namespace