1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_executor.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 // FIXME convert comments to doxygen format.
33 // TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
34 // much more similar. Also, make grouping seperated. The
35 // regex_constants::nosubs enables much more simpler execution.
37 namespace std
_GLIBCXX_VISIBILITY(default)
39 _GLIBCXX_BEGIN_NAMESPACE_VERSION
40 template<typename
, typename
>
46 template<typename
, typename
>
48 _GLIBCXX_END_NAMESPACE_VERSION
52 _GLIBCXX_BEGIN_NAMESPACE_VERSION
55 * @addtogroup regex-detail
59 template<typename _BiIter
, typename _Alloc
,
60 typename _CharT
, typename _TraitsT
>
64 typedef basic_regex
<_CharT
, _TraitsT
> _RegexT
;
65 typedef std::vector
<sub_match
<_BiIter
>, _Alloc
> _ResultsVec
;
66 typedef regex_constants::match_flag_type _FlagT
;
67 typedef typename
_TraitsT::char_class_type _ClassT
;
70 _Executor(_BiIter __begin
,
72 _ResultsVec
& __results
,
78 _M_results(__results
),
79 _M_flags((__flags
& regex_constants::match_prev_avail
)
81 & ~regex_constants::match_not_bol
82 & ~regex_constants::match_not_bow
)
90 // Set matched when string exactly match the pattern.
99 // Set matched when some prefix of the string matches the pattern.
101 _M_search_from_first()
103 _M_match_mode
= false;
112 _M_is_word(_CharT __ch
) const
114 static const _CharT __s
= 'w';
115 return _M_re
._M_traits
.isctype
116 (__ch
, _M_re
._M_traits
.lookup_classname(&__s
, &__s
+1));
122 return _M_current
== _M_begin
123 && !(_M_flags
& (regex_constants::match_not_bol
124 | regex_constants::match_prev_avail
));
130 return _M_current
== _M_end
131 && !(_M_flags
& regex_constants::match_not_eol
);
135 _M_word_boundry(_State
<_CharT
, _TraitsT
> __state
) const;
137 virtual std::unique_ptr
<_Executor
>
138 _M_clone() const = 0;
140 // Return whether now match the given sub-NFA.
142 _M_lookahead(_State
<_CharT
, _TraitsT
> __state
) const
144 auto __sub
= this->_M_clone();
145 __sub
->_M_set_start(__state
._M_alt
);
146 return __sub
->_M_search_from_first();
150 _M_set_results(_ResultsVec
& __cur_results
);
154 _M_init(_BiIter __cur
) = 0;
157 _M_set_start(_StateIdT __start
) = 0;
163 const _BiIter _M_begin
;
164 const _BiIter _M_end
;
165 const _RegexT
& _M_re
;
166 _ResultsVec
& _M_results
;
171 // A _DFSExecutor perform a DFS on given NFA and input string. At the very
172 // beginning the executor stands in the start state, then it try every
173 // possible state transition in current state recursively. Some state
174 // transitions consume input string, say, a single-char-matcher or a
175 // back-reference matcher; some not, like assertion or other anchor nodes.
176 // When the input is exhausted and the current state is an accepting state,
177 // the whole executor return true.
179 // TODO: This approach is exponentially slow for certain input.
180 // Try to compile the NFA to a DFA.
182 // Time complexity: exponential
183 // Space complexity: O(__end - __begin)
184 template<typename _BiIter
, typename _Alloc
,
185 typename _CharT
, typename _TraitsT
>
187 : public _Executor
<_BiIter
, _Alloc
, _CharT
, _TraitsT
>
190 typedef _Executor
<_BiIter
, _Alloc
, _CharT
, _TraitsT
> _BaseT
;
191 typedef _NFA
<_CharT
, _TraitsT
> _NFAT
;
192 typedef typename
_BaseT::_RegexT _RegexT
;
193 typedef typename
_BaseT::_ResultsVec _ResultsVec
;
194 typedef typename
_BaseT::_FlagT _FlagT
;
197 _DFSExecutor(_BiIter __begin
,
199 _ResultsVec
& __results
,
202 : _BaseT(__begin
, __end
, __results
, __re
, __flags
),
203 _M_nfa(*std::static_pointer_cast
<_NFA
<_CharT
, _TraitsT
>>
204 (__re
._M_automaton
)),
205 _M_start_state(_M_nfa
._M_start())
210 _M_init(_BiIter __cur
)
212 _M_cur_results
.resize(_M_nfa
._M_sub_count() + 2);
213 this->_M_current
= __cur
;
217 _M_set_start(_StateIdT __start
)
218 { _M_start_state
= __start
; }
222 { return _M_dfs(this->_M_start_state
); }
225 _M_dfs(_StateIdT __start
);
227 std::unique_ptr
<_BaseT
>
230 return std::unique_ptr
<_BaseT
>(new _DFSExecutor(this->_M_current
,
237 // To record current solution.
238 _ResultsVec _M_cur_results
;
240 _StateIdT _M_start_state
;
243 // Like the DFS approach, it try every possible state transition; Unlike DFS,
244 // it uses a queue instead of a stack to store matching states. It's a BFS
247 // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained
248 // this algorithm clearly.
250 // Every entry of _M_covered saves the solution(grouping status) for every
251 // matching head. When states transit, solutions will be compared and
252 // deduplicated(based on which greedy mode we have).
254 // Time complexity: O((__end - __begin) * _M_nfa.size())
255 // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
256 template<typename _BiIter
, typename _Alloc
,
257 typename _CharT
, typename _TraitsT
>
259 : public _Executor
<_BiIter
, _Alloc
, _CharT
, _TraitsT
>
262 typedef _Executor
<_BiIter
, _Alloc
, _CharT
, _TraitsT
> _BaseT
;
263 typedef _NFA
<_CharT
, _TraitsT
> _NFAT
;
264 typedef typename
_BaseT::_RegexT _RegexT
;
265 typedef typename
_BaseT::_ResultsVec _ResultsVec
;
266 typedef typename
_BaseT::_FlagT _FlagT
;
267 // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
268 // carefully work out how to compare to conflict matching states.
270 // A matching state is a pair(where, when); `where` is a NFA node; `when`
271 // is a _BiIter, indicating which char is the next to be matched. Two
272 // matching states conflict if they have equivalent `where` and `when`.
274 // Now we need to drop one and keep another, because at most one of them
275 // could be the final optimal solution. This behavior is affected by
278 // The definition of `greedy`:
279 // For the sequence of quantifiers in NFA sorted by their start positions,
280 // now maintain a vector in every matching state, with length equal to
281 // quantifier seq, recording repeating times of every quantifier. Now to
282 // compare two matching states, we just lexically compare these two
283 // vectors. To win the compare(to survive), one matching state needs to
284 // make its greedy quantifier count larger, and ungreedy quantifiers
287 // In the implementation, we recorded negtive counts for greedy
288 // quantifiers and positive counts of ungreedy ones. Now the implicit
289 // operator<() for lexicographical_compare will emit the answer.
291 // When two vectors equal, it means the `where`, `when` and quantifier
292 // counts are identical, and indicates the same solution; so
293 // _ResultsEntry::operator<() just return false.
295 : private _ResultsVec
298 _ResultsEntry(size_t __res_sz
, size_t __sz
)
299 : _ResultsVec(__res_sz
), _M_quant_keys(__sz
)
304 { _ResultsVec::resize(__n
); }
308 { return _ResultsVec::size(); }
311 operator[](size_t __idx
)
312 { return _ResultsVec::operator[](__idx
); }
315 operator<(const _ResultsEntry
& __rhs
) const
317 _GLIBCXX_DEBUG_ASSERT(_M_quant_keys
.size()
318 == __rhs
._M_quant_keys
.size());
319 return lexicographical_compare(_M_quant_keys
.begin(),
321 __rhs
._M_quant_keys
.begin(),
322 __rhs
._M_quant_keys
.end());
326 _M_inc(size_t __idx
, bool __neg
)
327 { _M_quant_keys
[__idx
] += __neg
? 1 : -1; }
334 std::vector
<int> _M_quant_keys
;
336 typedef std::unique_ptr
<_ResultsEntry
> _ResultsPtr
;
342 _TodoList(size_t __sz
)
343 : _M_states(), _M_exists(__sz
, false)
346 void _M_push(_StateIdT __u
)
348 _GLIBCXX_DEBUG_ASSERT(__u
< _M_exists
.size());
351 _M_exists
[__u
] = true;
352 _M_states
.push_back(__u
);
358 auto __ret
= _M_states
.back();
359 _M_states
.pop_back();
360 _M_exists
[__ret
] = false;
364 bool _M_empty() const
365 { return _M_states
.empty(); }
370 _M_exists
.assign(_M_exists
.size(), false);
374 std::vector
<_StateIdT
> _M_states
;
375 std::vector
<bool> _M_exists
;
379 _BFSExecutor(_BiIter __begin
,
381 _ResultsVec
& __results
,
384 : _BaseT(__begin
, __end
, __results
, __re
, __flags
),
385 _M_nfa(*std::static_pointer_cast
<_NFA
<_CharT
, _TraitsT
>>
386 (__re
._M_automaton
)),
387 _M_match_stack(_M_nfa
.size()),
388 _M_stack(_M_nfa
.size()),
389 _M_start_state(_M_nfa
._M_start())
394 _M_init(_BiIter __cur
)
396 this->_M_current
= __cur
;
398 _ResultsVec
& __res(this->_M_results
);
399 _M_covered
[this->_M_start_state
] =
400 _ResultsPtr(new _ResultsEntry(__res
.size(),
401 _M_nfa
._M_quant_count
));
402 _M_stack
._M_push(this->_M_start_state
);
406 _M_set_start(_StateIdT __start
)
407 { _M_start_state
= __start
; }
421 std::unique_ptr
<_BaseT
>
424 return std::unique_ptr
<_BaseT
>(new _BFSExecutor(this->_M_current
,
432 std::map
<_StateIdT
, _ResultsPtr
> _M_covered
;
433 _TodoList _M_match_stack
;
435 _StateIdT _M_start_state
;
436 // To record global optimal solution.
437 _ResultsPtr _M_cur_results
;
441 _GLIBCXX_END_NAMESPACE_VERSION
442 } // namespace __detail
445 #include <bits/regex_executor.tcc>