* Do not attempt to use it directly. @headername{regex}
*/
-// TODO: convert comments to doxygen format.
+// FIXME convert comments to doxygen format.
+
+// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
+// much more similar. Also, make grouping seperated. The
+// regex_constants::nosubs enables much more simpler execution.
namespace std _GLIBCXX_VISIBILITY(default)
{
class _Executor
{
public:
- typedef match_results<_BiIter, _Alloc> _ResultsT;
+ typedef basic_regex<_CharT, _TraitsT> _RegexT;
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
typedef regex_constants::match_flag_type _FlagT;
+ typedef typename _TraitsT::char_class_type _ClassT;
+
+ public:
+ _Executor(_BiIter __begin,
+ _BiIter __end,
+ _ResultsVec& __results,
+ const _RegexT& __re,
+ _FlagT __flags)
+ : _M_begin(__begin),
+ _M_end(__end),
+ _M_re(__re),
+ _M_results(__results),
+ _M_flags((__flags & regex_constants::match_prev_avail)
+ ? (__flags
+ & ~regex_constants::match_not_bol
+ & ~regex_constants::match_not_bow)
+ : __flags)
+ { }
virtual
~_Executor()
{ }
// Set matched when string exactly match the pattern.
- virtual void
- _M_match() = 0;
+ bool
+ _M_match()
+ {
+ _M_match_mode = true;
+ _M_init(_M_begin);
+ return _M_main();
+ }
// Set matched when some prefix of the string matches the pattern.
- virtual void
- _M_search_from_first() = 0;
-
- protected:
- typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
- _Executor(_BiIter __begin,
- _BiIter __end,
- _ResultsT& __results,
- _FlagT __flags,
- _SizeT __size)
- : _M_current(__begin), _M_end(__end), _M_results(__results),
- _M_flags(__flags)
+ bool
+ _M_search_from_first()
+ {
+ _M_match_mode = false;
+ _M_init(_M_begin);
+ return _M_main();
+ }
+
+ bool
+ _M_search();
+
+ bool
+ _M_is_word(_CharT __ch) const
+ {
+ static const _CharT __s = 'w';
+ return _M_re._M_traits.isctype
+ (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
+ }
+
+ bool
+ _M_at_begin() const
+ {
+ return _M_current == _M_begin
+ && !(_M_flags & (regex_constants::match_not_bol
+ | regex_constants::match_prev_avail));
+ }
+
+ bool
+ _M_at_end() const
{
- __size += 2;
- _M_results.resize(__size);
- for (auto __i = 0; __i < __size; __i++)
- _M_results[__i].matched = false;
+ return _M_current == _M_end
+ && !(_M_flags & regex_constants::match_not_eol);
}
- _BiIter _M_current;
- _BiIter _M_end;
- _ResultsVec& _M_results;
- _FlagT _M_flags;
+ bool
+ _M_word_boundry(_State<_CharT, _TraitsT> __state) const;
+
+ virtual std::unique_ptr<_Executor>
+ _M_clone() const = 0;
+
+ // Return whether now match the given sub-NFA.
+ bool
+ _M_lookahead(_State<_CharT, _TraitsT> __state) const
+ {
+ auto __sub = this->_M_clone();
+ __sub->_M_set_start(__state._M_alt);
+ return __sub->_M_search_from_first();
+ }
+
+ void
+ _M_set_results(_ResultsVec& __cur_results);
+
+ public:
+ virtual void
+ _M_init(_BiIter __cur) = 0;
+
+ virtual void
+ _M_set_start(_StateIdT __start) = 0;
+
+ virtual bool
+ _M_main() = 0;
+
+ _BiIter _M_current;
+ const _BiIter _M_begin;
+ const _BiIter _M_end;
+ const _RegexT& _M_re;
+ _ResultsVec& _M_results;
+ _FlagT _M_flags;
+ bool _M_match_mode;
};
// A _DFSExecutor perform a DFS on given NFA and input string. At the very
{
public:
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
- typedef _NFA<_CharT, _TraitsT> _RegexT;
- typedef typename _BaseT::_ResultsT _ResultsT;
+ typedef _NFA<_CharT, _TraitsT> _NFAT;
+ typedef typename _BaseT::_RegexT _RegexT;
typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef regex_constants::match_flag_type _FlagT;
-
- _DFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsT& __results,
- const _RegexT& __nfa,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
- _M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+ typedef typename _BaseT::_FlagT _FlagT;
+
+ public:
+ _DFSExecutor(_BiIter __begin,
+ _BiIter __end,
+ _ResultsVec& __results,
+ const _RegexT& __re,
+ _FlagT __flags)
+ : _BaseT(__begin, __end, __results, __re, __flags),
+ _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+ (__re._M_automaton)),
+ _M_start_state(_M_nfa._M_start())
{ }
+ private:
void
- _M_match()
- { _M_dfs<true>(_M_nfa._M_start()); }
+ _M_init(_BiIter __cur)
+ {
+ _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
+ this->_M_current = __cur;
+ }
void
- _M_search_from_first()
- { _M_dfs<false>(_M_nfa._M_start()); }
+ _M_set_start(_StateIdT __start)
+ { _M_start_state = __start; }
- private:
- template<bool __match_mode>
- bool
- _M_dfs(_StateIdT __i);
+ bool
+ _M_main()
+ { return _M_dfs(this->_M_start_state); }
+
+ bool
+ _M_dfs(_StateIdT __start);
+
+ std::unique_ptr<_BaseT>
+ _M_clone() const
+ {
+ return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current,
+ this->_M_end,
+ this->_M_results,
+ this->_M_re,
+ this->_M_flags));
+ }
- _ResultsVec _M_results_ret;
- _TraitsT _M_traits;
- const _RegexT& _M_nfa;
+ // To record current solution.
+ _ResultsVec _M_cur_results;
+ const _NFAT& _M_nfa;
+ _StateIdT _M_start_state;
};
// Like the DFS approach, it try every possible state transition; Unlike DFS,
{
public:
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
- typedef _NFA<_CharT, _TraitsT> _RegexT;
- typedef typename _BaseT::_ResultsT _ResultsT;
+ typedef _NFA<_CharT, _TraitsT> _NFAT;
+ typedef typename _BaseT::_RegexT _RegexT;
typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef std::unique_ptr<_ResultsVec> _ResultsPtr;
- typedef regex_constants::match_flag_type _FlagT;
-
- _BFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsT& __results,
- const _RegexT& __nfa,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
- _M_nfa(__nfa)
+ typedef typename _BaseT::_FlagT _FlagT;
+ // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
+ // carefully work out how to compare to conflict matching states.
+ //
+ // A matching state is a pair(where, when); `where` is a NFA node; `when`
+ // is a _BiIter, indicating which char is the next to be matched. Two
+ // matching states conflict if they have equivalent `where` and `when`.
+ //
+ // Now we need to drop one and keep another, because at most one of them
+ // could be the final optimal solution. This behavior is affected by
+ // greedy policy.
+ //
+ // The definition of `greedy`:
+ // For the sequence of quantifiers in NFA sorted by their start positions,
+ // now maintain a vector in every matching state, with length equal to
+ // quantifier seq, recording repeating times of every quantifier. Now to
+ // compare two matching states, we just lexically compare these two
+ // vectors. To win the compare(to survive), one matching state needs to
+ // make its greedy quantifier count larger, and ungreedy quantifiers
+ // count smaller.
+ //
+ // In the implementation, we recorded negtive counts for greedy
+ // quantifiers and positive counts of ungreedy ones. Now the implicit
+ // operator<() for lexicographical_compare will emit the answer.
+ //
+ // When two vectors equal, it means the `where`, `when` and quantifier
+ // counts are identical, and indicates the same solution; so
+ // _ResultsEntry::operator<() just return false.
+ struct _ResultsEntry
+ : private _ResultsVec
{
- if (_M_nfa._M_start() != _S_invalid_state_id)
- _M_covered[_M_nfa._M_start()] =
- _ResultsPtr(new _ResultsVec(this->_M_results));
- _M_e_closure();
- }
+ public:
+ _ResultsEntry(size_t __res_sz, size_t __sz)
+ : _ResultsVec(__res_sz), _M_quant_keys(__sz)
+ { }
+
+ void
+ resize(size_t __n)
+ { _ResultsVec::resize(__n); }
+
+ size_t
+ size()
+ { return _ResultsVec::size(); }
+
+ sub_match<_BiIter>&
+ operator[](size_t __idx)
+ { return _ResultsVec::operator[](__idx); }
+
+ bool
+ operator<(const _ResultsEntry& __rhs) const
+ {
+ _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
+ == __rhs._M_quant_keys.size());
+ return lexicographical_compare(_M_quant_keys.begin(),
+ _M_quant_keys.end(),
+ __rhs._M_quant_keys.begin(),
+ __rhs._M_quant_keys.end());
+ }
+
+ void
+ _M_inc(size_t __idx, bool __neg)
+ { _M_quant_keys[__idx] += __neg ? 1 : -1; }
+
+ _ResultsVec&
+ _M_get()
+ { return *this; }
+
+ public:
+ std::vector<int> _M_quant_keys;
+ };
+ typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
+
+ class _TodoList
+ {
+ public:
+ explicit
+ _TodoList(size_t __sz)
+ : _M_states(), _M_exists(__sz, false)
+ { }
+
+ void _M_push(_StateIdT __u)
+ {
+ _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
+ if (!_M_exists[__u])
+ {
+ _M_exists[__u] = true;
+ _M_states.push_back(__u);
+ }
+ }
+
+ _StateIdT _M_pop()
+ {
+ auto __ret = _M_states.back();
+ _M_states.pop_back();
+ _M_exists[__ret] = false;
+ return __ret;
+ }
+
+ bool _M_empty() const
+ { return _M_states.empty(); }
+
+ void _M_clear()
+ {
+ _M_states.clear();
+ _M_exists.assign(_M_exists.size(), false);
+ }
+
+ private:
+ std::vector<_StateIdT> _M_states;
+ std::vector<bool> _M_exists;
+ };
+ public:
+ _BFSExecutor(_BiIter __begin,
+ _BiIter __end,
+ _ResultsVec& __results,
+ const _RegexT& __re,
+ _FlagT __flags)
+ : _BaseT(__begin, __end, __results, __re, __flags),
+ _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+ (__re._M_automaton)),
+ _M_match_stack(_M_nfa.size()),
+ _M_stack(_M_nfa.size()),
+ _M_start_state(_M_nfa._M_start())
+ { }
+
+ private:
void
- _M_match()
- { _M_main_loop<true>(); }
+ _M_init(_BiIter __cur)
+ {
+ this->_M_current = __cur;
+ _M_covered.clear();
+ _ResultsVec& __res(this->_M_results);
+ _M_covered[this->_M_start_state] =
+ _ResultsPtr(new _ResultsEntry(__res.size(),
+ _M_nfa._M_quant_count));
+ _M_stack._M_push(this->_M_start_state);
+ }
void
- _M_search_from_first()
- { _M_main_loop<false>(); }
+ _M_set_start(_StateIdT __start)
+ { _M_start_state = __start; }
- private:
- template<bool __match_mode>
- void
- _M_main_loop();
+ bool
+ _M_main();
void
_M_e_closure();
_M_move();
bool
- _M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const;
+ _M_includes_some();
- bool
- _M_includes_some() const;
+ std::unique_ptr<_BaseT>
+ _M_clone() const
+ {
+ return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current,
+ this->_M_end,
+ this->_M_results,
+ this->_M_re,
+ this->_M_flags));
+ }
- std::map<_StateIdT, _ResultsPtr> _M_covered;
- const _RegexT& _M_nfa;
+ const _NFAT& _M_nfa;
+ std::map<_StateIdT, _ResultsPtr> _M_covered;
+ _TodoList _M_match_stack;
+ _TodoList _M_stack;
+ _StateIdT _M_start_state;
+ // To record global optimal solution.
+ _ResultsPtr _M_cur_results;
};
//@} regex-detail