// class template regex -*- C++ -*-
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// FIXME convert comments to doxygen format.
-// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
-// much more similar. Also, make grouping seperated. The
-// regex_constants::nosubs enables much more simpler execution.
-
namespace std _GLIBCXX_VISIBILITY(default)
{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
- template<typename, typename>
- class basic_regex;
-
- template<typename>
- class sub_match;
-
- template<typename, typename>
- class match_results;
-_GLIBCXX_END_NAMESPACE_VERSION
-
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
+ /**
+ * @brief Takes a regex and an input string and does the matching.
+ *
+ * The %_Executor class has two modes: DFS mode and BFS mode, controlled
+ * by the template parameter %__dfs_mode.
+ */
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
class _Executor
{
+ using __search_mode = integral_constant<bool, __dfs_mode>;
+ using __dfs = true_type;
+ using __bfs = false_type;
+
+ enum class _Match_mode : unsigned char { _Exact, _Prefix };
+
public:
- typedef basic_regex<_CharT, _TraitsT> _RegexT;
- typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
- typedef regex_constants::match_flag_type _FlagT;
- typedef typename _TraitsT::char_class_type _ClassT;
+ typedef typename iterator_traits<_BiIter>::value_type _CharT;
+ typedef basic_regex<_CharT, _TraitsT> _RegexT;
+ typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
+ typedef regex_constants::match_flag_type _FlagT;
+ typedef typename _TraitsT::char_class_type _ClassT;
+ typedef _NFA<_TraitsT> _NFAT;
public:
_Executor(_BiIter __begin,
: _M_begin(__begin),
_M_end(__end),
_M_re(__re),
+ _M_nfa(*__re._M_automaton),
_M_results(__results),
+ _M_rep_count(_M_nfa.size()),
+ _M_states(_M_nfa._M_start(), _M_nfa.size()),
_M_flags((__flags & regex_constants::match_prev_avail)
? (__flags
& ~regex_constants::match_not_bol
: __flags)
{ }
- virtual
- ~_Executor()
- { }
-
- // Set matched when string exactly match the pattern.
+ // Set matched when string exactly matches the pattern.
bool
_M_match()
{
- _M_match_mode = true;
- _M_init(_M_begin);
- return _M_main();
+ _M_current = _M_begin;
+ return _M_main(_Match_mode::_Exact);
}
// Set matched when some prefix of the string matches the pattern.
bool
_M_search_from_first()
{
- _M_match_mode = false;
- _M_init(_M_begin);
- return _M_main();
+ _M_current = _M_begin;
+ return _M_main(_Match_mode::_Prefix);
}
bool
_M_search();
+ private:
+ void
+ _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
+
+ void
+ _M_dfs(_Match_mode __match_mode, _StateIdT __start);
+
+ bool
+ _M_main(_Match_mode __match_mode)
+ { return _M_main_dispatch(__match_mode, __search_mode{}); }
+
+ bool
+ _M_main_dispatch(_Match_mode __match_mode, __dfs);
+
+ bool
+ _M_main_dispatch(_Match_mode __match_mode, __bfs);
+
bool
_M_is_word(_CharT __ch) const
{
- static const _CharT __s = 'w';
- return _M_re._M_traits.isctype
- (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
+ static const _CharT __s[2] = { 'w' };
+ return _M_re._M_automaton->_M_traits.isctype
+ (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
}
bool
}
bool
- _M_word_boundry(_State<_CharT, _TraitsT> __state) const;
-
- virtual std::unique_ptr<_Executor>
- _M_clone() const = 0;
+ _M_word_boundary() const;
- // Return whether now match the given sub-NFA.
bool
- _M_lookahead(_State<_CharT, _TraitsT> __state) const
- {
- auto __sub = this->_M_clone();
- __sub->_M_set_start(__state._M_alt);
- return __sub->_M_search_from_first();
- }
+ _M_lookahead(_State<_TraitsT> __state);
- void
- _M_set_results(_ResultsVec& __cur_results);
+ // Holds additional information used in BFS-mode.
+ template<typename _SearchMode, typename _ResultsVec>
+ struct _State_info;
- public:
- virtual void
- _M_init(_BiIter __cur) = 0;
-
- virtual void
- _M_set_start(_StateIdT __start) = 0;
-
- virtual bool
- _M_main() = 0;
-
- _BiIter _M_current;
- const _BiIter _M_begin;
- const _BiIter _M_end;
- const _RegexT& _M_re;
- _ResultsVec& _M_results;
- _FlagT _M_flags;
- bool _M_match_mode;
- };
-
- // A _DFSExecutor perform a DFS on given NFA and input string. At the very
- // beginning the executor stands in the start state, then it try every
- // possible state transition in current state recursively. Some state
- // transitions consume input string, say, a single-char-matcher or a
- // back-reference matcher; some not, like assertion or other anchor nodes.
- // When the input is exhausted and the current state is an accepting state,
- // the whole executor return true.
- //
- // TODO: This approach is exponentially slow for certain input.
- // Try to compile the NFA to a DFA.
- //
- // Time complexity: exponential
- // Space complexity: O(__end - __begin)
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- class _DFSExecutor
- : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
- {
- public:
- typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
- typedef _NFA<_CharT, _TraitsT> _NFAT;
- typedef typename _BaseT::_RegexT _RegexT;
- typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef typename _BaseT::_FlagT _FlagT;
-
- public:
- _DFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsVec& __results,
- const _RegexT& __re,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __re, __flags),
- _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
- (__re._M_automaton)),
- _M_start_state(_M_nfa._M_start())
- { }
-
- private:
- void
- _M_init(_BiIter __cur)
- {
- _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
- this->_M_current = __cur;
- }
-
- void
- _M_set_start(_StateIdT __start)
- { _M_start_state = __start; }
-
- bool
- _M_main()
- { return _M_dfs(this->_M_start_state); }
-
- bool
- _M_dfs(_StateIdT __start);
-
- std::unique_ptr<_BaseT>
- _M_clone() const
- {
- return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current,
- this->_M_end,
- this->_M_results,
- this->_M_re,
- this->_M_flags));
- }
-
- // To record current solution.
- _ResultsVec _M_cur_results;
- const _NFAT& _M_nfa;
- _StateIdT _M_start_state;
- };
-
- // Like the DFS approach, it try every possible state transition; Unlike DFS,
- // it uses a queue instead of a stack to store matching states. It's a BFS
- // approach.
- //
- // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained
- // this algorithm clearly.
- //
- // Every entry of _M_covered saves the solution(grouping status) for every
- // matching head. When states transit, solutions will be compared and
- // deduplicated(based on which greedy mode we have).
- //
- // Time complexity: O((__end - __begin) * _M_nfa.size())
- // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- class _BFSExecutor
- : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
- {
- public:
- typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
- typedef _NFA<_CharT, _TraitsT> _NFAT;
- typedef typename _BaseT::_RegexT _RegexT;
- typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef typename _BaseT::_FlagT _FlagT;
- // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
- // carefully work out how to compare to conflict matching states.
- //
- // A matching state is a pair(where, when); `where` is a NFA node; `when`
- // is a _BiIter, indicating which char is the next to be matched. Two
- // matching states conflict if they have equivalent `where` and `when`.
- //
- // Now we need to drop one and keep another, because at most one of them
- // could be the final optimal solution. This behavior is affected by
- // greedy policy.
- //
- // The definition of `greedy`:
- // For the sequence of quantifiers in NFA sorted by their start positions,
- // now maintain a vector in every matching state, with length equal to
- // quantifier seq, recording repeating times of every quantifier. Now to
- // compare two matching states, we just lexically compare these two
- // vectors. To win the compare(to survive), one matching state needs to
- // make its greedy quantifier count larger, and ungreedy quantifiers
- // count smaller.
- //
- // In the implementation, we recorded negtive counts for greedy
- // quantifiers and positive counts of ungreedy ones. Now the implicit
- // operator<() for lexicographical_compare will emit the answer.
- //
- // When two vectors equal, it means the `where`, `when` and quantifier
- // counts are identical, and indicates the same solution; so
- // _ResultsEntry::operator<() just return false.
- struct _ResultsEntry
- : private _ResultsVec
- {
- public:
- _ResultsEntry(size_t __res_sz, size_t __sz)
- : _ResultsVec(__res_sz), _M_quant_keys(__sz)
- { }
-
- void
- resize(size_t __n)
- { _ResultsVec::resize(__n); }
-
- size_t
- size()
- { return _ResultsVec::size(); }
-
- sub_match<_BiIter>&
- operator[](size_t __idx)
- { return _ResultsVec::operator[](__idx); }
-
- bool
- operator<(const _ResultsEntry& __rhs) const
+ template<typename _ResultsVec>
+ struct _State_info<__bfs, _ResultsVec>
{
- _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
- == __rhs._M_quant_keys.size());
- return lexicographical_compare(_M_quant_keys.begin(),
- _M_quant_keys.end(),
- __rhs._M_quant_keys.begin(),
- __rhs._M_quant_keys.end());
- }
-
- void
- _M_inc(size_t __idx, bool __neg)
- { _M_quant_keys[__idx] += __neg ? 1 : -1; }
-
- _ResultsVec&
- _M_get()
- { return *this; }
-
- public:
- std::vector<int> _M_quant_keys;
- };
- typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
-
- class _TodoList
- {
- public:
- explicit
- _TodoList(size_t __sz)
- : _M_states(), _M_exists(__sz, false)
- { }
-
- void _M_push(_StateIdT __u)
+ explicit
+ _State_info(_StateIdT __start, size_t __n)
+ : _M_visited_states(new bool[__n]()), _M_start(__start)
+ { }
+
+ bool _M_visited(_StateIdT __i)
+ {
+ if (_M_visited_states[__i])
+ return true;
+ _M_visited_states[__i] = true;
+ return false;
+ }
+
+ void _M_queue(_StateIdT __i, const _ResultsVec& __res)
+ { _M_match_queue.emplace_back(__i, __res); }
+
+ // Dummy implementations for BFS mode.
+ _BiIter* _M_get_sol_pos() { return nullptr; }
+
+ // Saves states that need to be considered for the next character.
+ vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
+ // Indicates which states are already visited.
+ unique_ptr<bool[]> _M_visited_states;
+ // To record current solution.
+ _StateIdT _M_start;
+ };
+
+ template<typename _ResultsVec>
+ struct _State_info<__dfs, _ResultsVec>
{
- _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
- if (!_M_exists[__u])
- {
- _M_exists[__u] = true;
- _M_states.push_back(__u);
- }
- }
-
- _StateIdT _M_pop()
- {
- auto __ret = _M_states.back();
- _M_states.pop_back();
- _M_exists[__ret] = false;
- return __ret;
- }
+ explicit
+ _State_info(_StateIdT __start, size_t) : _M_start(__start)
+ { }
- bool _M_empty() const
- { return _M_states.empty(); }
+ // Dummy implementations for DFS mode.
+ bool _M_visited(_StateIdT) const { return false; }
+ void _M_queue(_StateIdT, const _ResultsVec&) { }
- void _M_clear()
- {
- _M_states.clear();
- _M_exists.assign(_M_exists.size(), false);
- }
+ _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
- private:
- std::vector<_StateIdT> _M_states;
- std::vector<bool> _M_exists;
- };
+ // To record current solution.
+ _StateIdT _M_start;
+ _BiIter _M_sol_pos;
+ };
public:
- _BFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsVec& __results,
- const _RegexT& __re,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __re, __flags),
- _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
- (__re._M_automaton)),
- _M_match_stack(_M_nfa.size()),
- _M_stack(_M_nfa.size()),
- _M_start_state(_M_nfa._M_start())
- { }
-
- private:
- void
- _M_init(_BiIter __cur)
- {
- this->_M_current = __cur;
- _M_covered.clear();
- _ResultsVec& __res(this->_M_results);
- _M_covered[this->_M_start_state] =
- _ResultsPtr(new _ResultsEntry(__res.size(),
- _M_nfa._M_quant_count));
- _M_stack._M_push(this->_M_start_state);
- }
-
- void
- _M_set_start(_StateIdT __start)
- { _M_start_state = __start; }
-
- bool
- _M_main();
-
- void
- _M_e_closure();
-
- void
- _M_move();
-
- bool
- _M_includes_some();
-
- std::unique_ptr<_BaseT>
- _M_clone() const
- {
- return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current,
- this->_M_end,
- this->_M_results,
- this->_M_re,
- this->_M_flags));
- }
-
- const _NFAT& _M_nfa;
- std::map<_StateIdT, _ResultsPtr> _M_covered;
- _TodoList _M_match_stack;
- _TodoList _M_stack;
- _StateIdT _M_start_state;
- // To record global optimal solution.
- _ResultsPtr _M_cur_results;
+ _ResultsVec _M_cur_results;
+ _BiIter _M_current;
+ const _BiIter _M_begin;
+ const _BiIter _M_end;
+ const _RegexT& _M_re;
+ const _NFAT& _M_nfa;
+ _ResultsVec& _M_results;
+ vector<pair<_BiIter, int>> _M_rep_count;
+ _State_info<__search_mode, _ResultsVec> _M_states;
+ _FlagT _M_flags;
+ // Do we have a solution so far?
+ bool _M_has_sol;
};
//@} regex-detail