regex.h (regex_token_iterator<>::regex_token_iterator): Fix initialization orders...
[gcc.git] / libstdc++-v3 / include / bits / regex_executor.h
index e2d6e6e412d87f911fb1279802e1b94a7d129af5..018b649a25aef1cacdc7973e9286a7492ff2e968 100644 (file)
  *  Do not attempt to use it directly. @headername{regex}
  */
 
-// TODO: convert comments to doxygen format.
+// FIXME convert comments to doxygen format.
+
+// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
+// much more similar. Also, make grouping seperated. The
+// regex_constants::nosubs enables much more simpler execution.
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
@@ -57,42 +61,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     class _Executor
     {
     public:
-      typedef match_results<_BiIter, _Alloc>          _ResultsT;
+      typedef basic_regex<_CharT, _TraitsT>           _RegexT;
       typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
       typedef regex_constants::match_flag_type        _FlagT;
+      typedef typename _TraitsT::char_class_type      _ClassT;
+
+    public:
+      _Executor(_BiIter         __begin,
+               _BiIter         __end,
+               _ResultsVec&    __results,
+               const _RegexT&  __re,
+               _FlagT          __flags)
+      : _M_begin(__begin),
+      _M_end(__end),
+      _M_re(__re),
+      _M_results(__results),
+      _M_flags((__flags & regex_constants::match_prev_avail)
+              ? (__flags
+                 & ~regex_constants::match_not_bol
+                 & ~regex_constants::match_not_bow)
+              : __flags)
+      { }
 
       virtual
       ~_Executor()
       { }
 
       // Set matched when string exactly match the pattern.
-      virtual void
-      _M_match() = 0;
+      bool
+      _M_match()
+      {
+       _M_match_mode = true;
+       _M_init(_M_begin);
+       return _M_main();
+      }
 
       // Set matched when some prefix of the string matches the pattern.
-      virtual void
-      _M_search_from_first() = 0;
-
-    protected:
-      typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
-      _Executor(_BiIter    __begin,
-                _BiIter    __end,
-                _ResultsT& __results,
-                _FlagT     __flags,
-                _SizeT     __size)
-      : _M_current(__begin), _M_end(__end), _M_results(__results),
-        _M_flags(__flags)
+      bool
+      _M_search_from_first()
+      {
+       _M_match_mode = false;
+       _M_init(_M_begin);
+       return _M_main();
+      }
+
+      bool
+      _M_search();
+
+      bool
+      _M_is_word(_CharT __ch) const
+      {
+       static const _CharT __s = 'w';
+       return _M_re._M_traits.isctype
+         (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
+      }
+
+      bool
+      _M_at_begin() const
+      {
+       return _M_current == _M_begin
+         && !(_M_flags & (regex_constants::match_not_bol
+                          | regex_constants::match_prev_avail));
+      }
+
+      bool
+      _M_at_end() const
       {
-        __size += 2;
-        _M_results.resize(__size);
-        for (auto __i = 0; __i < __size; __i++)
-          _M_results[__i].matched = false;
+       return _M_current == _M_end
+         && !(_M_flags & regex_constants::match_not_eol);
       }
 
-      _BiIter       _M_current;
-      _BiIter       _M_end;
-      _ResultsVec&  _M_results;
-      _FlagT        _M_flags;
+      bool
+      _M_word_boundry(_State<_CharT, _TraitsT> __state) const;
+
+      virtual std::unique_ptr<_Executor>
+      _M_clone() const = 0;
+
+      // Return whether now match the given sub-NFA.
+      bool
+      _M_lookahead(_State<_CharT, _TraitsT> __state) const
+      {
+       auto __sub = this->_M_clone();
+       __sub->_M_set_start(__state._M_alt);
+       return __sub->_M_search_from_first();
+      }
+
+      void
+      _M_set_results(_ResultsVec& __cur_results);
+
+    public:
+      virtual void
+      _M_init(_BiIter __cur) = 0;
+
+      virtual void
+      _M_set_start(_StateIdT __start) = 0;
+
+      virtual bool
+      _M_main() = 0;
+
+      _BiIter         _M_current;
+      const _BiIter   _M_begin;
+      const _BiIter   _M_end;
+      const _RegexT&  _M_re;
+      _ResultsVec&    _M_results;
+      _FlagT          _M_flags;
+      bool            _M_match_mode;
     };
 
   // A _DFSExecutor perform a DFS on given NFA and input string. At the very
@@ -115,36 +188,56 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
     public:
       typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _RegexT;
-      typedef typename _BaseT::_ResultsT                   _ResultsT;
+      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
+      typedef typename _BaseT::_RegexT                     _RegexT;
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
-      typedef regex_constants::match_flag_type             _FlagT;
-
-      _DFSExecutor(_BiIter        __begin,
-                   _BiIter        __end,
-                   _ResultsT&     __results,
-                   const _RegexT& __nfa,
-                   _FlagT         __flags)
-      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
-        _M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+      typedef typename _BaseT::_FlagT                      _FlagT;
+
+    public:
+      _DFSExecutor(_BiIter         __begin,
+                  _BiIter         __end,
+                  _ResultsVec&    __results,
+                  const _RegexT&  __re,
+                  _FlagT          __flags)
+      : _BaseT(__begin, __end, __results, __re, __flags),
+      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+            (__re._M_automaton)),
+      _M_start_state(_M_nfa._M_start())
       { }
 
+    private:
       void
-      _M_match()
-      { _M_dfs<true>(_M_nfa._M_start()); }
+      _M_init(_BiIter __cur)
+      {
+       _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
+       this->_M_current = __cur;
+      }
 
       void
-      _M_search_from_first()
-      { _M_dfs<false>(_M_nfa._M_start()); }
+      _M_set_start(_StateIdT __start)
+      { _M_start_state = __start; }
 
-    private:
-      template<bool __match_mode>
-        bool
-        _M_dfs(_StateIdT __i);
+      bool
+      _M_main()
+      { return _M_dfs(this->_M_start_state); }
+
+      bool
+      _M_dfs(_StateIdT __start);
+
+      std::unique_ptr<_BaseT>
+      _M_clone() const
+      {
+       return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current,
+                                                       this->_M_end,
+                                                       this->_M_results,
+                                                       this->_M_re,
+                                                       this->_M_flags));
+      }
 
-      _ResultsVec    _M_results_ret;
-      _TraitsT       _M_traits;
-      const _RegexT& _M_nfa;
+      // To record current solution.
+      _ResultsVec     _M_cur_results;
+      const _NFAT&    _M_nfa;
+      _StateIdT       _M_start_state;
     };
 
   // Like the DFS approach, it try every possible state transition; Unlike DFS,
@@ -167,38 +260,154 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
     public:
       typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _RegexT;
-      typedef typename _BaseT::_ResultsT                   _ResultsT;
+      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
+      typedef typename _BaseT::_RegexT                     _RegexT;
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
-      typedef std::unique_ptr<_ResultsVec>                 _ResultsPtr;
-      typedef regex_constants::match_flag_type             _FlagT;
-
-      _BFSExecutor(_BiIter        __begin,
-                   _BiIter        __end,
-                   _ResultsT&     __results,
-                   const _RegexT& __nfa,
-                   _FlagT         __flags)
-      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
-        _M_nfa(__nfa)
+      typedef typename _BaseT::_FlagT                      _FlagT;
+      // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
+      // carefully work out how to compare to conflict matching states.
+      //
+      // A matching state is a pair(where, when); `where` is a NFA node; `when`
+      // is a _BiIter, indicating which char is the next to be matched. Two
+      // matching states conflict if they have equivalent `where` and `when`.
+      //
+      // Now we need to drop one and keep another, because at most one of them
+      // could be the final optimal solution. This behavior is affected by
+      // greedy policy.
+      //
+      // The definition of `greedy`:
+      // For the sequence of quantifiers in NFA sorted by their start positions,
+      // now maintain a vector in every matching state, with length equal to
+      // quantifier seq, recording repeating times of every quantifier. Now to
+      // compare two matching states, we just lexically compare these two
+      // vectors. To win the compare(to survive), one matching state needs to
+      // make its greedy quantifier count larger, and ungreedy quantifiers
+      // count smaller.
+      //
+      // In the implementation, we recorded negtive counts for greedy
+      // quantifiers and positive counts of ungreedy ones. Now the implicit
+      // operator<() for lexicographical_compare will emit the answer.
+      //
+      // When two vectors equal, it means the `where`, `when` and quantifier
+      // counts are identical, and indicates the same solution; so
+      // _ResultsEntry::operator<() just return false.
+      struct _ResultsEntry
+      : private _ResultsVec
       {
-        if (_M_nfa._M_start() != _S_invalid_state_id)
-          _M_covered[_M_nfa._M_start()] =
-            _ResultsPtr(new _ResultsVec(this->_M_results));
-        _M_e_closure();
-      }
+      public:
+       _ResultsEntry(size_t __res_sz, size_t __sz)
+       : _ResultsVec(__res_sz), _M_quant_keys(__sz)
+       { }
+
+       void
+       resize(size_t __n)
+       { _ResultsVec::resize(__n); }
+
+       size_t
+       size()
+       { return _ResultsVec::size(); }
+
+       sub_match<_BiIter>&
+       operator[](size_t __idx)
+       { return _ResultsVec::operator[](__idx); }
+
+       bool
+       operator<(const _ResultsEntry& __rhs) const
+       {
+         _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
+                               == __rhs._M_quant_keys.size());
+         return lexicographical_compare(_M_quant_keys.begin(),
+                                        _M_quant_keys.end(),
+                                        __rhs._M_quant_keys.begin(),
+                                        __rhs._M_quant_keys.end());
+       }
+
+       void
+       _M_inc(size_t __idx, bool __neg)
+       { _M_quant_keys[__idx] += __neg ? 1 : -1; }
+
+       _ResultsVec&
+       _M_get()
+       { return *this; }
+
+      public:
+       std::vector<int> _M_quant_keys;
+      };
+      typedef std::unique_ptr<_ResultsEntry>               _ResultsPtr;
+
+      class _TodoList
+      {
+      public:
+       explicit
+       _TodoList(size_t __sz)
+       : _M_states(), _M_exists(__sz, false)
+       { }
+
+       void _M_push(_StateIdT __u)
+       {
+         _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
+         if (!_M_exists[__u])
+           {
+             _M_exists[__u] = true;
+             _M_states.push_back(__u);
+           }
+       }
+
+       _StateIdT _M_pop()
+       {
+         auto __ret = _M_states.back();
+         _M_states.pop_back();
+         _M_exists[__ret] = false;
+         return __ret;
+       }
+
+       bool _M_empty() const
+       { return _M_states.empty(); }
+
+       void _M_clear()
+       {
+         _M_states.clear();
+         _M_exists.assign(_M_exists.size(), false);
+       }
+
+      private:
+       std::vector<_StateIdT>         _M_states;
+       std::vector<bool>              _M_exists;
+      };
 
+    public:
+      _BFSExecutor(_BiIter         __begin,
+                  _BiIter         __end,
+                  _ResultsVec&    __results,
+                  const _RegexT&  __re,
+                  _FlagT          __flags)
+      : _BaseT(__begin, __end, __results, __re, __flags),
+      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+            (__re._M_automaton)),
+      _M_match_stack(_M_nfa.size()),
+      _M_stack(_M_nfa.size()),
+      _M_start_state(_M_nfa._M_start())
+      { }
+
+    private:
       void
-      _M_match()
-      { _M_main_loop<true>(); }
+      _M_init(_BiIter __cur)
+      {
+       this->_M_current = __cur;
+       _M_covered.clear();
+       _ResultsVec& __res(this->_M_results);
+       _M_covered[this->_M_start_state] =
+         _ResultsPtr(new _ResultsEntry(__res.size(),
+                                       _M_nfa._M_quant_count));
+       _M_stack._M_push(this->_M_start_state);
+      }
 
       void
-      _M_search_from_first()
-      { _M_main_loop<false>(); }
+      _M_set_start(_StateIdT __start)
+      { _M_start_state = __start; }
 
-    private:
-      template<bool __match_mode>
-        void
-        _M_main_loop();
+      bool
+      _M_main();
 
       void
       _M_e_closure();
@@ -207,13 +416,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_move();
 
       bool
-      _M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const;
+      _M_includes_some();
 
-      bool
-      _M_includes_some() const;
+      std::unique_ptr<_BaseT>
+      _M_clone() const
+      {
+       return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current,
+                                                       this->_M_end,
+                                                       this->_M_results,
+                                                       this->_M_re,
+                                                       this->_M_flags));
+      }
 
-      std::map<_StateIdT, _ResultsPtr>     _M_covered;
-      const _RegexT&                       _M_nfa;
+      const _NFAT&                     _M_nfa;
+      std::map<_StateIdT, _ResultsPtr> _M_covered;
+      _TodoList                        _M_match_stack;
+      _TodoList                        _M_stack;
+      _StateIdT                        _M_start_state;
+      // To record global optimal solution.
+      _ResultsPtr                      _M_cur_results;
     };
 
  //@} regex-detail