Split _M_dfs() into smaller functions.

author Tim Shen <timshen@google.com>

Mon, 22 Aug 2016 19:50:15 +0000 (19:50 +0000)

committer Tim Shen <timshen@gcc.gnu.org>

Mon, 22 Aug 2016 19:50:15 +0000 (19:50 +0000)
author Tim Shen <timshen@google.com>
Mon, 22 Aug 2016 19:50:15 +0000 (19:50 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Mon, 22 Aug 2016 19:50:15 +0000 (19:50 +0000)
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog

index 299bce642e8c925208e039d2f29ca7f9ae695379..2e88e398b2ab5b4f88ac77ce88e0c06e84903add 100644 (file)
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,14 @@
+2016-08-22  Tim Shen  <timshen@google.com>
+
+       Split _M_dfs() into smaller functions.
+       * regex_executor.h (_M_handle_repeat, _M_handle_subexpr_begin)
+       (_M_handle_subexpr_end, _M_handle_line_begin_assertion)
+       (_M_handle_line_end_assertion, _M_handle_word_boundary)
+       (_M_handle_subexpr_lookahead, _M_handle_match)
+       (_M_handle_backref, _M_handle_accept, _M_handle_alternative):
+       Add separate function declarations.
+       * regex_executor.tcc: Split _M_dfs() into multiple handler functions.
+
  2016-08-22  Gleb Natapov  <gleb@scylladb.com>
  
         PR libstdc++/68297
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h

index ef8aa9167d5cd85323e2de74b500e344aafe993f..33a68dd375b1975654c6d76ac80a2881b1fa25fc 100644 (file)
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -108,6 +108,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        void
        _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
  
+      void
+      _M_handle_repeat(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_subexpr_begin(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_subexpr_end(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_line_end_assertion(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_word_boundary(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_match(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_backref(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_accept(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_alternative(_Match_mode, _StateIdT);
+
        void
        _M_dfs(_Match_mode __match_mode, _StateIdT __start);
  
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc

index 6bbcb1b4977d1df26a26d1d1ecce5b81c98e6095..382909f1cccb09815bd30c3b2954174fbae9d59c 100644 (file)
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -195,213 +195,295 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         }
      };
  
+  // _M_alt branch is "match once more", while _M_next is "get me out
+  // of this quantifier". Executing _M_next first or _M_alt first doesn't
+  // mean the same thing, and we need to choose the correct order under
+  // given greedy mode.
    template<typename _BiIter, typename _Alloc, typename _TraitsT,
            bool __dfs_mode>
      void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
+    _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
      {
-      if (_M_states._M_visited(__i))
-       return;
-
        const auto& __state = _M_nfa[__i];
-      // Every change on _M_cur_results and _M_current will be rolled back after
-      // finishing the recursion step.
-      switch (__state._M_opcode())
+
+      // Greedy.
+      if (!__state._M_neg)
         {
-       // _M_alt branch is "match once more", while _M_next is "get me out
-       // of this quantifier". Executing _M_next first or _M_alt first don't
-       // mean the same thing, and we need to choose the correct order under
-       // given greedy mode.
-       case _S_opcode_repeat:
-         {
-           // Greedy.
-           if (!__state._M_neg)
-             {
-               _M_rep_once_more(__match_mode, __i);
-               // If it's DFS executor and already accepted, we're done.
-               if (!__dfs_mode || !_M_has_sol)
-                 _M_dfs(__match_mode, __state._M_next);
-             }
-           else // Non-greedy mode
-             {
-               if (__dfs_mode)
-                 {
-                   // vice-versa.
-                   _M_dfs(__match_mode, __state._M_next);
-                   if (!_M_has_sol)
-                     _M_rep_once_more(__match_mode, __i);
-                 }
-               else
-                 {
-                   // DON'T attempt anything, because there's already another
-                   // state with higher priority accepted. This state cannot
-                   // be better by attempting its next node.
-                   if (!_M_has_sol)
-                     {
-                       _M_dfs(__match_mode, __state._M_next);
-                       // DON'T attempt anything if it's already accepted. An
-                       // accepted state *must* be better than a solution that
-                       // matches a non-greedy quantifier one more time.
-                       if (!_M_has_sol)
-                         _M_rep_once_more(__match_mode, __i);
-                     }
-                 }
-             }
-           }
-         break;
-       case _S_opcode_subexpr_begin:
-         {
-           auto& __res = _M_cur_results[__state._M_subexpr];
-           auto __back = __res.first;
-           __res.first = _M_current;
-           _M_dfs(__match_mode, __state._M_next);
-           __res.first = __back;
-         }
-         break;
-       case _S_opcode_subexpr_end:
-         {
-           auto& __res = _M_cur_results[__state._M_subexpr];
-           auto __back = __res;
-           __res.second = _M_current;
-           __res.matched = true;
-           _M_dfs(__match_mode, __state._M_next);
-           __res = __back;
-         }
-         break;
-       case _S_opcode_line_begin_assertion:
-         if (_M_at_begin())
+         _M_rep_once_more(__match_mode, __i);
+         // If it's DFS executor and already accepted, we're done.
+         if (!__dfs_mode || !_M_has_sol)
             _M_dfs(__match_mode, __state._M_next);
-         break;
-       case _S_opcode_line_end_assertion:
-         if (_M_at_end())
-           _M_dfs(__match_mode, __state._M_next);
-         break;
-       case _S_opcode_word_boundary:
-         if (_M_word_boundary() == !__state._M_neg)
-           _M_dfs(__match_mode, __state._M_next);
-         break;
-       // Here __state._M_alt offers a single start node for a sub-NFA.
-       // We recursively invoke our algorithm to match the sub-NFA.
-       case _S_opcode_subexpr_lookahead:
-         if (_M_lookahead(__state._M_alt) == !__state._M_neg)
-           _M_dfs(__match_mode, __state._M_next);
-         break;
-       case _S_opcode_match:
-         if (_M_current == _M_end)
-           break;
+       }
+      else // Non-greedy mode
+       {
           if (__dfs_mode)
             {
-             if (__state._M_matches(*_M_current))
-               {
-                 ++_M_current;
-                 _M_dfs(__match_mode, __state._M_next);
-                 --_M_current;
-               }
+             // vice-versa.
+             _M_dfs(__match_mode, __state._M_next);
+             if (!_M_has_sol)
+               _M_rep_once_more(__match_mode, __i);
             }
           else
-           if (__state._M_matches(*_M_current))
-             _M_states._M_queue(__state._M_next, _M_cur_results);
-         break;
-       // First fetch the matched result from _M_cur_results as __submatch;
-       // then compare it with
-       // (_M_current, _M_current + (__submatch.second - __submatch.first)).
-       // If matched, keep going; else just return and try another state.
-       case _S_opcode_backref:
-         {
-           __glibcxx_assert(__dfs_mode);
-           auto& __submatch = _M_cur_results[__state._M_backref_index];
-           if (!__submatch.matched)
-             break;
-           auto __last = _M_current;
-           for (auto __tmp = __submatch.first;
-                __last != _M_end && __tmp != __submatch.second;
-                ++__tmp)
-             ++__last;
-           if (_M_re._M_automaton->_M_traits.transform(__submatch.first,
-                                                       __submatch.second)
-               == _M_re._M_automaton->_M_traits.transform(_M_current, __last))
-             {
-               if (__last != _M_current)
-                 {
-                   auto __backup = _M_current;
-                   _M_current = __last;
-                   _M_dfs(__match_mode, __state._M_next);
-                   _M_current = __backup;
-                 }
-               else
-                 _M_dfs(__match_mode, __state._M_next);
-             }
-         }
-         break;
-       case _S_opcode_accept:
-         if (__dfs_mode)
             {
-             __glibcxx_assert(!_M_has_sol);
-             if (__match_mode == _Match_mode::_Exact)
-               _M_has_sol = _M_current == _M_end;
-             else
-               _M_has_sol = true;
-             if (_M_current == _M_begin
-                 && (_M_flags & regex_constants::match_not_null))
-               _M_has_sol = false;
-             if (_M_has_sol)
+             // DON'T attempt anything, because there's already another
+             // state with higher priority accepted. This state cannot
+             // be better by attempting its next node.
+             if (!_M_has_sol)
                 {
-                 if (_M_nfa._M_flags & regex_constants::ECMAScript)
-                   _M_results = _M_cur_results;
-                 else // POSIX
-                   {
-                     __glibcxx_assert(_M_states._M_get_sol_pos());
-                     // Here's POSIX's logic: match the longest one. However
-                     // we never know which one (lhs or rhs of "|") is longer
-                     // unless we try both of them and compare the results.
-                     // The member variable _M_sol_pos records the end
-                     // position of the last successful match. It's better
-                     // to be larger, because POSIX regex is always greedy.
-                     // TODO: This could be slow.
-                     if (*_M_states._M_get_sol_pos() == _BiIter()
-                         || std::distance(_M_begin,
-                                          *_M_states._M_get_sol_pos())
-                            < std::distance(_M_begin, _M_current))
-                       {
-                         *_M_states._M_get_sol_pos() = _M_current;
-                         _M_results = _M_cur_results;
-                       }
-                   }
+                 _M_dfs(__match_mode, __state._M_next);
+                 // DON'T attempt anything if it's already accepted. An
+                 // accepted state *must* be better than a solution that
+                 // matches a non-greedy quantifier one more time.
+                 if (!_M_has_sol)
+                   _M_rep_once_more(__match_mode, __i);
                 }
             }
-         else
+       }
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      auto& __res = _M_cur_results[__state._M_subexpr];
+      auto __back = __res.first;
+      __res.first = _M_current;
+      _M_dfs(__match_mode, __state._M_next);
+      __res.first = __back;
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      auto& __res = _M_cur_results[__state._M_subexpr];
+      auto __back = __res;
+      __res.second = _M_current;
+      __res.matched = true;
+      _M_dfs(__match_mode, __state._M_next);
+      __res = __back;
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_at_begin())
+       _M_dfs(__match_mode, __state._M_next);
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_at_end())
+       _M_dfs(__match_mode, __state._M_next);
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_word_boundary() == !__state._M_neg)
+       _M_dfs(__match_mode, __state._M_next);
+    }
+
+  // Here __state._M_alt offers a single start node for a sub-NFA.
+  // We recursively invoke our algorithm to match the sub-NFA.
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_lookahead(__state._M_alt) == !__state._M_neg)
+       _M_dfs(__match_mode, __state._M_next);
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_match(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      if (_M_current == _M_end)
+       return;
+      if (__dfs_mode)
+       {
+         if (__state._M_matches(*_M_current))
             {
-             if (_M_current == _M_begin
-                 && (_M_flags & regex_constants::match_not_null))
-               break;
-             if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
-               if (!_M_has_sol)
-                 {
-                   _M_has_sol = true;
-                   _M_results = _M_cur_results;
-                 }
+             ++_M_current;
+             _M_dfs(__match_mode, __state._M_next);
+             --_M_current;
             }
-         break;
-       case _S_opcode_alternative:
-         if (_M_nfa._M_flags & regex_constants::ECMAScript)
+       }
+      else
+       if (__state._M_matches(*_M_current))
+         _M_states._M_queue(__state._M_next, _M_cur_results);
+    }
+
+  // First fetch the matched result from _M_cur_results as __submatch;
+  // then compare it with
+  // (_M_current, _M_current + (__submatch.second - __submatch.first)).
+  // If matched, keep going; else just return and try another state.
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
+    {
+      __glibcxx_assert(__dfs_mode);
+
+      const auto& __state = _M_nfa[__i];
+      auto& __submatch = _M_cur_results[__state._M_backref_index];
+      if (!__submatch.matched)
+       return;
+      auto __last = _M_current;
+      for (auto __tmp = __submatch.first;
+          __last != _M_end && __tmp != __submatch.second;
+          ++__tmp)
+       ++__last;
+      if (_M_re._M_automaton->_M_traits.transform(__submatch.first,
+                                                 __submatch.second)
+         == _M_re._M_automaton->_M_traits.transform(_M_current, __last))
+       {
+         if (__last != _M_current)
             {
-             // TODO: Fix BFS support. It is wrong.
-             _M_dfs(__match_mode, __state._M_alt);
-             // Pick lhs if it matches. Only try rhs if it doesn't.
-             if (!_M_has_sol)
-               _M_dfs(__match_mode, __state._M_next);
+             auto __backup = _M_current;
+             _M_current = __last;
+             _M_dfs(__match_mode, __state._M_next);
+             _M_current = __backup;
             }
           else
+           _M_dfs(__match_mode, __state._M_next);
+       }
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_accept(_Match_mode __match_mode, _StateIdT __i)
+    {
+      if (__dfs_mode)
+       {
+         __glibcxx_assert(!_M_has_sol);
+         if (__match_mode == _Match_mode::_Exact)
+           _M_has_sol = _M_current == _M_end;
+         else
+           _M_has_sol = true;
+         if (_M_current == _M_begin
+             && (_M_flags & regex_constants::match_not_null))
+           _M_has_sol = false;
+         if (_M_has_sol)
             {
-             // Try both and compare the result.
-             // See "case _S_opcode_accept:" handling above.
-             _M_dfs(__match_mode, __state._M_alt);
-             auto __has_sol = _M_has_sol;
-             _M_has_sol = false;
-             _M_dfs(__match_mode, __state._M_next);
-             _M_has_sol |= __has_sol;
+             if (_M_nfa._M_flags & regex_constants::ECMAScript)
+               _M_results = _M_cur_results;
+             else // POSIX
+               {
+                 __glibcxx_assert(_M_states._M_get_sol_pos());
+                 // Here's POSIX's logic: match the longest one. However
+                 // we never know which one (lhs or rhs of "|") is longer
+                 // unless we try both of them and compare the results.
+                 // The member variable _M_sol_pos records the end
+                 // position of the last successful match. It's better
+                 // to be larger, because POSIX regex is always greedy.
+                 // TODO: This could be slow.
+                 if (*_M_states._M_get_sol_pos() == _BiIter()
+                     || std::distance(_M_begin,
+                                      *_M_states._M_get_sol_pos())
+                        < std::distance(_M_begin, _M_current))
+                   {
+                     *_M_states._M_get_sol_pos() = _M_current;
+                     _M_results = _M_cur_results;
+                   }
+               }
             }
-         break;
+       }
+      else
+       {
+         if (_M_current == _M_begin
+             && (_M_flags & regex_constants::match_not_null))
+           return;
+         if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
+           if (!_M_has_sol)
+             {
+               _M_has_sol = true;
+               _M_results = _M_cur_results;
+             }
+       }
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      if (_M_nfa._M_flags & regex_constants::ECMAScript)
+       {
+         // TODO: Fix BFS support. It is wrong.
+         _M_dfs(__match_mode, __state._M_alt);
+         // Pick lhs if it matches. Only try rhs if it doesn't.
+         if (!_M_has_sol)
+           _M_dfs(__match_mode, __state._M_next);
+       }
+      else
+       {
+         // Try both and compare the result.
+         // See the accept handling in _M_handle_accept() above.
+         _M_dfs(__match_mode, __state._M_alt);
+         auto __has_sol = _M_has_sol;
+         _M_has_sol = false;
+         _M_dfs(__match_mode, __state._M_next);
+         _M_has_sol |= __has_sol;
+       }
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
+    {
+      if (_M_states._M_visited(__i))
+       return;
+
+      switch (_M_nfa[__i]._M_opcode())
+       {
+       case _S_opcode_repeat:
+         _M_handle_repeat(__match_mode, __i); break;
+       case _S_opcode_subexpr_begin:
+         _M_handle_subexpr_begin(__match_mode, __i); break;
+       case _S_opcode_subexpr_end:
+         _M_handle_subexpr_end(__match_mode, __i); break;
+       case _S_opcode_line_begin_assertion:
+         _M_handle_line_begin_assertion(__match_mode, __i); break;
+       case _S_opcode_line_end_assertion:
+         _M_handle_line_end_assertion(__match_mode, __i); break;
+       case _S_opcode_word_boundary:
+         _M_handle_word_boundary(__match_mode, __i); break;
+       case _S_opcode_subexpr_lookahead:
+         _M_handle_subexpr_lookahead(__match_mode, __i); break;
+       case _S_opcode_match:
+         _M_handle_match(__match_mode, __i); break;
+       case _S_opcode_backref:
+         _M_handle_backref(__match_mode, __i); break;
+       case _S_opcode_accept:
+         _M_handle_accept(__match_mode, __i); break;
+       case _S_opcode_alternative:
+         _M_handle_alternative(__match_mode, __i); break;
         default:
           __glibcxx_assert(false);
         }
author	Tim Shen <timshen@google.com>
	Mon, 22 Aug 2016 19:50:15 +0000 (19:50 +0000)
committer	Tim Shen <timshen@gcc.gnu.org>
	Mon, 22 Aug 2016 19:50:15 +0000 (19:50 +0000)
libstdc++-v3/ChangeLog		patch \| blob \| history
libstdc++-v3/include/bits/regex_executor.h		patch \| blob \| history
libstdc++-v3/include/bits/regex_executor.tcc		patch \| blob \| history