regex_automaton.h (_State_base, _State<>): Remove _TraitsT dependency from _State...
author Tim Shen <timshen@google.com>
Thu, 30 Jul 2015 10:35:45 +0000 (10:35 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Thu, 30 Jul 2015 10:35:45 +0000 (10:35 +0000)
* include/bits/regex_automaton.h (_State_base, _State<>):
Remove _TraitsT dependency from _State<>; Make matcher member
into the union to reduce struct size.
* include/bits/regex_automaton.tcc (_State_base<>::_M_print,
_State_base<>::_M_dot, _StateSeq<>::_M_clone):
Adjust to fit the interface. Factor out common parts in
_M_clone as _State<>::_M_has_alt.
* include/bits/regex_executor.h (_Executor<>::_M_lookahead):
Only pass state id instead of the whole state.
* include/bits/regex_executor.tcc (_Executor<>::_M_dfs,
_Executor<>::_M_lookahead): Adjust to fit the interface.
* include/std/regex: Include <ext/aligned_buffer.h>

From-SVN: r226395

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex_automaton.h
libstdc++-v3/include/bits/regex_automaton.tcc
libstdc++-v3/include/bits/regex_executor.h
libstdc++-v3/include/bits/regex_executor.tcc
libstdc++-v3/include/std/regex

index fc4eca92bdf8a9016165e128aa3c7aad3374ee1c..806d3d9bab90209428b9710c3e2e0ab72cfaca4b 100644 (file)
@@ -1,3 +1,18 @@
+2015-07-30  Tim Shen  <timshen@google.com>
+
+       * include/bits/regex_automaton.h (_State_base, _State<>):
+       Remove _TraitsT dependency from _State<>; Make matcher member
+       into the union to reduce struct size.
+       * include/bits/regex_automaton.tcc (_State_base<>::_M_print,
+       _State_base<>::_M_dot, _StateSeq<>::_M_clone):
+       Adjust to fit the interface. Factor out common parts in
+       _M_clone as _State<>::_M_has_alt.
+       * include/bits/regex_executor.h (_Executor<>::_M_lookahead):
+       Only pass state id instead of the whole state.
+       * include/bits/regex_executor.tcc (_Executor<>::_M_dfs,
+       _Executor<>::_M_lookahead): Adjust to fit the interface.
+       * include/std/regex: Include <ext/aligned_buffer.h>
+
 2015-07-30  Jonathan Wakely  <jwakely@redhat.com>
 
        * include/experimental/any (any::operator=(const any&)): Check for
index fc0eb410013231e02d1552439da6cdd8b945b757..b6ab3071ba777d1978ae56b4cb8024aea0bbdee5 100644 (file)
@@ -72,7 +72,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   struct _State_base
   {
+  protected:
     _Opcode      _M_opcode;           // type of outgoing transition
+
+  public:
     _StateIdT    _M_next;             // outgoing transition
     union // Since they are mutually exclusive.
     {
@@ -87,16 +90,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        // quantifiers (ungreedy if set true)
        bool       _M_neg;
       };
+      // For _S_opcode_match
+      __gnu_cxx::__aligned_membuf<_Matcher<char>> _M_matcher_storage;
     };
 
+  protected:
     explicit _State_base(_Opcode __opcode)
     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
     { }
 
-  protected:
-    ~_State_base() = default;
-
   public:
+    bool
+    _M_has_alt()
+    {
+      return _M_opcode == _S_opcode_alternative
+       || _M_opcode == _S_opcode_repeat
+       || _M_opcode == _S_opcode_subexpr_lookahead;
+    }
+
 #ifdef _GLIBCXX_DEBUG
     std::ostream&
     _M_print(std::ostream& ostr) const;
@@ -107,14 +118,67 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
   };
 
-  template<typename _TraitsT>
+  template<typename _Char_type>
     struct _State : _State_base
     {
-      typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
+      typedef _Matcher<_Char_type> _MatcherT;
+      static_assert(sizeof(_MatcherT) == sizeof(_Matcher<char>),
+                   "std::function<bool(T)> has the same size as "
+                   "std::function<bool(char)>");
+      static_assert(alignof(_MatcherT) == alignof(_Matcher<char>),
+                   "std::function<bool(T)> has the same alignment as "
+                   "std::function<bool(char)>");
+
+      explicit
+      _State(_Opcode __opcode) : _State_base(__opcode)
+      {
+       if (_M_opcode() == _S_opcode_match)
+         new (this->_M_matcher_storage._M_addr()) _MatcherT();
+      }
 
-      _MatcherT      _M_matches;        // for _S_opcode_match
+      _State(const _State& __rhs) : _State_base(__rhs)
+      {
+       if (__rhs._M_opcode() == _S_opcode_match)
+         new (this->_M_matcher_storage._M_addr())
+           _MatcherT(__rhs._M_get_matcher());
+      }
 
-      explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
+      _State(_State&& __rhs) : _State_base(__rhs)
+      {
+       if (__rhs._M_opcode() == _S_opcode_match)
+         new (this->_M_matcher_storage._M_addr())
+           _MatcherT(std::move(__rhs._M_get_matcher()));
+      }
+
+      _State&
+      operator=(const _State&) = delete;
+
+      ~_State()
+      {
+       if (_M_opcode() == _S_opcode_match)
+         _M_get_matcher().~_MatcherT();
+      }
+
+      // Since correct ctor and dtor rely on _M_opcode, it's better not to
+      // change it over time.
+      _Opcode
+      _M_opcode() const
+      { return _State_base::_M_opcode; }
+
+      bool
+      _M_matches(_Char_type __char) const
+      { return _M_get_matcher()(__char); }
+
+      _MatcherT&
+      _M_get_matcher()
+      { return *static_cast<_MatcherT*>(this->_M_matcher_storage._M_addr()); }
+
+      const _MatcherT&
+      _M_get_matcher() const
+      {
+       return *static_cast<const _MatcherT*>(
+           this->_M_matcher_storage._M_addr());
+      }
     };
 
   struct _NFA_base
@@ -155,10 +219,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename _TraitsT>
     struct _NFA
-    : _NFA_base, std::vector<_State<_TraitsT>>
+    : _NFA_base, std::vector<_State<typename _TraitsT::char_type>>
     {
-      typedef _State<_TraitsT>                         _StateT;
-      typedef _Matcher<typename _TraitsT::char_type>   _MatcherT;
+      typedef typename _TraitsT::char_type     _Char_type;
+      typedef _State<_Char_type>               _StateT;
+      typedef _Matcher<_Char_type>             _MatcherT;
 
       _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags)
       : _NFA_base(__flags)
@@ -202,7 +267,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_insert_matcher(_MatcherT __m)
       {
        _StateT __tmp(_S_opcode_match);
-       __tmp._M_matches = std::move(__m);
+       __tmp._M_get_matcher() = std::move(__m);
        return _M_insert_state(std::move(__tmp));
       }
 
index 72fe978d68c196b846750321e3ddf25538411580..cecc4074f18115861c5dbd5f72246cf9a54a082f 100644 (file)
@@ -174,13 +174,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       for (auto& __it : *this)
        {
-         while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
+         while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode()
                 == _S_opcode_dummy)
            __it._M_next = (*this)[__it._M_next]._M_next;
-         if (__it._M_opcode == _S_opcode_alternative
-             || __it._M_opcode == _S_opcode_repeat
-             || __it._M_opcode == _S_opcode_subexpr_lookahead)
-           while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
+         if (__it._M_has_alt())
+           while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode()
                   == _S_opcode_dummy)
              __it._M_alt = (*this)[__it._M_alt]._M_next;
        }
@@ -200,11 +198,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          __stack.pop();
          auto __dup = _M_nfa[__u];
          // _M_insert_state() never return -1
-         auto __id = _M_nfa._M_insert_state(__dup);
+         auto __id = _M_nfa._M_insert_state(std::move(__dup));
          __m[__u] = __id;
-         if (__dup._M_opcode == _S_opcode_alternative
-             || __dup._M_opcode == _S_opcode_repeat
-             || __dup._M_opcode == _S_opcode_subexpr_lookahead)
+         if (__dup._M_has_alt())
            if (__dup._M_alt != _S_invalid_state_id
                && __m.count(__dup._M_alt) == 0)
              __stack.push(__dup._M_alt);
@@ -223,9 +219,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
              _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next) > 0);
              __ref._M_next = __m[__ref._M_next];
            }
-         if (__ref._M_opcode == _S_opcode_alternative
-             || __ref._M_opcode == _S_opcode_repeat
-             || __ref._M_opcode == _S_opcode_subexpr_lookahead)
+         if (__ref._M_has_alt())
            if (__ref._M_alt != _S_invalid_state_id)
              {
                _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt) > 0);
index 404f30bf1e13a2a5eb3d4f10aab794a2074fe5c5..f3f8876f41f05642f8760790eab5db70f47fdd22 100644 (file)
@@ -148,7 +148,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_word_boundary() const;
 
       bool
-      _M_lookahead(_State<_TraitsT> __state);
+      _M_lookahead(_StateIdT __next);
 
        // Holds additional information used in BFS-mode.
       template<typename _SearchMode, typename _ResultsVec>
index 9b5c1c672d28cf2205ec28e4f88549269dfce579..3fd17f69e2c4b2175d28ffdc0fe341a989e2e1bf 100644 (file)
@@ -145,11 +145,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
           bool __dfs_mode>
     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_lookahead(_State<_TraitsT> __state)
+    _M_lookahead(_StateIdT __next)
     {
       _ResultsVec __what(_M_cur_results.size());
       _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
-      __sub._M_states._M_start = __state._M_alt;
+      __sub._M_states._M_start = __next;
       if (__sub._M_search_from_first())
        {
          for (size_t __i = 0; __i < __what.size(); __i++)
@@ -203,7 +203,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       const auto& __state = _M_nfa[__i];
       // Every change on _M_cur_results and _M_current will be rolled back after
       // finishing the recursion step.
-      switch (__state._M_opcode)
+      switch (__state._M_opcode())
        {
        // _M_alt branch is "match once more", while _M_next is "get me out
        // of this quantifier". Executing _M_next first or _M_alt first don't
@@ -280,7 +280,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        // Here __state._M_alt offers a single start node for a sub-NFA.
        // We recursively invoke our algorithm to match the sub-NFA.
        case _S_opcode_subexpr_lookahead:
-         if (_M_lookahead(__state) == !__state._M_neg)
+         if (_M_lookahead(__state._M_alt) == !__state._M_neg)
            _M_dfs(__match_mode, __state._M_next);
          break;
        case _S_opcode_match:
index 3dff372d724caa8107c73215e4dd47e4371603fc..b6fe4c74051db748ff47db7fa0814bfc8e77c90e 100644 (file)
@@ -53,6 +53,7 @@
 #include <map>
 #include <cstring>
 
+#include <ext/aligned_buffer.h>
 #include <bits/regex_constants.h>
 #include <bits/regex_error.h>
 #include <bits/regex_automaton.h>