Implement bracket expression.
authorTim Shen <timshen91@gmail.com>
Sat, 3 Aug 2013 14:32:54 +0000 (14:32 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Sat, 3 Aug 2013 14:32:54 +0000 (14:32 +0000)
2013-08-03  Tim Shen  <timshen91@gmail.com>

Implement bracket expression.
* include/bits/regex.h: Remove constexpr from "|=", etc.
* include/bits/regex_compiler.h: Parse bracket expression.
* include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old
_RangeMatcher).
* include/bits/regex_nfa.tcc: Implement them.
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
from regex_search to regex_match.
* testsuite/28_regex/algorithms/regex_match/extended/
cstring_bracket_01.cc: New.

From-SVN: r201465

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_nfa.h
libstdc++-v3/include/bits/regex_nfa.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc [new file with mode: 0644]

index 8c820956523eac9125f8e11b6ce81515d848a112..a1a690dd82fe3d1b8b16eb3c0db73ffd83934f19 100644 (file)
@@ -1,3 +1,16 @@
+2013-08-03  Tim Shen  <timshen91@gmail.com>
+
+       Implement bracket expression.
+       * include/bits/regex.h: Remove constexpr from "|=", etc.
+       * include/bits/regex_compiler.h: Parse bracket expression.
+       * include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old
+       _RangeMatcher).
+       * include/bits/regex_nfa.tcc: Implement them.
+       * testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
+       from regex_search to regex_match.
+       * testsuite/28_regex/algorithms/regex_match/extended/
+       cstring_bracket_01.cc: New.
+
 2013-08-02  Paolo Carlini  <paolo.carlini@oracle.com>
 
        * include/debug/functions.h (__foreign_iterator_aux4):
index c1f65339848e2df5d9832442506d5340a53cbb26..fd8f1344ff20ad10fb970c347ff92164648d69b5 100644 (file)
@@ -95,15 +95,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
           operator~() const
           { return _RegexMask(~_M_base, ~_M_extended); }
 
-          constexpr _RegexMask&
+          _RegexMask&
           operator&=(_RegexMask __other)
           { return *this = (*this) & __other; }
 
-          constexpr _RegexMask&
+          _RegexMask&
           operator|=(_RegexMask __other)
           { return *this = (*this) | __other; }
 
-          constexpr _RegexMask&
+          _RegexMask&
           operator^=(_RegexMask __other)
           { return *this = (*this) ^ __other; }
 
@@ -228,7 +228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
               __fctyp.tolower(&*__v.begin(), &*__v.end());
               return this->transform(&*__v.begin(), &*__v.end());
             }
-          __catch (...)
+          __catch (std::bad_cast)
             {
             }
           return string_type();
@@ -519,7 +519,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         };
 
       std::string __s(__last - __first, '?');
-      string_type a(__first, __last);
       __fctyp.narrow(__first, __last, '?', &*__s.begin());
 
       for (unsigned int __i = 0; *__collatenames[__i]; __i++)
index badea8c641e29c6483cbedc42b56e40c7d5626fb..2cc9da1a110330fb352c945bce137687114cf231 100644 (file)
@@ -44,9 +44,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   {
     typedef unsigned int _StateT;
 
-    static constexpr _StateT _S_state_at_start    = 1 << 0;
-    static constexpr _StateT _S_state_in_brace    = 1 << 2;
-    static constexpr _StateT _S_state_in_bracket  = 1 << 3;
+    static constexpr _StateT _S_state_in_brace    = 1 << 0;
+    static constexpr _StateT _S_state_in_bracket  = 1 << 1;
 
     virtual ~_Scanner_base() { };
   };
@@ -77,8 +76,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _S_token_anychar,
        _S_token_backref,
        _S_token_bracket_begin,
+       _S_token_bracket_inverse_begin,
        _S_token_bracket_end,
-       _S_token_inverse_class,
        _S_token_char_class_name,
        _S_token_closure0,
        _S_token_closure1,
@@ -97,7 +96,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _S_token_opt,
        _S_token_or,
        _S_token_ord_char,
-       _S_token_quoted_char,
        _S_token_subexpr_begin,
        _S_token_subexpr_end,
        _S_token_word_begin,
@@ -108,7 +106,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
               std::locale __loc)
       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
-        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
+        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
       { _M_advance(); }
 
       void
@@ -219,9 +217,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
       else if (__c == _M_ctype.widen('['))
        {
-         _M_curToken = _S_token_bracket_begin;
-         _M_state |= (_S_state_in_bracket | _S_state_at_start);
-         ++_M_current;
+          if (*++_M_current == _M_ctype.widen('^'))
+            {
+              _M_curToken = _S_token_bracket_inverse_begin;
+              ++_M_current;
+            }
+          else
+            _M_curToken = _S_token_bracket_begin;
+         _M_state |= _S_state_in_bracket;
          return;
        }
       else if (__c == _M_ctype.widen('\\'))
@@ -304,14 +307,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _Scanner<_InputIterator>::
     _M_scan_in_bracket()
     {
-      if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
-       {
-         _M_curToken = _S_token_inverse_class;
-         _M_state &= ~_S_state_at_start;
-         ++_M_current;
-         return;
-       }
-      else if (*_M_current == _M_ctype.widen('['))
+      if (*_M_current == _M_ctype.widen('['))
        {
          ++_M_current;
          if (_M_current == _M_end)
@@ -347,21 +343,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
       else if (*_M_current == _M_ctype.widen(']'))
        {
-         if (!(_M_flags & regex_constants::ECMAScript)
-             || !(_M_state & _S_state_at_start))
-           {
-             // special case: only if  _not_ chr first after
-             // '[' or '[^' and if not ECMAscript
-             _M_curToken = _S_token_bracket_end;
-             ++_M_current;
-             return;
-           }
+          _M_curToken = _S_token_bracket_end;
+          _M_state &= ~_S_state_in_bracket;
+          ++_M_current;
+          return;
        }
+      else if (*_M_current == _M_ctype.widen('\\'))
+        {
+         _M_eat_escape();
+         return;
+        }
       _M_curToken = _S_token_collelem_single;
       _M_curValue.assign(1, *_M_current);
       ++_M_current;
     }
 
+  // TODO implement it.
   template<typename _InputIterator>
     void
     _Scanner<_InputIterator>::
@@ -463,11 +460,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          _M_curToken = _S_token_backref;
          _M_curValue.assign(1, __c);
        }
+      else if (_M_state & _S_state_in_bracket)
+        {
+          if (__c == _M_ctype.widen('-')
+              || __c == _M_ctype.widen('[')
+              || __c == _M_ctype.widen(']'))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else if ((_M_flags & regex_constants::ECMAScript)
+                   && __c == _M_ctype.widen('b'))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, _M_ctype.widen(' '));
+            }
+          else
+            __throw_regex_error(regex_constants::error_escape);
+        }
       else
        __throw_regex_error(regex_constants::error_escape);
     }
 
-
   // Eats a character class or throwns an exception.
   // current point to ':' delimiter on entry, char after ']' on return
   template<typename _InputIterator>
@@ -549,6 +563,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        case _S_token_bracket_begin:
          ostr << "bracket-begin\n";
          break;
+       case _S_token_bracket_inverse_begin:
+          ostr << "bracket-inverse-begin\n";
+          break;
        case _S_token_bracket_end:
          ostr << "bracket-end\n";
          break;
@@ -606,9 +623,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        case _S_token_ord_char:
          ostr << "ordinary character: \"" << _M_value() << "\"\n";
          break;
-       case _S_token_quoted_char:
-         ostr << "quoted char\n";
-         break;
        case _S_token_subexpr_begin:
          ostr << "subexpr begin\n";
          break;
@@ -624,6 +638,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        case _S_token_unknown:
          ostr << "-- unknown token --\n";
          break;
+        default:
+          _GLIBCXX_DEBUG_ASSERT(false);
       }
       return ostr;
     }
@@ -650,7 +666,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef _Scanner<_InIter>                              _ScannerT;
       typedef typename _ScannerT::_TokenT                    _TokenT;
       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
-      typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
+      typedef _BracketMatcher<_InIter, _TraitsT>             _BMatcherT;
 
       // accepts a specific token or returns false.
       bool
@@ -659,7 +675,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_disjunction();
 
-      bool
+      void
       _M_alternative();
 
       bool
@@ -668,7 +684,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       bool
       _M_assertion();
 
-      bool
+      void
       _M_quantifier();
 
       bool
@@ -678,31 +694,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_bracket_expression();
 
       bool
-      _M_bracket_list(_RMatcherT& __matcher);
+      _M_bracket_list(_BMatcherT& __matcher);
 
       bool
-      _M_follow_list(_RMatcherT& __matcher);
+      _M_follow_list(_BMatcherT& __matcher);
 
-      bool
-      _M_follow_list2(_RMatcherT& __matcher);
-
-      bool
-      _M_expression_term(_RMatcherT& __matcher);
+      void
+      _M_expression_term(_BMatcherT& __matcher);
 
       bool
-      _M_range_expression(_RMatcherT& __matcher);
+      _M_range_expression(_BMatcherT& __matcher);
 
       bool
-      _M_start_range(_RMatcherT& __matcher);
+      _M_start_range(_BMatcherT& __matcher);
 
       bool
-      _M_collating_symbol(_RMatcherT& __matcher);
+      _M_collating_symbol(_BMatcherT& __matcher);
 
       bool
-      _M_equivalence_class(_RMatcherT& __matcher);
+      _M_equivalence_class(_BMatcherT& __matcher);
 
       bool
-      _M_character_class(_RMatcherT& __matcher);
+      _M_character_class(_BMatcherT& __matcher);
 
       int
       _M_cur_int_value(int __radix);
@@ -712,6 +725,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StringT       _M_cur_value;
       _Nfa           _M_state_store;
       _StackT        _M_stack;
+      _FlagT         _M_flags;
     };
 
   template<typename _InIter, typename _TraitsT>
@@ -719,7 +733,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
              _Compiler<_InIter, _TraitsT>::_FlagT __flags)
     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
-      _M_state_store(__flags)
+      _M_state_store(__flags), _M_flags(__flags)
     {
       typedef _StartTagger<_InIter, _TraitsT> _Start;
       typedef _EndTagger<_InIter, _TraitsT> _End;
@@ -743,8 +757,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { 
       if (token == _M_scanner._M_token())
        {
-         _M_cur_value = _M_scanner._M_value();
-         _M_scanner._M_advance();
+          _M_cur_value = _M_scanner._M_value();
+          _M_scanner._M_advance();
          return true;
        }
       return false;
@@ -766,7 +780,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    void
     _Compiler<_InIter, _TraitsT>::
     _M_alternative()
     {
@@ -780,9 +794,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
              _M_stack.pop();
            }
          _M_stack.push(__re);
-         return true;
        }
-      return false;
     }
 
   template<typename _InIter, typename _TraitsT>
@@ -829,7 +841,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    void
     _Compiler<_InIter, _TraitsT>::
     _M_quantifier()
     {
@@ -841,7 +853,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          __r._M_append(__r._M_front());
          _M_stack.pop();
          _M_stack.push(__r);
-         return true;
+         return;
        }
       if (_M_match_token(_ScannerT::_S_token_closure1))
        {
@@ -852,7 +864,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                        _M_insert_alt(_S_invalid_state_id,
                                      _M_stack.top()._M_front()));
          _M_stack.top()._M_append(__r);
-         return true;
+         return;
        }
       if (_M_match_token(_ScannerT::_S_token_opt))
        {
@@ -861,7 +873,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          _StateSeq __r(_M_stack.top(), -1);
          _M_stack.pop();
          _M_stack.push(__r);
-         return true;
+         return;
        }
       if (_M_match_token(_ScannerT::_S_token_interval_begin))
        {
@@ -897,9 +909,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
              }
          if (!_M_match_token(_ScannerT::_S_token_interval_end))
            __throw_regex_error(regex_constants::error_brace);
-         return true;
+         return;
        }
-      return false;
     }
 
   template<typename _InIter, typename _TraitsT>
@@ -922,15 +933,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        {
          _M_stack.push(_StateSeq(_M_state_store,
                                   _M_state_store._M_insert_matcher
-                                  (_CMatcher(_M_cur_value[0], _M_traits))));
-         return true;
-       }
-      if (_M_match_token(_ScannerT::_S_token_quoted_char))
-       {
-         // note that in the ECMA grammar, this case covers backrefs.
-         _M_stack.push(_StateSeq(_M_state_store,
-                                 _M_state_store._M_insert_matcher
-                                 (_CMatcher(_M_cur_value[0], _M_traits))));
+                                  (_CMatcher(_M_cur_value[0], _M_flags, _M_traits))));
          return true;
        }
       if (_M_match_token(_ScannerT::_S_token_backref))
@@ -966,124 +969,74 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _Compiler<_InIter, _TraitsT>::
     _M_bracket_expression()
     {
-      if (_M_match_token(_ScannerT::_S_token_bracket_begin))
-       {
-         _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
-                              _M_traits);
-         if (!_M_bracket_list(__matcher)
-             || !_M_match_token(_ScannerT::_S_token_bracket_end))
-           __throw_regex_error(regex_constants::error_brack);
-         _M_stack.push(_StateSeq(_M_state_store,
-                                 _M_state_store._M_insert_matcher(__matcher)));
-         return true;
-       }
-      return false;
-    }
-
-  // If the dash is the last character in the bracket expression, it is not
-  // special.
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_bracket_list(_RMatcherT& __matcher)
-    {
-      if (_M_follow_list(__matcher))
-       {
-         if (_M_match_token(_ScannerT::_S_token_dash))
-           __matcher._M_add_char(_M_cur_value[0]);
-         return true;
-       }
-      return false;
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_follow_list(_RMatcherT& __matcher)
-    { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_follow_list2(_RMatcherT& __matcher)
-    {
-      if (_M_expression_term(__matcher))
-       return _M_follow_list2(__matcher);
+      bool __inverse =
+        _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
+      if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
+        return false;
+      _BMatcherT __matcher( __inverse, _M_flags, _M_traits);
+      // special case: only if  _not_ chr first after
+      // '[' or '[^' or if ECMAscript
+      if (!_M_bracket_list(__matcher) // list is empty
+          && !(_M_flags & regex_constants::ECMAScript))
+        __throw_regex_error(regex_constants::error_brack);
+      _M_stack.push(_StateSeq(_M_state_store,
+                              _M_state_store._M_insert_matcher(__matcher)));
       return true;
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    bool // list is non-empty
     _Compiler<_InIter, _TraitsT>::
-    _M_expression_term(_RMatcherT& __matcher)
+    _M_bracket_list(_BMatcherT& __matcher)
     {
-      return (_M_collating_symbol(__matcher)
-             || _M_character_class(__matcher)
-             || _M_equivalence_class(__matcher)
-             || (_M_start_range(__matcher)
-                 && _M_range_expression(__matcher)));
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_range_expression(_RMatcherT& __matcher)
-    {
-      if (!_M_collating_symbol(__matcher))
-       if (!_M_match_token(_ScannerT::_S_token_dash))
-         __throw_regex_error(regex_constants::error_range);
-      __matcher._M_make_range();
+      if (_M_match_token(_ScannerT::_S_token_bracket_end))
+        return false;
+      _M_expression_term(__matcher);
+      _M_bracket_list(__matcher);
       return true;
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_start_range(_RMatcherT& __matcher)
-    { return _M_match_token(_ScannerT::_S_token_dash); }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
+    void
     _Compiler<_InIter, _TraitsT>::
-    _M_collating_symbol(_RMatcherT& __matcher)
+    _M_expression_term(_BMatcherT& __matcher)
     {
-      if (_M_match_token(_ScannerT::_S_token_collelem_single))
-       {
-         __matcher._M_add_char(_M_cur_value[0]);
-         return true;
-       }
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
        {
          __matcher._M_add_collating_element(_M_cur_value);
-         return true;
+         return;
        }
-      return false;
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_equivalence_class(_RMatcherT& __matcher)
-    {
       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
        {
          __matcher._M_add_equivalence_class(_M_cur_value);
-         return true;
+         return;
        }
-      return false;
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_character_class(_RMatcherT& __matcher)
-    {
       if (_M_match_token(_ScannerT::_S_token_char_class_name))
        {
          __matcher._M_add_character_class(_M_cur_value);
-         return true;
+         return;
        }
-      return false;
+      if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
+        {
+          auto __ch = _M_cur_value[0];
+          if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
+            {
+              // If the dash is the last character in the bracket expression,
+              // it is not special.
+              if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
+                __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
+              else // [a-z]
+                {
+                  if (!_M_match_token(_ScannerT::_S_token_collelem_single))
+                    __throw_regex_error(regex_constants::error_range);
+                  __matcher._M_make_range(__ch, _M_cur_value[0]);
+                }
+            }
+          else // [a]
+            __matcher._M_add_char(__ch);
+          return;
+        }
+      __throw_regex_error(regex_constants::error_brack);
     }
 
   template<typename _InIter, typename _TraitsT>
index 1bae1b78ed4c9d1b63a9925975d6ce5c5942e72f..b4ac452fbaa3db4f15c77a3815195fb54cbe5f57 100644 (file)
@@ -129,6 +129,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       int       _M_index;
     };
 
+  // TODO For now we use an all-in-one comparator. In the future there may be
+  // optimizations based on regex_traits::translate and regex_transform.
+  template<typename _InIterT, typename _TraitsT>
+    struct _Comparator
+    {
+      typedef regex_constants::syntax_option_type _FlagT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef std::basic_string<_CharT>           _StringT;
+
+      _Comparator(_FlagT __flags, const _TraitsT& __traits)
+      : _M_flags(__flags), _M_traits(__traits)
+      { }
+
+      bool
+      _M_equ(_CharT __a, _CharT __b) const;
+
+      bool
+      _M_le(_CharT __a, _CharT __b) const;
+
+      _FlagT                              _M_flags;
+      _TraitsT                            _M_traits;
+    };
+
   /// Indicates if current state matches cursor current.
   typedef std::function<bool (const _PatternCursor&)> _Matcher;
 
@@ -140,12 +163,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// Matches a single character
   template<typename _InIterT, typename _TraitsT>
     struct _CharMatcher
+    : public _Comparator<_InIterT, _TraitsT>
     {
-      typedef typename _TraitsT::char_type char_type;
+      typedef _Comparator<_InIterT, _TraitsT>     _BaseT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef regex_constants::syntax_option_type _FlagT;
 
       explicit
-      _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
-      : _M_traits(__t), _M_c(_M_traits.translate(__c))
+      _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
+      : _BaseT(__flags, __t), _M_c(__c)
       { }
 
       bool
@@ -153,55 +179,79 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
        typedef const _SpecializedCursor<_InIterT>& _CursorT;
        _CursorT __c = static_cast<_CursorT>(__pc);
-       return _M_traits.translate(__c._M_current()) == _M_c;
+        return this->_M_equ(__c._M_current(), _M_c);
       }
 
-      const _TraitsT& _M_traits;
-      char_type       _M_c;
+      _CharT       _M_c;
     };
 
   /// Matches a character range (bracket expression)
   template<typename _InIterT, typename _TraitsT>
-    struct _RangeMatcher
+    struct _BracketMatcher
+    : public _Comparator<_InIterT, _TraitsT>
     {
-      typedef typename _TraitsT::char_type _CharT;
-      typedef std::basic_string<_CharT>    _StringT;
+      typedef _Comparator<_InIterT, _TraitsT>     _BaseT;
+      typedef typename _TraitsT::char_class_type  _CharClassT;
+      typedef regex_constants::syntax_option_type _FlagT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef std::basic_string<_CharT>           _StringT;
 
       explicit
-      _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
-      : _M_traits(__t), _M_is_non_matching(__is_non_matching)
+      _BracketMatcher(bool __is_non_matching,
+                    _FlagT __flags,
+                    const _TraitsT& __t)
+      : _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
+      _M_is_non_matching(__is_non_matching), _M_class_set(0)
       { }
 
       bool
-      operator()(const _PatternCursor& __pc) const
-      {
-       typedef const _SpecializedCursor<_InIterT>& _CursorT;
-       _CursorT __c = static_cast<_CursorT>(__pc);
-       return true;
-      }
+      operator()(const _PatternCursor& __pc) const;
 
       void
       _M_add_char(_CharT __c)
-      { }
+      { _M_char_set.push_back(__c); }
 
       void
       _M_add_collating_element(const _StringT& __s)
-      { }
+      {
+        auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
+        if (__st.empty())
+          __throw_regex_error(regex_constants::error_collate);
+        // TODO: digraph
+        _M_char_set.push_back(__st[0]);
+      }
 
       void
       _M_add_equivalence_class(const _StringT& __s)
-      { }
+      {
+        _M_add_character_class(
+          _M_traits.transform_primary(&*__s.begin(), &*__s.end()));
+      }
 
       void
       _M_add_character_class(const _StringT& __s)
-      { }
+      {
+        auto __st = _M_traits.lookup_classname(
+          &*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase));
+        if (__st == 0)
+          __throw_regex_error(regex_constants::error_ctype);
+        _M_class_set |= __st;
+      }
 
       void
-      _M_make_range()
-      { }
+      _M_make_range(_CharT __l, _CharT __r)
+      {
+        if (!this->_M_le(__l, __r))
+          __throw_regex_error(regex_constants::error_range);
+        _M_range_set.push_back(make_pair(__l, __r));
+      }
 
-      const _TraitsT& _M_traits;
-      bool            _M_is_non_matching;
+      _FlagT                              _M_flags;
+      _TraitsT                            _M_traits;
+      bool                                _M_is_non_matching;
+      std::vector<_CharT>                 _M_char_set;
+      std::vector<pair<_CharT, _CharT>>   _M_range_set;
+      _CharClassT                         _M_class_set;
     };
 
   /// Identifies a state in the NFA.
index e8277f806a129aacd8a1f5e641e368647c190f29..12f6fe825a541345ffeedc4b38a83870a9e6d2c0 100644 (file)
@@ -35,6 +35,64 @@ namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  template<typename _InIterT, typename _TraitsT>
+    bool _BracketMatcher<_InIterT, _TraitsT>::
+    operator()(const _PatternCursor& __pc) const
+    {
+      typedef const _SpecializedCursor<_InIterT>& _CursorT;
+      _CursorT __c = static_cast<_CursorT>(__pc);
+      _CharT __ch = __c._M_current();
+      bool __ret = false;
+      for (auto __c : _M_char_set)
+        if (this->_M_equ(__c, __ch))
+          {
+            __ret = true;
+            break;
+          }
+      if (!__ret && _M_traits.isctype(__ch, _M_class_set))
+        __ret = true;
+      else
+        {
+          for (auto& __it : _M_range_set)
+            if (this->_M_le(__it.first, __ch) && this->_M_le(__ch, __it.second))
+              {
+                __ret = true;
+                break;
+              }
+        }
+      if (_M_is_non_matching)
+        __ret = !__ret;
+      return __ret;
+    }
+
+  template<typename _InIterT, typename _TraitsT>
+    bool _Comparator<_InIterT, _TraitsT>::
+    _M_equ(_CharT __a, _CharT __b) const
+    {
+      if (_M_flags & regex_constants::icase)
+        return _M_traits.translate_nocase(__a)
+          == _M_traits.translate_nocase(__b);
+      if (_M_flags & regex_constants::collate)
+        return _M_traits.translate(__a) == _M_traits.translate(__b);
+      return __a == __b;
+    }
+
+  template<typename _InIterT, typename _TraitsT>
+    bool _Comparator<_InIterT, _TraitsT>::
+    _M_le(_CharT __a, _CharT __b) const
+    {
+      _StringT __str1 = _StringT(1,
+                                 _M_flags & regex_constants::icase
+                                 ? _M_traits.translate_nocase(__a)
+                                 : _M_traits.translate(__a));
+      _StringT __str2 = _StringT(1,
+                                 _M_flags & regex_constants::icase
+                                 ? _M_traits.translate_nocase(__b)
+                                 : _M_traits.translate(__b));
+      return _M_traits.transform(__str1.begin(), __str1.end())
+        <= _M_traits.transform(__str2.begin(), __str2.end());
+    }
+
 #ifdef _GLIBCXX_DEBUG
 inline std::ostream& _State::
 _M_print(std::ostream& ostr) const
index 10e2ff43765b31847e38d1ffdfdda190c2818a75..aee1dbe15dcb1f40ca901eabe34e7d348e816c8a 100644 (file)
@@ -37,7 +37,7 @@ test01()
     std::string target("zxcv/onetwoabc");
     std::smatch m;
 
-    VERIFY( std::regex_search(target, m, re) );
+    VERIFY( std::regex_match(target, m, re) );
     VERIFY( m.size() == 2 );
     VERIFY( m[0].matched == true );
     VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
@@ -50,7 +50,7 @@ test01()
     std::string target("zxcv/onetwoabc");
     std::smatch m;
 
-    VERIFY( std::regex_search(target, m, re) );
+    VERIFY( std::regex_match(target, m, re) );
     VERIFY( m.size() == 3 );
     VERIFY( m[0].matched == true );
     VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc
new file mode 100644 (file)
index 0000000..3a4ff31
--- /dev/null
@@ -0,0 +1,66 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-01  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended bracket expression against a C-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  {
+    std::regex  re("pre/[za-x]", std::regex::extended);
+    VERIFY( std::regex_match("pre/z", re) );
+    VERIFY( std::regex_match("pre/a", re) );
+    VERIFY( !std::regex_match("pre/y", re) );
+  }
+  {
+    std::regex  re("pre/[[:uPPer:]]", std::regex::extended);
+    VERIFY( std::regex_match("pre/Z", re) );
+    VERIFY( !std::regex_match("pre/_", re) );
+    VERIFY( !std::regex_match("pre/a", re) );
+    VERIFY( !std::regex_match("pre/0", re) );
+  }
+  {
+    std::regex  re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);
+    VERIFY( std::regex_match("pre/Z", re) );
+    VERIFY( std::regex_match("pre/a", re) );
+  }
+  {
+    std::regex  re("pre/[[:w:][.tilde.]]", std::regex::extended);
+    VERIFY( std::regex_match("pre/~", re) );
+    VERIFY( std::regex_match("pre/_", re) );
+    VERIFY( std::regex_match("pre/a", re) );
+    VERIFY( std::regex_match("pre/0", re) );
+  }
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}