2013-07-30 Paolo Carlini <paolo.carlini@oracle.com>
[gcc.git] / libstdc++-v3 / include / bits / regex_compiler.h
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_compiler.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 /**
38 * @addtogroup regex-detail
39 * @{
40 */
41
42 /// Base class for scanner.
struct _Scanner_base
{
  /// Bit mask type holding the scanner's lexical state flags.
  using _StateT = unsigned int;

  // Individual state bits; they are OR-ed together into a _StateT.
  static constexpr _StateT _S_state_at_start   = 1 << 0;
  static constexpr _StateT _S_state_in_brace   = 1 << 2;
  static constexpr _StateT _S_state_in_bracket = 1 << 3;

  /// Virtual so that derived scanners can be destroyed through the base.
  virtual ~_Scanner_base() = default;
};
53
54 /**
55 * @brief struct _Scanner. Scans an input range for regex tokens.
56 *
57 * The %_Scanner class interprets the regular expression pattern in
58 * the input range passed to its constructor as a sequence of parse
59 * tokens passed to the regular expression compiler. The sequence
60 * of tokens provided depends on the flag settings passed to the
61 * constructor: different regular expression grammars will interpret
62 * the same input pattern in syntactically different ways.
63 */
64 template<typename _InputIterator>
65 class _Scanner: public _Scanner_base
66 {
67 public:
68 typedef _InputIterator _IteratorT;
69 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
70 typedef std::basic_string<_CharT> _StringT;
71 typedef regex_constants::syntax_option_type _FlagT;
72 typedef const std::ctype<_CharT> _CtypeT;
73
74 /// Token types returned from the scanner.
75 enum _TokenT
76 {
77 _S_token_anychar,
78 _S_token_backref,
79 _S_token_bracket_begin,
80 _S_token_bracket_end,
81 _S_token_inverse_class,
82 _S_token_char_class_name,
83 _S_token_closure0,
84 _S_token_closure1,
85 _S_token_collelem_multi,
86 _S_token_collelem_single,
87 _S_token_collsymbol,
88 _S_token_comma,
89 _S_token_dash,
90 _S_token_dup_count,
91 _S_token_eof,
92 _S_token_equiv_class_name,
93 _S_token_interval_begin,
94 _S_token_interval_end,
95 _S_token_line_begin,
96 _S_token_line_end,
97 _S_token_opt,
98 _S_token_or,
99 _S_token_ord_char,
100 _S_token_quoted_char,
101 _S_token_subexpr_begin,
102 _S_token_subexpr_end,
103 _S_token_word_begin,
104 _S_token_word_end,
105 _S_token_unknown
106 };
107
108 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
109 std::locale __loc)
110 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
111 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
112 { _M_advance(); }
113
114 void
115 _M_advance();
116
117 _TokenT
118 _M_token() const
119 { return _M_curToken; }
120
121 const _StringT&
122 _M_value() const
123 { return _M_curValue; }
124
125 #ifdef _GLIBCXX_DEBUG
126 std::ostream&
127 _M_print(std::ostream&);
128 #endif
129
130 private:
131 void
132 _M_eat_escape();
133
134 void
135 _M_scan_in_brace();
136
137 void
138 _M_scan_in_bracket();
139
140 void
141 _M_eat_charclass();
142
143 void
144 _M_eat_equivclass();
145
146 void
147 _M_eat_collsymbol();
148
149 _IteratorT _M_current;
150 _IteratorT _M_end;
151 _FlagT _M_flags;
152 _CtypeT& _M_ctype;
153 _TokenT _M_curToken;
154 _StringT _M_curValue;
155 _StateT _M_state;
156 };
157
158 template<typename _InputIterator>
159 void
160 _Scanner<_InputIterator>::
161 _M_advance()
162 {
163 if (_M_current == _M_end)
164 {
165 _M_curToken = _S_token_eof;
166 return;
167 }
168
169 _CharT __c = *_M_current;
170 if (_M_state & _S_state_in_bracket)
171 {
172 _M_scan_in_bracket();
173 return;
174 }
175 if (_M_state & _S_state_in_brace)
176 {
177 _M_scan_in_brace();
178 return;
179 }
180 #if 0
181 // TODO: re-enable line anchors when _M_assertion is implemented.
182 // See PR libstdc++/47724
183 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
184 {
185 _M_curToken = _S_token_line_begin;
186 ++_M_current;
187 return;
188 }
189 else if (__c == _M_ctype.widen('$'))
190 {
191 _M_curToken = _S_token_line_end;
192 ++_M_current;
193 return;
194 }
195 #endif
196 else if (__c == _M_ctype.widen('.'))
197 {
198 _M_curToken = _S_token_anychar;
199 ++_M_current;
200 return;
201 }
202 else if (__c == _M_ctype.widen('*'))
203 {
204 _M_curToken = _S_token_closure0;
205 ++_M_current;
206 return;
207 }
208 else if (__c == _M_ctype.widen('+'))
209 {
210 _M_curToken = _S_token_closure1;
211 ++_M_current;
212 return;
213 }
214 else if (__c == _M_ctype.widen('|'))
215 {
216 _M_curToken = _S_token_or;
217 ++_M_current;
218 return;
219 }
220 else if (__c == _M_ctype.widen('['))
221 {
222 _M_curToken = _S_token_bracket_begin;
223 _M_state |= (_S_state_in_bracket | _S_state_at_start);
224 ++_M_current;
225 return;
226 }
227 else if (__c == _M_ctype.widen('\\'))
228 {
229 _M_eat_escape();
230 return;
231 }
232 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
233 {
234 if (__c == _M_ctype.widen('('))
235 {
236 _M_curToken = _S_token_subexpr_begin;
237 ++_M_current;
238 return;
239 }
240 else if (__c == _M_ctype.widen(')'))
241 {
242 _M_curToken = _S_token_subexpr_end;
243 ++_M_current;
244 return;
245 }
246 else if (__c == _M_ctype.widen('{'))
247 {
248 _M_curToken = _S_token_interval_begin;
249 _M_state |= _S_state_in_brace;
250 ++_M_current;
251 return;
252 }
253 }
254
255 _M_curToken = _S_token_ord_char;
256 _M_curValue.assign(1, __c);
257 ++_M_current;
258 }
259
260
261 template<typename _InputIterator>
262 void
263 _Scanner<_InputIterator>::
264 _M_scan_in_brace()
265 {
266 if (_M_ctype.is(_CtypeT::digit, *_M_current))
267 {
268 _M_curToken = _S_token_dup_count;
269 _M_curValue.assign(1, *_M_current);
270 ++_M_current;
271 while (_M_current != _M_end
272 && _M_ctype.is(_CtypeT::digit, *_M_current))
273 {
274 _M_curValue += *_M_current;
275 ++_M_current;
276 }
277 return;
278 }
279 else if (*_M_current == _M_ctype.widen(','))
280 {
281 _M_curToken = _S_token_comma;
282 ++_M_current;
283 return;
284 }
285 if (_M_flags & (regex_constants::basic | regex_constants::grep))
286 {
287 if (*_M_current == _M_ctype.widen('\\'))
288 _M_eat_escape();
289 }
290 else
291 {
292 if (*_M_current == _M_ctype.widen('}'))
293 {
294 _M_curToken = _S_token_interval_end;
295 _M_state &= ~_S_state_in_brace;
296 ++_M_current;
297 return;
298 }
299 }
300 }
301
302 template<typename _InputIterator>
303 void
304 _Scanner<_InputIterator>::
305 _M_scan_in_bracket()
306 {
307 if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
308 {
309 _M_curToken = _S_token_inverse_class;
310 _M_state &= ~_S_state_at_start;
311 ++_M_current;
312 return;
313 }
314 else if (*_M_current == _M_ctype.widen('['))
315 {
316 ++_M_current;
317 if (_M_current == _M_end)
318 {
319 _M_curToken = _S_token_eof;
320 return;
321 }
322
323 if (*_M_current == _M_ctype.widen('.'))
324 {
325 _M_curToken = _S_token_collsymbol;
326 _M_eat_collsymbol();
327 return;
328 }
329 else if (*_M_current == _M_ctype.widen(':'))
330 {
331 _M_curToken = _S_token_char_class_name;
332 _M_eat_charclass();
333 return;
334 }
335 else if (*_M_current == _M_ctype.widen('='))
336 {
337 _M_curToken = _S_token_equiv_class_name;
338 _M_eat_equivclass();
339 return;
340 }
341 }
342 else if (*_M_current == _M_ctype.widen('-'))
343 {
344 _M_curToken = _S_token_dash;
345 ++_M_current;
346 return;
347 }
348 else if (*_M_current == _M_ctype.widen(']'))
349 {
350 if (!(_M_flags & regex_constants::ECMAScript)
351 || !(_M_state & _S_state_at_start))
352 {
353 // special case: only if _not_ chr first after
354 // '[' or '[^' and if not ECMAscript
355 _M_curToken = _S_token_bracket_end;
356 ++_M_current;
357 return;
358 }
359 }
360 _M_curToken = _S_token_collelem_single;
361 _M_curValue.assign(1, *_M_current);
362 ++_M_current;
363 }
364
365 template<typename _InputIterator>
366 void
367 _Scanner<_InputIterator>::
368 _M_eat_escape()
369 {
370 ++_M_current;
371 if (_M_current == _M_end)
372 {
373 _M_curToken = _S_token_eof;
374 return;
375 }
376 _CharT __c = *_M_current;
377 ++_M_current;
378
379 if (__c == _M_ctype.widen('('))
380 {
381 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
382 {
383 _M_curToken = _S_token_ord_char;
384 _M_curValue.assign(1, __c);
385 }
386 else
387 _M_curToken = _S_token_subexpr_begin;
388 }
389 else if (__c == _M_ctype.widen(')'))
390 {
391 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
392 {
393 _M_curToken = _S_token_ord_char;
394 _M_curValue.assign(1, __c);
395 }
396 else
397 _M_curToken = _S_token_subexpr_end;
398 }
399 else if (__c == _M_ctype.widen('{'))
400 {
401 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
402 {
403 _M_curToken = _S_token_ord_char;
404 _M_curValue.assign(1, __c);
405 }
406 else
407 {
408 _M_curToken = _S_token_interval_begin;
409 _M_state |= _S_state_in_brace;
410 }
411 }
412 else if (__c == _M_ctype.widen('}'))
413 {
414 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
415 {
416 _M_curToken = _S_token_ord_char;
417 _M_curValue.assign(1, __c);
418 }
419 else
420 {
421 if (!(_M_state && _S_state_in_brace))
422 __throw_regex_error(regex_constants::error_badbrace);
423 _M_state &= ~_S_state_in_brace;
424 _M_curToken = _S_token_interval_end;
425 }
426 }
427 else if (__c == _M_ctype.widen('x'))
428 {
429 ++_M_current;
430 if (_M_current == _M_end)
431 {
432 _M_curToken = _S_token_eof;
433 return;
434 }
435 if (_M_ctype.is(_CtypeT::digit, *_M_current))
436 {
437 _M_curValue.assign(1, *_M_current);
438 ++_M_current;
439 if (_M_current == _M_end)
440 {
441 _M_curToken = _S_token_eof;
442 return;
443 }
444 if (_M_ctype.is(_CtypeT::digit, *_M_current))
445 {
446 _M_curValue += *_M_current;
447 ++_M_current;
448 return;
449 }
450 }
451 }
452 else if (__c == _M_ctype.widen('^')
453 || __c == _M_ctype.widen('.')
454 || __c == _M_ctype.widen('*')
455 || __c == _M_ctype.widen('$')
456 || __c == _M_ctype.widen('\\'))
457 {
458 _M_curToken = _S_token_ord_char;
459 _M_curValue.assign(1, __c);
460 }
461 else if (_M_ctype.is(_CtypeT::digit, __c))
462 {
463 _M_curToken = _S_token_backref;
464 _M_curValue.assign(1, __c);
465 }
466 else
467 __throw_regex_error(regex_constants::error_escape);
468 }
469
470
471 // Eats a character class or throwns an exception.
472 // current point to ':' delimiter on entry, char after ']' on return
473 template<typename _InputIterator>
474 void
475 _Scanner<_InputIterator>::
476 _M_eat_charclass()
477 {
478 ++_M_current; // skip ':'
479 if (_M_current == _M_end)
480 __throw_regex_error(regex_constants::error_ctype);
481 for (_M_curValue.clear();
482 _M_current != _M_end && *_M_current != _M_ctype.widen(':');
483 ++_M_current)
484 _M_curValue += *_M_current;
485 if (_M_current == _M_end)
486 __throw_regex_error(regex_constants::error_ctype);
487 ++_M_current; // skip ':'
488 if (*_M_current != _M_ctype.widen(']'))
489 __throw_regex_error(regex_constants::error_ctype);
490 ++_M_current; // skip ']'
491 }
492
493
494 template<typename _InputIterator>
495 void
496 _Scanner<_InputIterator>::
497 _M_eat_equivclass()
498 {
499 ++_M_current; // skip '='
500 if (_M_current == _M_end)
501 __throw_regex_error(regex_constants::error_collate);
502 for (_M_curValue.clear();
503 _M_current != _M_end && *_M_current != _M_ctype.widen('=');
504 ++_M_current)
505 _M_curValue += *_M_current;
506 if (_M_current == _M_end)
507 __throw_regex_error(regex_constants::error_collate);
508 ++_M_current; // skip '='
509 if (*_M_current != _M_ctype.widen(']'))
510 __throw_regex_error(regex_constants::error_collate);
511 ++_M_current; // skip ']'
512 }
513
514
515 template<typename _InputIterator>
516 void
517 _Scanner<_InputIterator>::
518 _M_eat_collsymbol()
519 {
520 ++_M_current; // skip '.'
521 if (_M_current == _M_end)
522 __throw_regex_error(regex_constants::error_collate);
523 for (_M_curValue.clear();
524 _M_current != _M_end && *_M_current != _M_ctype.widen('.');
525 ++_M_current)
526 _M_curValue += *_M_current;
527 if (_M_current == _M_end)
528 __throw_regex_error(regex_constants::error_collate);
529 ++_M_current; // skip '.'
530 if (*_M_current != _M_ctype.widen(']'))
531 __throw_regex_error(regex_constants::error_collate);
532 ++_M_current; // skip ']'
533 }
534
535 #ifdef _GLIBCXX_DEBUG
536 template<typename _InputIterator>
537 std::ostream&
538 _Scanner<_InputIterator>::
539 _M_print(std::ostream& ostr)
540 {
541 switch (_M_curToken)
542 {
543 case _S_token_anychar:
544 ostr << "any-character\n";
545 break;
546 case _S_token_backref:
547 ostr << "backref\n";
548 break;
549 case _S_token_bracket_begin:
550 ostr << "bracket-begin\n";
551 break;
552 case _S_token_bracket_end:
553 ostr << "bracket-end\n";
554 break;
555 case _S_token_char_class_name:
556 ostr << "char-class-name \"" << _M_curValue << "\"\n";
557 break;
558 case _S_token_closure0:
559 ostr << "closure0\n";
560 break;
561 case _S_token_closure1:
562 ostr << "closure1\n";
563 break;
564 case _S_token_collelem_multi:
565 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
566 break;
567 case _S_token_collelem_single:
568 ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
569 break;
570 case _S_token_collsymbol:
571 ostr << "collsymbol \"" << _M_curValue << "\"\n";
572 break;
573 case _S_token_comma:
574 ostr << "comma\n";
575 break;
576 case _S_token_dash:
577 ostr << "dash\n";
578 break;
579 case _S_token_dup_count:
580 ostr << "dup count: " << _M_curValue << "\n";
581 break;
582 case _S_token_eof:
583 ostr << "EOF\n";
584 break;
585 case _S_token_equiv_class_name:
586 ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
587 break;
588 case _S_token_interval_begin:
589 ostr << "interval begin\n";
590 break;
591 case _S_token_interval_end:
592 ostr << "interval end\n";
593 break;
594 case _S_token_line_begin:
595 ostr << "line begin\n";
596 break;
597 case _S_token_line_end:
598 ostr << "line end\n";
599 break;
600 case _S_token_opt:
601 ostr << "opt\n";
602 break;
603 case _S_token_or:
604 ostr << "or\n";
605 break;
606 case _S_token_ord_char:
607 ostr << "ordinary character: \"" << _M_value() << "\"\n";
608 break;
609 case _S_token_quoted_char:
610 ostr << "quoted char\n";
611 break;
612 case _S_token_subexpr_begin:
613 ostr << "subexpr begin\n";
614 break;
615 case _S_token_subexpr_end:
616 ostr << "subexpr end\n";
617 break;
618 case _S_token_word_begin:
619 ostr << "word begin\n";
620 break;
621 case _S_token_word_end:
622 ostr << "word end\n";
623 break;
624 case _S_token_unknown:
625 ostr << "-- unknown token --\n";
626 break;
627 }
628 return ostr;
629 }
630 #endif
631
632 /// Builds an NFA from an input iterator interval.
633 template<typename _InIter, typename _TraitsT>
634 class _Compiler
635 {
636 public:
637 typedef _InIter _IterT;
638 typedef typename std::iterator_traits<_InIter>::value_type _CharT;
639 typedef std::basic_string<_CharT> _StringT;
640 typedef regex_constants::syntax_option_type _FlagT;
641
642 _Compiler(const _InIter& __b, const _InIter& __e,
643 _TraitsT& __traits, _FlagT __flags);
644
645 const _Nfa&
646 _M_nfa() const
647 { return _M_state_store; }
648
649 private:
650 typedef _Scanner<_InIter> _ScannerT;
651 typedef typename _ScannerT::_TokenT _TokenT;
652 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
653 typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT;
654
655 // accepts a specific token or returns false.
656 bool
657 _M_match_token(_TokenT __token);
658
659 void
660 _M_disjunction();
661
662 bool
663 _M_alternative();
664
665 bool
666 _M_term();
667
668 bool
669 _M_assertion();
670
671 bool
672 _M_quantifier();
673
674 bool
675 _M_atom();
676
677 bool
678 _M_bracket_expression();
679
680 bool
681 _M_bracket_list(_RMatcherT& __matcher);
682
683 bool
684 _M_follow_list(_RMatcherT& __matcher);
685
686 bool
687 _M_follow_list2(_RMatcherT& __matcher);
688
689 bool
690 _M_expression_term(_RMatcherT& __matcher);
691
692 bool
693 _M_range_expression(_RMatcherT& __matcher);
694
695 bool
696 _M_start_range(_RMatcherT& __matcher);
697
698 bool
699 _M_collating_symbol(_RMatcherT& __matcher);
700
701 bool
702 _M_equivalence_class(_RMatcherT& __matcher);
703
704 bool
705 _M_character_class(_RMatcherT& __matcher);
706
707 int
708 _M_cur_int_value(int __radix);
709
710 _TraitsT& _M_traits;
711 _ScannerT _M_scanner;
712 _StringT _M_cur_value;
713 _Nfa _M_state_store;
714 _StackT _M_stack;
715 };
716
717 template<typename _InIter, typename _TraitsT>
718 _Compiler<_InIter, _TraitsT>::
719 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
720 _Compiler<_InIter, _TraitsT>::_FlagT __flags)
721 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
722 _M_state_store(__flags)
723 {
724 typedef _StartTagger<_InIter, _TraitsT> _Start;
725 typedef _EndTagger<_InIter, _TraitsT> _End;
726
727 _StateSeq __r(_M_state_store,
728 _M_state_store._M_insert_subexpr_begin(_Start(0)));
729 _M_disjunction();
730 if (!_M_stack.empty())
731 {
732 __r._M_append(_M_stack.top());
733 _M_stack.pop();
734 }
735 __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
736 __r._M_append(_M_state_store._M_insert_accept());
737 }
738
739 template<typename _InIter, typename _TraitsT>
740 bool
741 _Compiler<_InIter, _TraitsT>::
742 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
743 {
744 if (token == _M_scanner._M_token())
745 {
746 _M_cur_value = _M_scanner._M_value();
747 _M_scanner._M_advance();
748 return true;
749 }
750 return false;
751 }
752
753 template<typename _InIter, typename _TraitsT>
754 void
755 _Compiler<_InIter, _TraitsT>::
756 _M_disjunction()
757 {
758 this->_M_alternative();
759 if (_M_match_token(_ScannerT::_S_token_or))
760 {
761 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
762 this->_M_disjunction();
763 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
764 _M_stack.push(_StateSeq(__alt1, __alt2));
765 }
766 }
767
768 template<typename _InIter, typename _TraitsT>
769 bool
770 _Compiler<_InIter, _TraitsT>::
771 _M_alternative()
772 {
773 if (this->_M_term())
774 {
775 _StateSeq __re = _M_stack.top(); _M_stack.pop();
776 this->_M_alternative();
777 if (!_M_stack.empty())
778 {
779 __re._M_append(_M_stack.top());
780 _M_stack.pop();
781 }
782 _M_stack.push(__re);
783 return true;
784 }
785 return false;
786 }
787
788 template<typename _InIter, typename _TraitsT>
789 bool
790 _Compiler<_InIter, _TraitsT>::
791 _M_term()
792 {
793 if (this->_M_assertion())
794 return true;
795 if (this->_M_atom())
796 {
797 this->_M_quantifier();
798 return true;
799 }
800 return false;
801 }
802
803 template<typename _InIter, typename _TraitsT>
804 bool
805 _Compiler<_InIter, _TraitsT>::
806 _M_assertion()
807 {
808 if (_M_match_token(_ScannerT::_S_token_line_begin))
809 {
810 // __m.push(_Matcher::_S_opcode_line_begin);
811 return true;
812 }
813 if (_M_match_token(_ScannerT::_S_token_line_end))
814 {
815 // __m.push(_Matcher::_S_opcode_line_end);
816 return true;
817 }
818 if (_M_match_token(_ScannerT::_S_token_word_begin))
819 {
820 // __m.push(_Matcher::_S_opcode_word_begin);
821 return true;
822 }
823 if (_M_match_token(_ScannerT::_S_token_word_end))
824 {
825 // __m.push(_Matcher::_S_opcode_word_end);
826 return true;
827 }
828 return false;
829 }
830
831 template<typename _InIter, typename _TraitsT>
832 bool
833 _Compiler<_InIter, _TraitsT>::
834 _M_quantifier()
835 {
836 if (_M_match_token(_ScannerT::_S_token_closure0))
837 {
838 if (_M_stack.empty())
839 __throw_regex_error(regex_constants::error_badrepeat);
840 _StateSeq __r(_M_stack.top(), -1);
841 __r._M_append(__r._M_front());
842 _M_stack.pop();
843 _M_stack.push(__r);
844 return true;
845 }
846 if (_M_match_token(_ScannerT::_S_token_closure1))
847 {
848 if (_M_stack.empty())
849 __throw_regex_error(regex_constants::error_badrepeat);
850 _StateSeq __r(_M_state_store,
851 _M_state_store.
852 _M_insert_alt(_S_invalid_state_id,
853 _M_stack.top()._M_front()));
854 _M_stack.top()._M_append(__r);
855 return true;
856 }
857 if (_M_match_token(_ScannerT::_S_token_opt))
858 {
859 if (_M_stack.empty())
860 __throw_regex_error(regex_constants::error_badrepeat);
861 _StateSeq __r(_M_stack.top(), -1);
862 _M_stack.pop();
863 _M_stack.push(__r);
864 return true;
865 }
866 if (_M_match_token(_ScannerT::_S_token_interval_begin))
867 {
868 if (_M_stack.empty())
869 __throw_regex_error(regex_constants::error_badrepeat);
870 if (!_M_match_token(_ScannerT::_S_token_dup_count))
871 __throw_regex_error(regex_constants::error_badbrace);
872 _StateSeq __r(_M_stack.top());
873 int __min_rep = _M_cur_int_value(10);
874 for (int __i = 1; __i < __min_rep; ++__i)
875 _M_stack.top()._M_append(__r._M_clone());
876 if (_M_match_token(_ScannerT::_S_token_comma))
877 if (_M_match_token(_ScannerT::_S_token_dup_count))
878 {
879 int __n = _M_cur_int_value(10) - __min_rep;
880 if (__n < 0)
881 __throw_regex_error(regex_constants::error_badbrace);
882 for (int __i = 0; __i < __n; ++__i)
883 {
884 _StateSeq __r(_M_state_store,
885 _M_state_store.
886 _M_insert_alt(_S_invalid_state_id,
887 _M_stack.top()._M_front()));
888 _M_stack.top()._M_append(__r);
889 }
890 }
891 else
892 {
893 _StateSeq __r(_M_stack.top(), -1);
894 __r._M_push_back(__r._M_front());
895 _M_stack.pop();
896 _M_stack.push(__r);
897 }
898 if (!_M_match_token(_ScannerT::_S_token_interval_end))
899 __throw_regex_error(regex_constants::error_brace);
900 return true;
901 }
902 return false;
903 }
904
905 template<typename _InIter, typename _TraitsT>
906 bool
907 _Compiler<_InIter, _TraitsT>::
908 _M_atom()
909 {
910 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
911 typedef _StartTagger<_InIter, _TraitsT> _Start;
912 typedef _EndTagger<_InIter, _TraitsT> _End;
913
914 if (_M_match_token(_ScannerT::_S_token_anychar))
915 {
916 _M_stack.push(_StateSeq(_M_state_store,
917 _M_state_store._M_insert_matcher
918 (_AnyMatcher)));
919 return true;
920 }
921 if (_M_match_token(_ScannerT::_S_token_ord_char))
922 {
923 _M_stack.push(_StateSeq(_M_state_store,
924 _M_state_store._M_insert_matcher
925 (_CMatcher(_M_cur_value[0], _M_traits))));
926 return true;
927 }
928 if (_M_match_token(_ScannerT::_S_token_quoted_char))
929 {
930 // note that in the ECMA grammar, this case covers backrefs.
931 _M_stack.push(_StateSeq(_M_state_store,
932 _M_state_store._M_insert_matcher
933 (_CMatcher(_M_cur_value[0], _M_traits))));
934 return true;
935 }
936 if (_M_match_token(_ScannerT::_S_token_backref))
937 {
938 // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
939 return true;
940 }
941 if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
942 {
943 int __mark = _M_state_store._M_sub_count();
944 _StateSeq __r(_M_state_store,
945 _M_state_store.
946 _M_insert_subexpr_begin(_Start(__mark)));
947 this->_M_disjunction();
948 if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
949 __throw_regex_error(regex_constants::error_paren);
950 if (!_M_stack.empty())
951 {
952 __r._M_append(_M_stack.top());
953 _M_stack.pop();
954 }
955 __r._M_append(_M_state_store._M_insert_subexpr_end
956 (__mark, _End(__mark)));
957 _M_stack.push(__r);
958 return true;
959 }
960 return _M_bracket_expression();
961 }
962
963 template<typename _InIter, typename _TraitsT>
964 bool
965 _Compiler<_InIter, _TraitsT>::
966 _M_bracket_expression()
967 {
968 if (_M_match_token(_ScannerT::_S_token_bracket_begin))
969 {
970 _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
971 _M_traits);
972 if (!_M_bracket_list(__matcher)
973 || !_M_match_token(_ScannerT::_S_token_bracket_end))
974 __throw_regex_error(regex_constants::error_brack);
975 _M_stack.push(_StateSeq(_M_state_store,
976 _M_state_store._M_insert_matcher(__matcher)));
977 return true;
978 }
979 return false;
980 }
981
982 // If the dash is the last character in the bracket expression, it is not
983 // special.
984 template<typename _InIter, typename _TraitsT>
985 bool
986 _Compiler<_InIter, _TraitsT>::
987 _M_bracket_list(_RMatcherT& __matcher)
988 {
989 if (_M_follow_list(__matcher))
990 {
991 if (_M_match_token(_ScannerT::_S_token_dash))
992 __matcher._M_add_char(_M_cur_value[0]);
993 return true;
994 }
995 return false;
996 }
997
998 template<typename _InIter, typename _TraitsT>
999 bool
1000 _Compiler<_InIter, _TraitsT>::
1001 _M_follow_list(_RMatcherT& __matcher)
1002 { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1003
1004 template<typename _InIter, typename _TraitsT>
1005 bool
1006 _Compiler<_InIter, _TraitsT>::
1007 _M_follow_list2(_RMatcherT& __matcher)
1008 {
1009 if (_M_expression_term(__matcher))
1010 return _M_follow_list2(__matcher);
1011 return true;
1012 }
1013
1014 template<typename _InIter, typename _TraitsT>
1015 bool
1016 _Compiler<_InIter, _TraitsT>::
1017 _M_expression_term(_RMatcherT& __matcher)
1018 {
1019 return (_M_collating_symbol(__matcher)
1020 || _M_character_class(__matcher)
1021 || _M_equivalence_class(__matcher)
1022 || (_M_start_range(__matcher)
1023 && _M_range_expression(__matcher)));
1024 }
1025
1026 template<typename _InIter, typename _TraitsT>
1027 bool
1028 _Compiler<_InIter, _TraitsT>::
1029 _M_range_expression(_RMatcherT& __matcher)
1030 {
1031 if (!_M_collating_symbol(__matcher))
1032 if (!_M_match_token(_ScannerT::_S_token_dash))
1033 __throw_regex_error(regex_constants::error_range);
1034 __matcher._M_make_range();
1035 return true;
1036 }
1037
1038 template<typename _InIter, typename _TraitsT>
1039 bool
1040 _Compiler<_InIter, _TraitsT>::
1041 _M_start_range(_RMatcherT& __matcher)
1042 { return _M_match_token(_ScannerT::_S_token_dash); }
1043
1044 template<typename _InIter, typename _TraitsT>
1045 bool
1046 _Compiler<_InIter, _TraitsT>::
1047 _M_collating_symbol(_RMatcherT& __matcher)
1048 {
1049 if (_M_match_token(_ScannerT::_S_token_collelem_single))
1050 {
1051 __matcher._M_add_char(_M_cur_value[0]);
1052 return true;
1053 }
1054 if (_M_match_token(_ScannerT::_S_token_collsymbol))
1055 {
1056 __matcher._M_add_collating_element(_M_cur_value);
1057 return true;
1058 }
1059 return false;
1060 }
1061
1062 template<typename _InIter, typename _TraitsT>
1063 bool
1064 _Compiler<_InIter, _TraitsT>::
1065 _M_equivalence_class(_RMatcherT& __matcher)
1066 {
1067 if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1068 {
1069 __matcher._M_add_equivalence_class(_M_cur_value);
1070 return true;
1071 }
1072 return false;
1073 }
1074
1075 template<typename _InIter, typename _TraitsT>
1076 bool
1077 _Compiler<_InIter, _TraitsT>::
1078 _M_character_class(_RMatcherT& __matcher)
1079 {
1080 if (_M_match_token(_ScannerT::_S_token_char_class_name))
1081 {
1082 __matcher._M_add_character_class(_M_cur_value);
1083 return true;
1084 }
1085 return false;
1086 }
1087
1088 template<typename _InIter, typename _TraitsT>
1089 int
1090 _Compiler<_InIter, _TraitsT>::
1091 _M_cur_int_value(int __radix)
1092 {
1093 int __v = 0;
1094 for (typename _StringT::size_type __i = 0;
1095 __i < _M_cur_value.length(); ++__i)
1096 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1097 return __v;
1098 }
1099
1100 template<typename _InIter, typename _TraitsT>
1101 _AutomatonPtr
1102 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1103 regex_constants::syntax_option_type __f)
1104 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1105 __f)._M_nfa())); }
1106
1107 //@} regex-detail
1108 _GLIBCXX_END_NAMESPACE_VERSION
1109 } // namespace __detail
1110 } // namespace std