Implement bracket expression.
[gcc.git] / libstdc++-v3 / include / bits / regex_compiler.h
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_compiler.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 /**
38 * @addtogroup regex-detail
39 * @{
40 */
41
42 /// Base class for scanner.
43 struct _Scanner_base
44 {
45 typedef unsigned int _StateT;
46
47 static constexpr _StateT _S_state_in_brace = 1 << 0;
48 static constexpr _StateT _S_state_in_bracket = 1 << 1;
49
50 virtual ~_Scanner_base() { };
51 };
52
53 /**
54 * @brief struct _Scanner. Scans an input range for regex tokens.
55 *
56 * The %_Scanner class interprets the regular expression pattern in
57 * the input range passed to its constructor as a sequence of parse
58 * tokens passed to the regular expression compiler. The sequence
59 * of tokens provided depends on the flag settings passed to the
60 * constructor: different regular expression grammars will interpret
61 * the same input pattern in syntactically different ways.
62 */
63 template<typename _InputIterator>
64 class _Scanner: public _Scanner_base
65 {
66 public:
67 typedef _InputIterator _IteratorT;
68 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
69 typedef std::basic_string<_CharT> _StringT;
70 typedef regex_constants::syntax_option_type _FlagT;
71 typedef const std::ctype<_CharT> _CtypeT;
72
73 /// Token types returned from the scanner.
74 enum _TokenT
75 {
76 _S_token_anychar,
77 _S_token_backref,
78 _S_token_bracket_begin,
79 _S_token_bracket_inverse_begin,
80 _S_token_bracket_end,
81 _S_token_char_class_name,
82 _S_token_closure0,
83 _S_token_closure1,
84 _S_token_collelem_multi,
85 _S_token_collelem_single,
86 _S_token_collsymbol,
87 _S_token_comma,
88 _S_token_dash,
89 _S_token_dup_count,
90 _S_token_eof,
91 _S_token_equiv_class_name,
92 _S_token_interval_begin,
93 _S_token_interval_end,
94 _S_token_line_begin,
95 _S_token_line_end,
96 _S_token_opt,
97 _S_token_or,
98 _S_token_ord_char,
99 _S_token_subexpr_begin,
100 _S_token_subexpr_end,
101 _S_token_word_begin,
102 _S_token_word_end,
103 _S_token_unknown
104 };
105
106 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
107 std::locale __loc)
108 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
109 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
110 { _M_advance(); }
111
112 void
113 _M_advance();
114
115 _TokenT
116 _M_token() const
117 { return _M_curToken; }
118
119 const _StringT&
120 _M_value() const
121 { return _M_curValue; }
122
123 #ifdef _GLIBCXX_DEBUG
124 std::ostream&
125 _M_print(std::ostream&);
126 #endif
127
128 private:
129 void
130 _M_eat_escape();
131
132 void
133 _M_scan_in_brace();
134
135 void
136 _M_scan_in_bracket();
137
138 void
139 _M_eat_charclass();
140
141 void
142 _M_eat_equivclass();
143
144 void
145 _M_eat_collsymbol();
146
147 _IteratorT _M_current;
148 _IteratorT _M_end;
149 _FlagT _M_flags;
150 _CtypeT& _M_ctype;
151 _TokenT _M_curToken;
152 _StringT _M_curValue;
153 _StateT _M_state;
154 };
155
156 template<typename _InputIterator>
157 void
158 _Scanner<_InputIterator>::
159 _M_advance()
160 {
161 if (_M_current == _M_end)
162 {
163 _M_curToken = _S_token_eof;
164 return;
165 }
166
167 _CharT __c = *_M_current;
168 if (_M_state & _S_state_in_bracket)
169 {
170 _M_scan_in_bracket();
171 return;
172 }
173 if (_M_state & _S_state_in_brace)
174 {
175 _M_scan_in_brace();
176 return;
177 }
178 #if 0
179 // TODO: re-enable line anchors when _M_assertion is implemented.
180 // See PR libstdc++/47724
181 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
182 {
183 _M_curToken = _S_token_line_begin;
184 ++_M_current;
185 return;
186 }
187 else if (__c == _M_ctype.widen('$'))
188 {
189 _M_curToken = _S_token_line_end;
190 ++_M_current;
191 return;
192 }
193 #endif
194 else if (__c == _M_ctype.widen('.'))
195 {
196 _M_curToken = _S_token_anychar;
197 ++_M_current;
198 return;
199 }
200 else if (__c == _M_ctype.widen('*'))
201 {
202 _M_curToken = _S_token_closure0;
203 ++_M_current;
204 return;
205 }
206 else if (__c == _M_ctype.widen('+'))
207 {
208 _M_curToken = _S_token_closure1;
209 ++_M_current;
210 return;
211 }
212 else if (__c == _M_ctype.widen('|'))
213 {
214 _M_curToken = _S_token_or;
215 ++_M_current;
216 return;
217 }
218 else if (__c == _M_ctype.widen('['))
219 {
220 if (*++_M_current == _M_ctype.widen('^'))
221 {
222 _M_curToken = _S_token_bracket_inverse_begin;
223 ++_M_current;
224 }
225 else
226 _M_curToken = _S_token_bracket_begin;
227 _M_state |= _S_state_in_bracket;
228 return;
229 }
230 else if (__c == _M_ctype.widen('\\'))
231 {
232 _M_eat_escape();
233 return;
234 }
235 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
236 {
237 if (__c == _M_ctype.widen('('))
238 {
239 _M_curToken = _S_token_subexpr_begin;
240 ++_M_current;
241 return;
242 }
243 else if (__c == _M_ctype.widen(')'))
244 {
245 _M_curToken = _S_token_subexpr_end;
246 ++_M_current;
247 return;
248 }
249 else if (__c == _M_ctype.widen('{'))
250 {
251 _M_curToken = _S_token_interval_begin;
252 _M_state |= _S_state_in_brace;
253 ++_M_current;
254 return;
255 }
256 }
257
258 _M_curToken = _S_token_ord_char;
259 _M_curValue.assign(1, __c);
260 ++_M_current;
261 }
262
263
264 template<typename _InputIterator>
265 void
266 _Scanner<_InputIterator>::
267 _M_scan_in_brace()
268 {
269 if (_M_ctype.is(_CtypeT::digit, *_M_current))
270 {
271 _M_curToken = _S_token_dup_count;
272 _M_curValue.assign(1, *_M_current);
273 ++_M_current;
274 while (_M_current != _M_end
275 && _M_ctype.is(_CtypeT::digit, *_M_current))
276 {
277 _M_curValue += *_M_current;
278 ++_M_current;
279 }
280 return;
281 }
282 else if (*_M_current == _M_ctype.widen(','))
283 {
284 _M_curToken = _S_token_comma;
285 ++_M_current;
286 return;
287 }
288 if (_M_flags & (regex_constants::basic | regex_constants::grep))
289 {
290 if (*_M_current == _M_ctype.widen('\\'))
291 _M_eat_escape();
292 }
293 else
294 {
295 if (*_M_current == _M_ctype.widen('}'))
296 {
297 _M_curToken = _S_token_interval_end;
298 _M_state &= ~_S_state_in_brace;
299 ++_M_current;
300 return;
301 }
302 }
303 }
304
305 template<typename _InputIterator>
306 void
307 _Scanner<_InputIterator>::
308 _M_scan_in_bracket()
309 {
310 if (*_M_current == _M_ctype.widen('['))
311 {
312 ++_M_current;
313 if (_M_current == _M_end)
314 {
315 _M_curToken = _S_token_eof;
316 return;
317 }
318
319 if (*_M_current == _M_ctype.widen('.'))
320 {
321 _M_curToken = _S_token_collsymbol;
322 _M_eat_collsymbol();
323 return;
324 }
325 else if (*_M_current == _M_ctype.widen(':'))
326 {
327 _M_curToken = _S_token_char_class_name;
328 _M_eat_charclass();
329 return;
330 }
331 else if (*_M_current == _M_ctype.widen('='))
332 {
333 _M_curToken = _S_token_equiv_class_name;
334 _M_eat_equivclass();
335 return;
336 }
337 }
338 else if (*_M_current == _M_ctype.widen('-'))
339 {
340 _M_curToken = _S_token_dash;
341 ++_M_current;
342 return;
343 }
344 else if (*_M_current == _M_ctype.widen(']'))
345 {
346 _M_curToken = _S_token_bracket_end;
347 _M_state &= ~_S_state_in_bracket;
348 ++_M_current;
349 return;
350 }
351 else if (*_M_current == _M_ctype.widen('\\'))
352 {
353 _M_eat_escape();
354 return;
355 }
356 _M_curToken = _S_token_collelem_single;
357 _M_curValue.assign(1, *_M_current);
358 ++_M_current;
359 }
360
361 // TODO implement it.
362 template<typename _InputIterator>
363 void
364 _Scanner<_InputIterator>::
365 _M_eat_escape()
366 {
367 ++_M_current;
368 if (_M_current == _M_end)
369 {
370 _M_curToken = _S_token_eof;
371 return;
372 }
373 _CharT __c = *_M_current;
374 ++_M_current;
375
376 if (__c == _M_ctype.widen('('))
377 {
378 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
379 {
380 _M_curToken = _S_token_ord_char;
381 _M_curValue.assign(1, __c);
382 }
383 else
384 _M_curToken = _S_token_subexpr_begin;
385 }
386 else if (__c == _M_ctype.widen(')'))
387 {
388 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
389 {
390 _M_curToken = _S_token_ord_char;
391 _M_curValue.assign(1, __c);
392 }
393 else
394 _M_curToken = _S_token_subexpr_end;
395 }
396 else if (__c == _M_ctype.widen('{'))
397 {
398 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
399 {
400 _M_curToken = _S_token_ord_char;
401 _M_curValue.assign(1, __c);
402 }
403 else
404 {
405 _M_curToken = _S_token_interval_begin;
406 _M_state |= _S_state_in_brace;
407 }
408 }
409 else if (__c == _M_ctype.widen('}'))
410 {
411 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
412 {
413 _M_curToken = _S_token_ord_char;
414 _M_curValue.assign(1, __c);
415 }
416 else
417 {
418 if (!(_M_state && _S_state_in_brace))
419 __throw_regex_error(regex_constants::error_badbrace);
420 _M_state &= ~_S_state_in_brace;
421 _M_curToken = _S_token_interval_end;
422 }
423 }
424 else if (__c == _M_ctype.widen('x'))
425 {
426 ++_M_current;
427 if (_M_current == _M_end)
428 {
429 _M_curToken = _S_token_eof;
430 return;
431 }
432 if (_M_ctype.is(_CtypeT::digit, *_M_current))
433 {
434 _M_curValue.assign(1, *_M_current);
435 ++_M_current;
436 if (_M_current == _M_end)
437 {
438 _M_curToken = _S_token_eof;
439 return;
440 }
441 if (_M_ctype.is(_CtypeT::digit, *_M_current))
442 {
443 _M_curValue += *_M_current;
444 ++_M_current;
445 return;
446 }
447 }
448 }
449 else if (__c == _M_ctype.widen('^')
450 || __c == _M_ctype.widen('.')
451 || __c == _M_ctype.widen('*')
452 || __c == _M_ctype.widen('$')
453 || __c == _M_ctype.widen('\\'))
454 {
455 _M_curToken = _S_token_ord_char;
456 _M_curValue.assign(1, __c);
457 }
458 else if (_M_ctype.is(_CtypeT::digit, __c))
459 {
460 _M_curToken = _S_token_backref;
461 _M_curValue.assign(1, __c);
462 }
463 else if (_M_state & _S_state_in_bracket)
464 {
465 if (__c == _M_ctype.widen('-')
466 || __c == _M_ctype.widen('[')
467 || __c == _M_ctype.widen(']'))
468 {
469 _M_curToken = _S_token_ord_char;
470 _M_curValue.assign(1, __c);
471 }
472 else if ((_M_flags & regex_constants::ECMAScript)
473 && __c == _M_ctype.widen('b'))
474 {
475 _M_curToken = _S_token_ord_char;
476 _M_curValue.assign(1, _M_ctype.widen(' '));
477 }
478 else
479 __throw_regex_error(regex_constants::error_escape);
480 }
481 else
482 __throw_regex_error(regex_constants::error_escape);
483 }
484
485 // Eats a character class or throwns an exception.
486 // current point to ':' delimiter on entry, char after ']' on return
487 template<typename _InputIterator>
488 void
489 _Scanner<_InputIterator>::
490 _M_eat_charclass()
491 {
492 ++_M_current; // skip ':'
493 if (_M_current == _M_end)
494 __throw_regex_error(regex_constants::error_ctype);
495 for (_M_curValue.clear();
496 _M_current != _M_end && *_M_current != _M_ctype.widen(':');
497 ++_M_current)
498 _M_curValue += *_M_current;
499 if (_M_current == _M_end)
500 __throw_regex_error(regex_constants::error_ctype);
501 ++_M_current; // skip ':'
502 if (*_M_current != _M_ctype.widen(']'))
503 __throw_regex_error(regex_constants::error_ctype);
504 ++_M_current; // skip ']'
505 }
506
507
508 template<typename _InputIterator>
509 void
510 _Scanner<_InputIterator>::
511 _M_eat_equivclass()
512 {
513 ++_M_current; // skip '='
514 if (_M_current == _M_end)
515 __throw_regex_error(regex_constants::error_collate);
516 for (_M_curValue.clear();
517 _M_current != _M_end && *_M_current != _M_ctype.widen('=');
518 ++_M_current)
519 _M_curValue += *_M_current;
520 if (_M_current == _M_end)
521 __throw_regex_error(regex_constants::error_collate);
522 ++_M_current; // skip '='
523 if (*_M_current != _M_ctype.widen(']'))
524 __throw_regex_error(regex_constants::error_collate);
525 ++_M_current; // skip ']'
526 }
527
528
529 template<typename _InputIterator>
530 void
531 _Scanner<_InputIterator>::
532 _M_eat_collsymbol()
533 {
534 ++_M_current; // skip '.'
535 if (_M_current == _M_end)
536 __throw_regex_error(regex_constants::error_collate);
537 for (_M_curValue.clear();
538 _M_current != _M_end && *_M_current != _M_ctype.widen('.');
539 ++_M_current)
540 _M_curValue += *_M_current;
541 if (_M_current == _M_end)
542 __throw_regex_error(regex_constants::error_collate);
543 ++_M_current; // skip '.'
544 if (*_M_current != _M_ctype.widen(']'))
545 __throw_regex_error(regex_constants::error_collate);
546 ++_M_current; // skip ']'
547 }
548
549 #ifdef _GLIBCXX_DEBUG
550 template<typename _InputIterator>
551 std::ostream&
552 _Scanner<_InputIterator>::
553 _M_print(std::ostream& ostr)
554 {
555 switch (_M_curToken)
556 {
557 case _S_token_anychar:
558 ostr << "any-character\n";
559 break;
560 case _S_token_backref:
561 ostr << "backref\n";
562 break;
563 case _S_token_bracket_begin:
564 ostr << "bracket-begin\n";
565 break;
566 case _S_token_bracket_inverse_begin:
567 ostr << "bracket-inverse-begin\n";
568 break;
569 case _S_token_bracket_end:
570 ostr << "bracket-end\n";
571 break;
572 case _S_token_char_class_name:
573 ostr << "char-class-name \"" << _M_curValue << "\"\n";
574 break;
575 case _S_token_closure0:
576 ostr << "closure0\n";
577 break;
578 case _S_token_closure1:
579 ostr << "closure1\n";
580 break;
581 case _S_token_collelem_multi:
582 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
583 break;
584 case _S_token_collelem_single:
585 ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
586 break;
587 case _S_token_collsymbol:
588 ostr << "collsymbol \"" << _M_curValue << "\"\n";
589 break;
590 case _S_token_comma:
591 ostr << "comma\n";
592 break;
593 case _S_token_dash:
594 ostr << "dash\n";
595 break;
596 case _S_token_dup_count:
597 ostr << "dup count: " << _M_curValue << "\n";
598 break;
599 case _S_token_eof:
600 ostr << "EOF\n";
601 break;
602 case _S_token_equiv_class_name:
603 ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
604 break;
605 case _S_token_interval_begin:
606 ostr << "interval begin\n";
607 break;
608 case _S_token_interval_end:
609 ostr << "interval end\n";
610 break;
611 case _S_token_line_begin:
612 ostr << "line begin\n";
613 break;
614 case _S_token_line_end:
615 ostr << "line end\n";
616 break;
617 case _S_token_opt:
618 ostr << "opt\n";
619 break;
620 case _S_token_or:
621 ostr << "or\n";
622 break;
623 case _S_token_ord_char:
624 ostr << "ordinary character: \"" << _M_value() << "\"\n";
625 break;
626 case _S_token_subexpr_begin:
627 ostr << "subexpr begin\n";
628 break;
629 case _S_token_subexpr_end:
630 ostr << "subexpr end\n";
631 break;
632 case _S_token_word_begin:
633 ostr << "word begin\n";
634 break;
635 case _S_token_word_end:
636 ostr << "word end\n";
637 break;
638 case _S_token_unknown:
639 ostr << "-- unknown token --\n";
640 break;
641 default:
642 _GLIBCXX_DEBUG_ASSERT(false);
643 }
644 return ostr;
645 }
646 #endif
647
648 /// Builds an NFA from an input iterator interval.
649 template<typename _InIter, typename _TraitsT>
650 class _Compiler
651 {
652 public:
653 typedef _InIter _IterT;
654 typedef typename std::iterator_traits<_InIter>::value_type _CharT;
655 typedef std::basic_string<_CharT> _StringT;
656 typedef regex_constants::syntax_option_type _FlagT;
657
658 _Compiler(const _InIter& __b, const _InIter& __e,
659 _TraitsT& __traits, _FlagT __flags);
660
661 const _Nfa&
662 _M_nfa() const
663 { return _M_state_store; }
664
665 private:
666 typedef _Scanner<_InIter> _ScannerT;
667 typedef typename _ScannerT::_TokenT _TokenT;
668 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
669 typedef _BracketMatcher<_InIter, _TraitsT> _BMatcherT;
670
671 // accepts a specific token or returns false.
672 bool
673 _M_match_token(_TokenT __token);
674
675 void
676 _M_disjunction();
677
678 void
679 _M_alternative();
680
681 bool
682 _M_term();
683
684 bool
685 _M_assertion();
686
687 void
688 _M_quantifier();
689
690 bool
691 _M_atom();
692
693 bool
694 _M_bracket_expression();
695
696 bool
697 _M_bracket_list(_BMatcherT& __matcher);
698
699 bool
700 _M_follow_list(_BMatcherT& __matcher);
701
702 void
703 _M_expression_term(_BMatcherT& __matcher);
704
705 bool
706 _M_range_expression(_BMatcherT& __matcher);
707
708 bool
709 _M_start_range(_BMatcherT& __matcher);
710
711 bool
712 _M_collating_symbol(_BMatcherT& __matcher);
713
714 bool
715 _M_equivalence_class(_BMatcherT& __matcher);
716
717 bool
718 _M_character_class(_BMatcherT& __matcher);
719
720 int
721 _M_cur_int_value(int __radix);
722
723 _TraitsT& _M_traits;
724 _ScannerT _M_scanner;
725 _StringT _M_cur_value;
726 _Nfa _M_state_store;
727 _StackT _M_stack;
728 _FlagT _M_flags;
729 };
730
731 template<typename _InIter, typename _TraitsT>
732 _Compiler<_InIter, _TraitsT>::
733 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
734 _Compiler<_InIter, _TraitsT>::_FlagT __flags)
735 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
736 _M_state_store(__flags), _M_flags(__flags)
737 {
738 typedef _StartTagger<_InIter, _TraitsT> _Start;
739 typedef _EndTagger<_InIter, _TraitsT> _End;
740
741 _StateSeq __r(_M_state_store,
742 _M_state_store._M_insert_subexpr_begin(_Start(0)));
743 _M_disjunction();
744 if (!_M_stack.empty())
745 {
746 __r._M_append(_M_stack.top());
747 _M_stack.pop();
748 }
749 __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
750 __r._M_append(_M_state_store._M_insert_accept());
751 }
752
753 template<typename _InIter, typename _TraitsT>
754 bool
755 _Compiler<_InIter, _TraitsT>::
756 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
757 {
758 if (token == _M_scanner._M_token())
759 {
760 _M_cur_value = _M_scanner._M_value();
761 _M_scanner._M_advance();
762 return true;
763 }
764 return false;
765 }
766
767 template<typename _InIter, typename _TraitsT>
768 void
769 _Compiler<_InIter, _TraitsT>::
770 _M_disjunction()
771 {
772 this->_M_alternative();
773 if (_M_match_token(_ScannerT::_S_token_or))
774 {
775 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
776 this->_M_disjunction();
777 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
778 _M_stack.push(_StateSeq(__alt1, __alt2));
779 }
780 }
781
782 template<typename _InIter, typename _TraitsT>
783 void
784 _Compiler<_InIter, _TraitsT>::
785 _M_alternative()
786 {
787 if (this->_M_term())
788 {
789 _StateSeq __re = _M_stack.top(); _M_stack.pop();
790 this->_M_alternative();
791 if (!_M_stack.empty())
792 {
793 __re._M_append(_M_stack.top());
794 _M_stack.pop();
795 }
796 _M_stack.push(__re);
797 }
798 }
799
800 template<typename _InIter, typename _TraitsT>
801 bool
802 _Compiler<_InIter, _TraitsT>::
803 _M_term()
804 {
805 if (this->_M_assertion())
806 return true;
807 if (this->_M_atom())
808 {
809 this->_M_quantifier();
810 return true;
811 }
812 return false;
813 }
814
815 template<typename _InIter, typename _TraitsT>
816 bool
817 _Compiler<_InIter, _TraitsT>::
818 _M_assertion()
819 {
820 if (_M_match_token(_ScannerT::_S_token_line_begin))
821 {
822 // __m.push(_Matcher::_S_opcode_line_begin);
823 return true;
824 }
825 if (_M_match_token(_ScannerT::_S_token_line_end))
826 {
827 // __m.push(_Matcher::_S_opcode_line_end);
828 return true;
829 }
830 if (_M_match_token(_ScannerT::_S_token_word_begin))
831 {
832 // __m.push(_Matcher::_S_opcode_word_begin);
833 return true;
834 }
835 if (_M_match_token(_ScannerT::_S_token_word_end))
836 {
837 // __m.push(_Matcher::_S_opcode_word_end);
838 return true;
839 }
840 return false;
841 }
842
843 template<typename _InIter, typename _TraitsT>
844 void
845 _Compiler<_InIter, _TraitsT>::
846 _M_quantifier()
847 {
848 if (_M_match_token(_ScannerT::_S_token_closure0))
849 {
850 if (_M_stack.empty())
851 __throw_regex_error(regex_constants::error_badrepeat);
852 _StateSeq __r(_M_stack.top(), -1);
853 __r._M_append(__r._M_front());
854 _M_stack.pop();
855 _M_stack.push(__r);
856 return;
857 }
858 if (_M_match_token(_ScannerT::_S_token_closure1))
859 {
860 if (_M_stack.empty())
861 __throw_regex_error(regex_constants::error_badrepeat);
862 _StateSeq __r(_M_state_store,
863 _M_state_store.
864 _M_insert_alt(_S_invalid_state_id,
865 _M_stack.top()._M_front()));
866 _M_stack.top()._M_append(__r);
867 return;
868 }
869 if (_M_match_token(_ScannerT::_S_token_opt))
870 {
871 if (_M_stack.empty())
872 __throw_regex_error(regex_constants::error_badrepeat);
873 _StateSeq __r(_M_stack.top(), -1);
874 _M_stack.pop();
875 _M_stack.push(__r);
876 return;
877 }
878 if (_M_match_token(_ScannerT::_S_token_interval_begin))
879 {
880 if (_M_stack.empty())
881 __throw_regex_error(regex_constants::error_badrepeat);
882 if (!_M_match_token(_ScannerT::_S_token_dup_count))
883 __throw_regex_error(regex_constants::error_badbrace);
884 _StateSeq __r(_M_stack.top());
885 int __min_rep = _M_cur_int_value(10);
886 for (int __i = 1; __i < __min_rep; ++__i)
887 _M_stack.top()._M_append(__r._M_clone());
888 if (_M_match_token(_ScannerT::_S_token_comma))
889 if (_M_match_token(_ScannerT::_S_token_dup_count))
890 {
891 int __n = _M_cur_int_value(10) - __min_rep;
892 if (__n < 0)
893 __throw_regex_error(regex_constants::error_badbrace);
894 for (int __i = 0; __i < __n; ++__i)
895 {
896 _StateSeq __r(_M_state_store,
897 _M_state_store.
898 _M_insert_alt(_S_invalid_state_id,
899 _M_stack.top()._M_front()));
900 _M_stack.top()._M_append(__r);
901 }
902 }
903 else
904 {
905 _StateSeq __r(_M_stack.top(), -1);
906 __r._M_push_back(__r._M_front());
907 _M_stack.pop();
908 _M_stack.push(__r);
909 }
910 if (!_M_match_token(_ScannerT::_S_token_interval_end))
911 __throw_regex_error(regex_constants::error_brace);
912 return;
913 }
914 }
915
916 template<typename _InIter, typename _TraitsT>
917 bool
918 _Compiler<_InIter, _TraitsT>::
919 _M_atom()
920 {
921 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
922 typedef _StartTagger<_InIter, _TraitsT> _Start;
923 typedef _EndTagger<_InIter, _TraitsT> _End;
924
925 if (_M_match_token(_ScannerT::_S_token_anychar))
926 {
927 _M_stack.push(_StateSeq(_M_state_store,
928 _M_state_store._M_insert_matcher
929 (_AnyMatcher)));
930 return true;
931 }
932 if (_M_match_token(_ScannerT::_S_token_ord_char))
933 {
934 _M_stack.push(_StateSeq(_M_state_store,
935 _M_state_store._M_insert_matcher
936 (_CMatcher(_M_cur_value[0], _M_flags, _M_traits))));
937 return true;
938 }
939 if (_M_match_token(_ScannerT::_S_token_backref))
940 {
941 // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
942 _M_state_store._M_set_back_ref(true);
943 //return true;
944 }
945 if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
946 {
947 int __mark = _M_state_store._M_sub_count();
948 _StateSeq __r(_M_state_store,
949 _M_state_store.
950 _M_insert_subexpr_begin(_Start(__mark)));
951 this->_M_disjunction();
952 if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
953 __throw_regex_error(regex_constants::error_paren);
954 if (!_M_stack.empty())
955 {
956 __r._M_append(_M_stack.top());
957 _M_stack.pop();
958 }
959 __r._M_append(_M_state_store._M_insert_subexpr_end
960 (__mark, _End(__mark)));
961 _M_stack.push(__r);
962 return true;
963 }
964 return _M_bracket_expression();
965 }
966
967 template<typename _InIter, typename _TraitsT>
968 bool
969 _Compiler<_InIter, _TraitsT>::
970 _M_bracket_expression()
971 {
972 bool __inverse =
973 _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
974 if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
975 return false;
976 _BMatcherT __matcher( __inverse, _M_flags, _M_traits);
977 // special case: only if _not_ chr first after
978 // '[' or '[^' or if ECMAscript
979 if (!_M_bracket_list(__matcher) // list is empty
980 && !(_M_flags & regex_constants::ECMAScript))
981 __throw_regex_error(regex_constants::error_brack);
982 _M_stack.push(_StateSeq(_M_state_store,
983 _M_state_store._M_insert_matcher(__matcher)));
984 return true;
985 }
986
987 template<typename _InIter, typename _TraitsT>
988 bool // list is non-empty
989 _Compiler<_InIter, _TraitsT>::
990 _M_bracket_list(_BMatcherT& __matcher)
991 {
992 if (_M_match_token(_ScannerT::_S_token_bracket_end))
993 return false;
994 _M_expression_term(__matcher);
995 _M_bracket_list(__matcher);
996 return true;
997 }
998
999 template<typename _InIter, typename _TraitsT>
1000 void
1001 _Compiler<_InIter, _TraitsT>::
1002 _M_expression_term(_BMatcherT& __matcher)
1003 {
1004 if (_M_match_token(_ScannerT::_S_token_collsymbol))
1005 {
1006 __matcher._M_add_collating_element(_M_cur_value);
1007 return;
1008 }
1009 if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1010 {
1011 __matcher._M_add_equivalence_class(_M_cur_value);
1012 return;
1013 }
1014 if (_M_match_token(_ScannerT::_S_token_char_class_name))
1015 {
1016 __matcher._M_add_character_class(_M_cur_value);
1017 return;
1018 }
1019 if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
1020 {
1021 auto __ch = _M_cur_value[0];
1022 if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
1023 {
1024 // If the dash is the last character in the bracket expression,
1025 // it is not special.
1026 if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
1027 __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
1028 else // [a-z]
1029 {
1030 if (!_M_match_token(_ScannerT::_S_token_collelem_single))
1031 __throw_regex_error(regex_constants::error_range);
1032 __matcher._M_make_range(__ch, _M_cur_value[0]);
1033 }
1034 }
1035 else // [a]
1036 __matcher._M_add_char(__ch);
1037 return;
1038 }
1039 __throw_regex_error(regex_constants::error_brack);
1040 }
1041
1042 template<typename _InIter, typename _TraitsT>
1043 int
1044 _Compiler<_InIter, _TraitsT>::
1045 _M_cur_int_value(int __radix)
1046 {
1047 int __v = 0;
1048 for (typename _StringT::size_type __i = 0;
1049 __i < _M_cur_value.length(); ++__i)
1050 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1051 return __v;
1052 }
1053
1054 template<typename _InIter, typename _TraitsT>
1055 _AutomatonPtr
1056 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1057 regex_constants::syntax_option_type __f)
1058 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1059 __f)._M_nfa())); }
1060
1061 //@} regex-detail
1062 _GLIBCXX_END_NAMESPACE_VERSION
1063 } // namespace __detail
1064 } // namespace std