regex.h (regex_token_iterator<>::regex_token_iterator): Fix initialization orders...
[gcc.git] / libstdc++-v3 / include / bits / regex.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35 namespace __detail
36 {
37 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto(auto dispatch by checking back-references)
42 // and _S_force_dfs(just use _DFSExecutor).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT,
47 _RegexExecutorPolicy __policy,
48 bool __match_mode>
49 bool
50 __regex_algo_impl(_BiIter __s,
51 _BiIter __e,
52 match_results<_BiIter, _Alloc>& __m,
53 const basic_regex<_CharT, _TraitsT>& __re,
54 regex_constants::match_flag_type __flags)
55 {
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60 __res.resize(__re._M_automaton->_M_sub_count() + 2);
61 for (auto& __it : __res)
62 __it.matched = false;
63
64 auto __executor = __get_executor<_BiIter, _Alloc, _CharT, _TraitsT,
65 __policy>(__s, __e, __res, __re, __flags);
66
67 bool __ret;
68 if (__match_mode)
69 __ret = __executor->_M_match();
70 else
71 __ret = __executor->_M_search();
72 if (__ret)
73 {
74 for (auto __it : __res)
75 if (!__it.matched)
76 __it.first = __it.second = __e;
77 auto& __pre = __res[__res.size()-2];
78 auto& __suf = __res[__res.size()-1];
79 if (__match_mode)
80 {
81 __pre.matched = false;
82 __pre.first = __s;
83 __pre.second = __s;
84 __suf.matched = false;
85 __suf.first = __e;
86 __suf.second = __e;
87 }
88 else
89 {
90 __pre.first = __s;
91 __pre.second = __res[0].first;
92 __pre.matched = (__pre.first != __pre.second);
93 __suf.first = __res[0].second;
94 __suf.second = __e;
95 __suf.matched = (__suf.first != __suf.second);
96 }
97 if (__re.flags() & regex_constants::nosubs)
98 __res.resize(3);
99 }
100 return __ret;
101 }
102
103 _GLIBCXX_END_NAMESPACE_VERSION
104 }
105
106 template<typename _Ch_type>
107 template<typename _Fwd_iter>
108 typename regex_traits<_Ch_type>::string_type
109 regex_traits<_Ch_type>::
110 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
111 {
112 typedef std::ctype<char_type> __ctype_type;
113 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
114
115 static const char* __collatenames[] =
116 {
117 "NUL",
118 "SOH",
119 "STX",
120 "ETX",
121 "EOT",
122 "ENQ",
123 "ACK",
124 "alert",
125 "backspace",
126 "tab",
127 "newline",
128 "vertical-tab",
129 "form-feed",
130 "carriage-return",
131 "SO",
132 "SI",
133 "DLE",
134 "DC1",
135 "DC2",
136 "DC3",
137 "DC4",
138 "NAK",
139 "SYN",
140 "ETB",
141 "CAN",
142 "EM",
143 "SUB",
144 "ESC",
145 "IS4",
146 "IS3",
147 "IS2",
148 "IS1",
149 "space",
150 "exclamation-mark",
151 "quotation-mark",
152 "number-sign",
153 "dollar-sign",
154 "percent-sign",
155 "ampersand",
156 "apostrophe",
157 "left-parenthesis",
158 "right-parenthesis",
159 "asterisk",
160 "plus-sign",
161 "comma",
162 "hyphen",
163 "period",
164 "slash",
165 "zero",
166 "one",
167 "two",
168 "three",
169 "four",
170 "five",
171 "six",
172 "seven",
173 "eight",
174 "nine",
175 "colon",
176 "semicolon",
177 "less-than-sign",
178 "equals-sign",
179 "greater-than-sign",
180 "question-mark",
181 "commercial-at",
182 "A",
183 "B",
184 "C",
185 "D",
186 "E",
187 "F",
188 "G",
189 "H",
190 "I",
191 "J",
192 "K",
193 "L",
194 "M",
195 "N",
196 "O",
197 "P",
198 "Q",
199 "R",
200 "S",
201 "T",
202 "U",
203 "V",
204 "W",
205 "X",
206 "Y",
207 "Z",
208 "left-square-bracket",
209 "backslash",
210 "right-square-bracket",
211 "circumflex",
212 "underscore",
213 "grave-accent",
214 "a",
215 "b",
216 "c",
217 "d",
218 "e",
219 "f",
220 "g",
221 "h",
222 "i",
223 "j",
224 "k",
225 "l",
226 "m",
227 "n",
228 "o",
229 "p",
230 "q",
231 "r",
232 "s",
233 "t",
234 "u",
235 "v",
236 "w",
237 "x",
238 "y",
239 "z",
240 "left-curly-bracket",
241 "vertical-line",
242 "right-curly-bracket",
243 "tilde",
244 "DEL",
245 ""
246 };
247
248 // same as boost
249 //static const char* __digraphs[] =
250 // {
251 // "ae",
252 // "Ae",
253 // "AE",
254 // "ch",
255 // "Ch",
256 // "CH",
257 // "ll",
258 // "Ll",
259 // "LL",
260 // "ss",
261 // "Ss",
262 // "SS",
263 // "nj",
264 // "Nj",
265 // "NJ",
266 // "dz",
267 // "Dz",
268 // "DZ",
269 // "lj",
270 // "Lj",
271 // "LJ",
272 // ""
273 // };
274
275 std::string __s(__last - __first, '?');
276 __fctyp.narrow(__first, __last, '?', &*__s.begin());
277
278 for (unsigned int __i = 0; *__collatenames[__i]; __i++)
279 if (__s == __collatenames[__i])
280 return string_type(1, __fctyp.widen(static_cast<char>(__i)));
281
282 //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
283 // {
284 // const char* __now = __digraphs[__i];
285 // if (__s == __now)
286 // {
287 // string_type ret(__s.size(), __fctyp.widen('?'));
288 // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
289 // return ret;
290 // }
291 // }
292 return string_type();
293 }
294
295 template<typename _Ch_type>
296 template<typename _Fwd_iter>
297 typename regex_traits<_Ch_type>::char_class_type
298 regex_traits<_Ch_type>::
299 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
300 {
301 typedef std::ctype<char_type> __ctype_type;
302 typedef std::ctype<char> __cctype_type;
303 typedef const pair<const char*, char_class_type> _ClassnameEntry;
304 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
305 const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
306
307 static _ClassnameEntry __classnames[] =
308 {
309 {"d", ctype_base::digit},
310 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
311 {"s", ctype_base::space},
312 {"alnum", ctype_base::alnum},
313 {"alpha", ctype_base::alpha},
314 {"blank", {0, _RegexMask::_S_blank}},
315 {"cntrl", ctype_base::cntrl},
316 {"digit", ctype_base::digit},
317 {"graph", ctype_base::graph},
318 {"lower", ctype_base::lower},
319 {"print", ctype_base::print},
320 {"punct", ctype_base::punct},
321 {"space", ctype_base::space},
322 {"upper", ctype_base::upper},
323 {"xdigit", ctype_base::xdigit},
324 };
325
326 std::string __s(__last - __first, '?');
327 __fctyp.narrow(__first, __last, '?', &__s[0]);
328 __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
329 for (_ClassnameEntry* __it = __classnames;
330 __it < *(&__classnames + 1);
331 ++__it)
332 {
333 if (__s == __it->first)
334 {
335 if (__icase
336 && ((__it->second
337 & (ctype_base::lower | ctype_base::upper)) != 0))
338 return ctype_base::alpha;
339 return __it->second;
340 }
341 }
342 return 0;
343 }
344
345 template<typename _Ch_type>
346 bool
347 regex_traits<_Ch_type>::
348 isctype(_Ch_type __c, char_class_type __f) const
349 {
350 typedef std::ctype<char_type> __ctype_type;
351 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
352
353 return __fctyp.is(__f._M_base, __c)
354 // [[:w:]]
355 || ((__f._M_extended & _RegexMask::_S_under)
356 && __c == __fctyp.widen('_'))
357 // [[:blank:]]
358 || ((__f._M_extended & _RegexMask::_S_blank)
359 && (__c == __fctyp.widen(' ')
360 || __c == __fctyp.widen('\t')));
361 }
362
363 template<typename _Ch_type>
364 int
365 regex_traits<_Ch_type>::
366 value(_Ch_type __ch, int __radix) const
367 {
368 std::basic_istringstream<char_type> __is(string_type(1, __ch));
369 long __v;
370 if (__radix == 8)
371 __is >> std::oct;
372 else if (__radix == 16)
373 __is >> std::hex;
374 __is >> __v;
375 return __is.fail() ? -1 : __v;
376 }
377
378 template<typename _Bi_iter, typename _Alloc>
379 template<typename _Out_iter>
380 _Out_iter match_results<_Bi_iter, _Alloc>::
381 format(_Out_iter __out,
382 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
383 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
384 match_flag_type __flags) const
385 {
386 _GLIBCXX_DEBUG_ASSERT( ready() );
387 regex_traits<char_type> __traits;
388 typedef std::ctype<char_type> __ctype_type;
389 const __ctype_type&
390 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
391
392 auto __output = [&](size_t __idx)
393 {
394 auto& __sub = _Base_type::operator[](__idx);
395 if (__sub.matched)
396 std::copy(__sub.first, __sub.second, __out);
397 };
398
399 if (__flags & regex_constants::format_sed)
400 {
401 for (; __fmt_first != __fmt_last;)
402 if (*__fmt_first == '&')
403 {
404 __output(0);
405 ++__fmt_first;
406 }
407 else if (*__fmt_first == '\\')
408 {
409 if (++__fmt_first != __fmt_last
410 && __fctyp.is(__ctype_type::digit, *__fmt_first))
411 __output(__traits.value(*__fmt_first++, 10));
412 else
413 *__out++ = '\\';
414 }
415 else
416 *__out++ = *__fmt_first++;
417 }
418 else
419 {
420 while (1)
421 {
422 auto __next = std::find(__fmt_first, __fmt_last, '$');
423 if (__next == __fmt_last)
424 break;
425
426 std::copy(__fmt_first, __next, __out);
427
428 auto __eat = [&](char __ch) -> bool
429 {
430 if (*__next == __ch)
431 {
432 ++__next;
433 return true;
434 }
435 return false;
436 };
437
438 if (++__next == __fmt_last)
439 *__out++ = '$';
440 else if (__eat('$'))
441 *__out++ = '$';
442 else if (__eat('&'))
443 __output(0);
444 else if (__eat('`'))
445 __output(_Base_type::size()-2);
446 else if (__eat('\''))
447 __output(_Base_type::size()-1);
448 else if (__fctyp.is(__ctype_type::digit, *__next))
449 {
450 long __num = __traits.value(*__next, 10);
451 if (++__next != __fmt_last
452 && __fctyp.is(__ctype_type::digit, *__next))
453 {
454 __num *= 10;
455 __num += __traits.value(*__next++, 10);
456 }
457 if (0 <= __num && __num < this->size())
458 __output(__num);
459 }
460 else
461 *__out++ = '$';
462 __fmt_first = __next;
463 }
464 std::copy(__fmt_first, __fmt_last, __out);
465 }
466 return __out;
467 }
468
469 template<typename _Out_iter, typename _Bi_iter,
470 typename _Rx_traits, typename _Ch_type>
471 _Out_iter
472 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
473 const basic_regex<_Ch_type, _Rx_traits>& __e,
474 const _Ch_type* __fmt,
475 regex_constants::match_flag_type __flags)
476 {
477 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
478 _IterT __i(__first, __last, __e, __flags);
479 _IterT __end;
480 if (__i == __end)
481 {
482 if (!(__flags & regex_constants::format_no_copy))
483 std::copy(__first, __last, __out);
484 }
485 else
486 {
487 sub_match<_Bi_iter> __last;
488 auto __len = char_traits<_Ch_type>::length(__fmt);
489 for (; __i != __end; ++__i)
490 {
491 if (!(__flags & regex_constants::format_no_copy))
492 std::copy(__i->prefix().first, __i->prefix().second, __out);
493 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
494 __last = __i->suffix();
495 if (__flags & regex_constants::format_first_only)
496 break;
497 }
498 if (!(__flags & regex_constants::format_no_copy))
499 std::copy(__last.first, __last.second, __out);
500 }
501 return __out;
502 }
503
504 template<typename _Bi_iter,
505 typename _Ch_type,
506 typename _Rx_traits>
507 bool
508 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
509 operator==(const regex_iterator& __rhs) const
510 {
511 return (_M_match.empty() && __rhs._M_match.empty())
512 || (_M_begin == __rhs._M_begin
513 && _M_end == __rhs._M_end
514 && _M_pregex == __rhs._M_pregex
515 && _M_flags == __rhs._M_flags
516 && _M_match[0] == __rhs._M_match[0]);
517 }
518
519 template<typename _Bi_iter,
520 typename _Ch_type,
521 typename _Rx_traits>
522 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
523 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
524 operator++()
525 {
526 // In all cases in which the call to regex_search returns true,
527 // match.prefix().first shall be equal to the previous value of
528 // match[0].second, and for each index i in the half-open range
529 // [0, match.size()) for which match[i].matched is true,
530 // match[i].position() shall return distance(begin, match[i].first).
531 // [28.12.1.4.5]
532 if (_M_match[0].matched)
533 {
534 auto __start = _M_match[0].second;
535 auto __prefix_first = _M_match[0].second;
536 if (_M_match[0].first == _M_match[0].second)
537 {
538 if (__start == _M_end)
539 {
540 _M_match = value_type();
541 return *this;
542 }
543 else
544 {
545 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
546 _M_flags
547 | regex_constants::match_not_null
548 | regex_constants::match_continuous))
549 {
550 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
551 _M_match.at(_M_match.size()).first = __prefix_first;
552 _M_match._M_in_iterator = true;
553 _M_match._M_begin = _M_begin;
554 return *this;
555 }
556 else
557 ++__start;
558 }
559 }
560 _M_flags |= regex_constants::match_prev_avail;
561 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
562 {
563 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
564 _M_match.at(_M_match.size()).first = __prefix_first;
565 _M_match._M_in_iterator = true;
566 _M_match._M_begin = _M_begin;
567 }
568 else
569 _M_match = value_type();
570 }
571 return *this;
572 }
573
574 template<typename _Bi_iter,
575 typename _Ch_type,
576 typename _Rx_traits>
577 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
578 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
579 operator=(const regex_token_iterator& __rhs)
580 {
581 _M_position = __rhs._M_position;
582 _M_subs = __rhs._M_subs;
583 _M_n = __rhs._M_n;
584 _M_result = __rhs._M_result;
585 _M_suffix = __rhs._M_suffix;
586 _M_has_m1 = __rhs._M_has_m1;
587 if (__rhs._M_result == &__rhs._M_suffix)
588 _M_result = &_M_suffix;
589 return *this;
590 }
591
592 template<typename _Bi_iter,
593 typename _Ch_type,
594 typename _Rx_traits>
595 bool
596 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
597 operator==(const regex_token_iterator& __rhs) const
598 {
599 if (_M_end_of_seq() && __rhs._M_end_of_seq())
600 return true;
601 if (_M_suffix.matched && __rhs._M_suffix.matched
602 && _M_suffix == __rhs._M_suffix)
603 return true;
604 if (_M_end_of_seq() || _M_suffix.matched
605 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
606 return false;
607 return _M_position == __rhs._M_position
608 && _M_n == __rhs._M_n
609 && _M_subs == __rhs._M_subs;
610 }
611
612 template<typename _Bi_iter,
613 typename _Ch_type,
614 typename _Rx_traits>
615 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
616 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
617 operator++()
618 {
619 _Position __prev = _M_position;
620 if (_M_suffix.matched)
621 *this = regex_token_iterator();
622 else if (_M_n + 1 < _M_subs.size())
623 {
624 _M_n++;
625 _M_result = &_M_current_match();
626 }
627 else
628 {
629 _M_n = 0;
630 ++_M_position;
631 if (_M_position != _Position())
632 _M_result = &_M_current_match();
633 else if (_M_has_m1 && __prev->suffix().length() != 0)
634 {
635 _M_suffix.matched = true;
636 _M_suffix.first = __prev->suffix().first;
637 _M_suffix.second = __prev->suffix().second;
638 _M_result = &_M_suffix;
639 }
640 else
641 *this = regex_token_iterator();
642 }
643 return *this;
644 }
645
646 template<typename _Bi_iter,
647 typename _Ch_type,
648 typename _Rx_traits>
649 void
650 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
651 _M_init(_Bi_iter __a, _Bi_iter __b)
652 {
653 _M_has_m1 = false;
654 for (auto __it : _M_subs)
655 if (__it == -1)
656 {
657 _M_has_m1 = true;
658 break;
659 }
660 if (_M_position != _Position())
661 _M_result = &_M_current_match();
662 else if (_M_has_m1)
663 {
664 _M_suffix.matched = true;
665 _M_suffix.first = __a;
666 _M_suffix.second = __b;
667 _M_result = &_M_suffix;
668 }
669 else
670 _M_result = nullptr;
671 }
672
673 _GLIBCXX_END_NAMESPACE_VERSION
674 } // namespace
675