0576422b5d91900e6d67a9b7436d1b0cbb42de05
[gcc.git] / libstdc++-v3 / include / bits / regex.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35 namespace __detail
36 {
37 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto(auto dispatch by checking back-references)
42 // and _S_force_dfs(just use _DFSExecutor).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT,
47 _RegexExecutorPolicy __policy,
48 bool __match_mode>
49 bool
50 __regex_algo_impl(_BiIter __s,
51 _BiIter __e,
52 match_results<_BiIter, _Alloc>& __m,
53 const basic_regex<_CharT, _TraitsT>& __re,
54 regex_constants::match_flag_type __flags)
55 {
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60 __res.resize(__re._M_automaton->_M_sub_count() + 2);
61 for (auto& __it : __res)
62 __it.matched = false;
63
64 typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
65 _ExecutorPtr;
66 typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
67 typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
68
69 _ExecutorPtr __executor =
70 __get_executor<_BiIter, _Alloc, _CharT, _TraitsT,
71 __policy>(__s, __e, __res, __re, __flags);
72
73 bool __ret;
74 if (__match_mode)
75 __ret = __executor->_M_match();
76 else
77 __ret = __executor->_M_search();
78 if (__ret)
79 {
80 for (auto __it : __res)
81 if (!__it.matched)
82 __it.first = __it.second = __e;
83 auto& __pre = __res[__res.size()-2];
84 auto& __suf = __res[__res.size()-1];
85 if (__match_mode)
86 {
87 __pre.matched = false;
88 __pre.first = __s;
89 __pre.second = __s;
90 __suf.matched = false;
91 __suf.first = __e;
92 __suf.second = __e;
93 }
94 else
95 {
96 __pre.first = __s;
97 __pre.second = __res[0].first;
98 __pre.matched = (__pre.first != __pre.second);
99 __suf.first = __res[0].second;
100 __suf.second = __e;
101 __suf.matched = (__suf.first != __suf.second);
102 }
103 if (__re.flags() & regex_constants::nosubs)
104 __res.resize(3);
105 }
106 return __ret;
107 }
108
109 _GLIBCXX_END_NAMESPACE_VERSION
110 }
111
112 template<typename _Ch_type>
113 template<typename _Fwd_iter>
114 typename regex_traits<_Ch_type>::string_type
115 regex_traits<_Ch_type>::
116 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
117 {
118 typedef std::ctype<char_type> __ctype_type;
119 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
120
121 static const char* __collatenames[] =
122 {
123 "NUL",
124 "SOH",
125 "STX",
126 "ETX",
127 "EOT",
128 "ENQ",
129 "ACK",
130 "alert",
131 "backspace",
132 "tab",
133 "newline",
134 "vertical-tab",
135 "form-feed",
136 "carriage-return",
137 "SO",
138 "SI",
139 "DLE",
140 "DC1",
141 "DC2",
142 "DC3",
143 "DC4",
144 "NAK",
145 "SYN",
146 "ETB",
147 "CAN",
148 "EM",
149 "SUB",
150 "ESC",
151 "IS4",
152 "IS3",
153 "IS2",
154 "IS1",
155 "space",
156 "exclamation-mark",
157 "quotation-mark",
158 "number-sign",
159 "dollar-sign",
160 "percent-sign",
161 "ampersand",
162 "apostrophe",
163 "left-parenthesis",
164 "right-parenthesis",
165 "asterisk",
166 "plus-sign",
167 "comma",
168 "hyphen",
169 "period",
170 "slash",
171 "zero",
172 "one",
173 "two",
174 "three",
175 "four",
176 "five",
177 "six",
178 "seven",
179 "eight",
180 "nine",
181 "colon",
182 "semicolon",
183 "less-than-sign",
184 "equals-sign",
185 "greater-than-sign",
186 "question-mark",
187 "commercial-at",
188 "A",
189 "B",
190 "C",
191 "D",
192 "E",
193 "F",
194 "G",
195 "H",
196 "I",
197 "J",
198 "K",
199 "L",
200 "M",
201 "N",
202 "O",
203 "P",
204 "Q",
205 "R",
206 "S",
207 "T",
208 "U",
209 "V",
210 "W",
211 "X",
212 "Y",
213 "Z",
214 "left-square-bracket",
215 "backslash",
216 "right-square-bracket",
217 "circumflex",
218 "underscore",
219 "grave-accent",
220 "a",
221 "b",
222 "c",
223 "d",
224 "e",
225 "f",
226 "g",
227 "h",
228 "i",
229 "j",
230 "k",
231 "l",
232 "m",
233 "n",
234 "o",
235 "p",
236 "q",
237 "r",
238 "s",
239 "t",
240 "u",
241 "v",
242 "w",
243 "x",
244 "y",
245 "z",
246 "left-curly-bracket",
247 "vertical-line",
248 "right-curly-bracket",
249 "tilde",
250 "DEL",
251 ""
252 };
253
254 // same as boost
255 //static const char* __digraphs[] =
256 // {
257 // "ae",
258 // "Ae",
259 // "AE",
260 // "ch",
261 // "Ch",
262 // "CH",
263 // "ll",
264 // "Ll",
265 // "LL",
266 // "ss",
267 // "Ss",
268 // "SS",
269 // "nj",
270 // "Nj",
271 // "NJ",
272 // "dz",
273 // "Dz",
274 // "DZ",
275 // "lj",
276 // "Lj",
277 // "LJ",
278 // ""
279 // };
280
281 std::string __s(__last - __first, '?');
282 __fctyp.narrow(__first, __last, '?', &*__s.begin());
283
284 for (unsigned int __i = 0; *__collatenames[__i]; __i++)
285 if (__s == __collatenames[__i])
286 return string_type(1, __fctyp.widen(static_cast<char>(__i)));
287
288 //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
289 // {
290 // const char* __now = __digraphs[__i];
291 // if (__s == __now)
292 // {
293 // string_type ret(__s.size(), __fctyp.widen('?'));
294 // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
295 // return ret;
296 // }
297 // }
298 return string_type();
299 }
300
301 template<typename _Ch_type>
302 template<typename _Fwd_iter>
303 typename regex_traits<_Ch_type>::char_class_type
304 regex_traits<_Ch_type>::
305 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
306 {
307 typedef std::ctype<char_type> __ctype_type;
308 typedef std::ctype<char> __cctype_type;
309 typedef const pair<const char*, char_class_type> _ClassnameEntry;
310 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
311 const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
312
313 static _ClassnameEntry __classnames[] =
314 {
315 {"d", ctype_base::digit},
316 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
317 {"s", ctype_base::space},
318 {"alnum", ctype_base::alnum},
319 {"alpha", ctype_base::alpha},
320 {"blank", {0, _RegexMask::_S_blank}},
321 {"cntrl", ctype_base::cntrl},
322 {"digit", ctype_base::digit},
323 {"graph", ctype_base::graph},
324 {"lower", ctype_base::lower},
325 {"print", ctype_base::print},
326 {"punct", ctype_base::punct},
327 {"space", ctype_base::space},
328 {"upper", ctype_base::upper},
329 {"xdigit", ctype_base::xdigit},
330 };
331
332 std::string __s(__last - __first, '?');
333 __fctyp.narrow(__first, __last, '?', &__s[0]);
334 __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
335 for (_ClassnameEntry* __it = __classnames;
336 __it < *(&__classnames + 1);
337 ++__it)
338 {
339 if (__s == __it->first)
340 {
341 if (__icase
342 && ((__it->second
343 & (ctype_base::lower | ctype_base::upper)) != 0))
344 return ctype_base::alpha;
345 return __it->second;
346 }
347 }
348 return 0;
349 }
350
351 template<typename _Ch_type>
352 bool
353 regex_traits<_Ch_type>::
354 isctype(_Ch_type __c, char_class_type __f) const
355 {
356 typedef std::ctype<char_type> __ctype_type;
357 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
358
359 return __fctyp.is(__f._M_base, __c)
360 // [[:w:]]
361 || ((__f._M_extended & _RegexMask::_S_under)
362 && __c == __fctyp.widen('_'))
363 // [[:blank:]]
364 || ((__f._M_extended & _RegexMask::_S_blank)
365 && (__c == __fctyp.widen(' ')
366 || __c == __fctyp.widen('\t')));
367 }
368
369 template<typename _Ch_type>
370 int
371 regex_traits<_Ch_type>::
372 value(_Ch_type __ch, int __radix) const
373 {
374 std::basic_istringstream<char_type> __is(string_type(1, __ch));
375 long __v;
376 if (__radix == 8)
377 __is >> std::oct;
378 else if (__radix == 16)
379 __is >> std::hex;
380 __is >> __v;
381 return __is.fail() ? -1 : __v;
382 }
383
384 template<typename _Bi_iter, typename _Alloc>
385 template<typename _Out_iter>
386 _Out_iter match_results<_Bi_iter, _Alloc>::
387 format(_Out_iter __out,
388 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
389 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
390 match_flag_type __flags) const
391 {
392 _GLIBCXX_DEBUG_ASSERT( ready() );
393 regex_traits<char_type> __traits;
394 typedef std::ctype<char_type> __ctype_type;
395 const __ctype_type&
396 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
397
398 auto __output = [&](size_t __idx)
399 {
400 auto& __sub = _Base_type::operator[](__idx);
401 if (__sub.matched)
402 std::copy(__sub.first, __sub.second, __out);
403 };
404
405 if (__flags & regex_constants::format_sed)
406 {
407 for (; __fmt_first != __fmt_last;)
408 if (*__fmt_first == '&')
409 {
410 __output(0);
411 ++__fmt_first;
412 }
413 else if (*__fmt_first == '\\')
414 {
415 if (++__fmt_first != __fmt_last
416 && __fctyp.is(__ctype_type::digit, *__fmt_first))
417 __output(__traits.value(*__fmt_first++, 10));
418 else
419 *__out++ = '\\';
420 }
421 else
422 *__out++ = *__fmt_first++;
423 }
424 else
425 {
426 while (1)
427 {
428 auto __next = std::find(__fmt_first, __fmt_last, '$');
429 if (__next == __fmt_last)
430 break;
431
432 std::copy(__fmt_first, __next, __out);
433
434 auto __eat = [&](char __ch) -> bool
435 {
436 if (*__next == __ch)
437 {
438 ++__next;
439 return true;
440 }
441 return false;
442 };
443
444 if (++__next == __fmt_last)
445 *__out++ = '$';
446 else if (__eat('$'))
447 *__out++ = '$';
448 else if (__eat('&'))
449 __output(0);
450 else if (__eat('`'))
451 __output(_Base_type::size()-2);
452 else if (__eat('\''))
453 __output(_Base_type::size()-1);
454 else if (__fctyp.is(__ctype_type::digit, *__next))
455 {
456 long __num = __traits.value(*__next, 10);
457 if (++__next != __fmt_last
458 && __fctyp.is(__ctype_type::digit, *__next))
459 {
460 __num *= 10;
461 __num += __traits.value(*__next++, 10);
462 }
463 if (0 <= __num && __num < this->size())
464 __output(__num);
465 }
466 else
467 *__out++ = '$';
468 __fmt_first = __next;
469 }
470 std::copy(__fmt_first, __fmt_last, __out);
471 }
472 return __out;
473 }
474
// Copies [__first, __last) to __out with every match of __e replaced by
// the expansion of the null-terminated format string __fmt, and returns
// the advanced output iterator.
//
// Flags examined here:
//   format_no_copy     text that is not part of a match is not copied
//   format_first_only  only the first match is replaced
// All flags are also forwarded to the match iteration and to format().
template<typename _Out_iter, typename _Bi_iter,
	 typename _Rx_traits, typename _Ch_type>
  _Out_iter
  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
		const basic_regex<_Ch_type, _Rx_traits>& __e,
		const _Ch_type* __fmt,
		regex_constants::match_flag_type __flags)
  {
    typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
    _IterT __i(__first, __last, __e, __flags);
    _IterT __end;
    const bool __copy_unmatched
      = !(__flags & regex_constants::format_no_copy);

    if (__i == __end)
      {
	// No match at all: the input passes through unchanged.
	if (__copy_unmatched)
	  __out = std::copy(__first, __last, __out);
	return __out;
      }

    // Text following the most recently processed match.
    sub_match<_Bi_iter> __tail;
    auto __len = char_traits<_Ch_type>::length(__fmt);
    for (; __i != __end; ++__i)
      {
	// The result of std::copy must be kept, otherwise an output
	// iterator such as a raw pointer is never advanced past the
	// copied text.
	if (__copy_unmatched)
	  __out = std::copy(__i->prefix().first, __i->prefix().second,
			    __out);
	__out = __i->format(__out, __fmt, __fmt + __len, __flags);
	__tail = __i->suffix();
	if (__flags & regex_constants::format_first_only)
	  break;
      }
    if (__copy_unmatched)
      __out = std::copy(__tail.first, __tail.second, __out);
    return __out;
  }
509
510 template<typename _Bi_iter,
511 typename _Ch_type,
512 typename _Rx_traits>
513 bool
514 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
515 operator==(const regex_iterator& __rhs) const
516 {
517 return (_M_match.empty() && __rhs._M_match.empty())
518 || (_M_begin == __rhs._M_begin
519 && _M_end == __rhs._M_end
520 && _M_pregex == __rhs._M_pregex
521 && _M_flags == __rhs._M_flags
522 && _M_match[0] == __rhs._M_match[0]);
523 }
524
525 template<typename _Bi_iter,
526 typename _Ch_type,
527 typename _Rx_traits>
528 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
529 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
530 operator++()
531 {
532 // In all cases in which the call to regex_search returns true,
533 // match.prefix().first shall be equal to the previous value of
534 // match[0].second, and for each index i in the half-open range
535 // [0, match.size()) for which match[i].matched is true,
536 // match[i].position() shall return distance(begin, match[i].first).
537 // [28.12.1.4.5]
538 if (_M_match[0].matched)
539 {
540 auto __start = _M_match[0].second;
541 auto __prefix_first = _M_match[0].second;
542 if (_M_match[0].first == _M_match[0].second)
543 if (__start == _M_end)
544 {
545 _M_match = value_type();
546 return *this;
547 }
548 else
549 {
550 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
551 | regex_constants::match_not_null
552 | regex_constants::match_continuous))
553 {
554 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
555 _M_match.at(_M_match.size()).first = __prefix_first;
556 _M_match._M_in_iterator = true;
557 _M_match._M_begin = _M_begin;
558 return *this;
559 }
560 else
561 ++__start;
562 }
563 _M_flags |= regex_constants::match_prev_avail;
564 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
565 {
566 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
567 _M_match.at(_M_match.size()).first = __prefix_first;
568 _M_match._M_in_iterator = true;
569 _M_match._M_begin = _M_begin;
570 }
571 else
572 _M_match = value_type();
573 }
574 return *this;
575 }
576
577 template<typename _Bi_iter,
578 typename _Ch_type,
579 typename _Rx_traits>
580 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
581 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
582 operator=(const regex_token_iterator& __rhs)
583 {
584 _M_position = __rhs._M_position;
585 _M_subs = __rhs._M_subs;
586 _M_n = __rhs._M_n;
587 _M_result = __rhs._M_result;
588 _M_suffix = __rhs._M_suffix;
589 _M_has_m1 = __rhs._M_has_m1;
590 if (__rhs._M_result == &__rhs._M_suffix)
591 _M_result = &_M_suffix;
592 return *this;
593 }
594
595 template<typename _Bi_iter,
596 typename _Ch_type,
597 typename _Rx_traits>
598 bool
599 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
600 operator==(const regex_token_iterator& __rhs) const
601 {
602 if (_M_end_of_seq() && __rhs._M_end_of_seq())
603 return true;
604 if (_M_suffix.matched && __rhs._M_suffix.matched
605 && _M_suffix == __rhs._M_suffix)
606 return true;
607 if (_M_end_of_seq() || _M_suffix.matched
608 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
609 return false;
610 return _M_position == __rhs._M_position
611 && _M_n == __rhs._M_n
612 && _M_subs == __rhs._M_subs;
613 }
614
615 template<typename _Bi_iter,
616 typename _Ch_type,
617 typename _Rx_traits>
618 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
619 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
620 operator++()
621 {
622 _Position __prev = _M_position;
623 if (_M_suffix.matched)
624 *this = regex_token_iterator();
625 else if (_M_n + 1 < _M_subs.size())
626 {
627 _M_n++;
628 _M_result = &_M_current_match();
629 }
630 else
631 {
632 _M_n = 0;
633 ++_M_position;
634 if (_M_position != _Position())
635 _M_result = &_M_current_match();
636 else if (_M_has_m1 && __prev->suffix().length() != 0)
637 {
638 _M_suffix.matched = true;
639 _M_suffix.first = __prev->suffix().first;
640 _M_suffix.second = __prev->suffix().second;
641 _M_result = &_M_suffix;
642 }
643 else
644 *this = regex_token_iterator();
645 }
646 return *this;
647 }
648
649 template<typename _Bi_iter,
650 typename _Ch_type,
651 typename _Rx_traits>
652 void
653 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
654 _M_init(_Bi_iter __a, _Bi_iter __b)
655 {
656 _M_has_m1 = false;
657 for (auto __it : _M_subs)
658 if (__it == -1)
659 {
660 _M_has_m1 = true;
661 break;
662 }
663 if (_M_position != _Position())
664 _M_result = &_M_current_match();
665 else if (_M_has_m1)
666 {
667 _M_suffix.matched = true;
668 _M_suffix.first = __a;
669 _M_suffix.second = __b;
670 _M_result = &_M_suffix;
671 }
672 else
673 _M_result = nullptr;
674 }
675
676 _GLIBCXX_END_NAMESPACE_VERSION
677 } // namespace
678