re PR libstdc++/63775 ([C++11] Regex range with leading dash (-) not working)
[gcc.git] / libstdc++-v3 / include / bits / regex.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 // A non-standard switch to let the user pick the matching algorithm.
32 // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
33 // algorithm will be used. This algorithm is not enabled by default,
34 // and cannot be used if the regex contains back-references, but has better
35 // (polynomial instead of exponential) worst case performance.
36 // See __regex_algo_impl below.
37
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43
44 // Result of merging regex_match and regex_search.
45 //
46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47 // the other one if possible, for test purpose).
48 //
49 // That __match_mode is true means regex_match, else regex_search.
50 template<typename _BiIter, typename _Alloc,
51 typename _CharT, typename _TraitsT,
52 _RegexExecutorPolicy __policy,
53 bool __match_mode>
54 bool
55 __regex_algo_impl(_BiIter __s,
56 _BiIter __e,
57 match_results<_BiIter, _Alloc>& __m,
58 const basic_regex<_CharT, _TraitsT>& __re,
59 regex_constants::match_flag_type __flags)
60 {
61 if (__re._M_automaton == nullptr)
62 return false;
63
64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65 __res.resize(__re._M_automaton->_M_sub_count() + 2);
66 for (auto& __it : __res)
67 __it.matched = false;
68
69 // __policy is used by testsuites so that they can use Thompson NFA
70 // without defining a macro. Users should define
71 // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
72 bool __ret;
73 if (!__re._M_automaton->_M_has_backref
74 && !(__re._M_flags & regex_constants::ECMAScript)
75 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
76 && __policy == _RegexExecutorPolicy::_S_alternate
77 #endif
78 )
79 {
80 _Executor<_BiIter, _Alloc, _TraitsT, false>
81 __executor(__s, __e, __m, __re, __flags);
82 if (__match_mode)
83 __ret = __executor._M_match();
84 else
85 __ret = __executor._M_search();
86 }
87 else
88 {
89 _Executor<_BiIter, _Alloc, _TraitsT, true>
90 __executor(__s, __e, __m, __re, __flags);
91 if (__match_mode)
92 __ret = __executor._M_match();
93 else
94 __ret = __executor._M_search();
95 }
96 if (__ret)
97 {
98 for (auto __it : __res)
99 if (!__it.matched)
100 __it.first = __it.second = __e;
101 auto& __pre = __res[__res.size()-2];
102 auto& __suf = __res[__res.size()-1];
103 if (__match_mode)
104 {
105 __pre.matched = false;
106 __pre.first = __s;
107 __pre.second = __s;
108 __suf.matched = false;
109 __suf.first = __e;
110 __suf.second = __e;
111 }
112 else
113 {
114 __pre.first = __s;
115 __pre.second = __res[0].first;
116 __pre.matched = (__pre.first != __pre.second);
117 __suf.first = __res[0].second;
118 __suf.second = __e;
119 __suf.matched = (__suf.first != __suf.second);
120 }
121 }
122 return __ret;
123 }
124
125 _GLIBCXX_END_NAMESPACE_VERSION
126 }
127
128 _GLIBCXX_BEGIN_NAMESPACE_VERSION
129
130 template<typename _Ch_type>
131 template<typename _Fwd_iter>
132 typename regex_traits<_Ch_type>::string_type
133 regex_traits<_Ch_type>::
134 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
135 {
136 typedef std::ctype<char_type> __ctype_type;
137 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
138
139 static const char* __collatenames[] =
140 {
141 "NUL",
142 "SOH",
143 "STX",
144 "ETX",
145 "EOT",
146 "ENQ",
147 "ACK",
148 "alert",
149 "backspace",
150 "tab",
151 "newline",
152 "vertical-tab",
153 "form-feed",
154 "carriage-return",
155 "SO",
156 "SI",
157 "DLE",
158 "DC1",
159 "DC2",
160 "DC3",
161 "DC4",
162 "NAK",
163 "SYN",
164 "ETB",
165 "CAN",
166 "EM",
167 "SUB",
168 "ESC",
169 "IS4",
170 "IS3",
171 "IS2",
172 "IS1",
173 "space",
174 "exclamation-mark",
175 "quotation-mark",
176 "number-sign",
177 "dollar-sign",
178 "percent-sign",
179 "ampersand",
180 "apostrophe",
181 "left-parenthesis",
182 "right-parenthesis",
183 "asterisk",
184 "plus-sign",
185 "comma",
186 "hyphen",
187 "period",
188 "slash",
189 "zero",
190 "one",
191 "two",
192 "three",
193 "four",
194 "five",
195 "six",
196 "seven",
197 "eight",
198 "nine",
199 "colon",
200 "semicolon",
201 "less-than-sign",
202 "equals-sign",
203 "greater-than-sign",
204 "question-mark",
205 "commercial-at",
206 "A",
207 "B",
208 "C",
209 "D",
210 "E",
211 "F",
212 "G",
213 "H",
214 "I",
215 "J",
216 "K",
217 "L",
218 "M",
219 "N",
220 "O",
221 "P",
222 "Q",
223 "R",
224 "S",
225 "T",
226 "U",
227 "V",
228 "W",
229 "X",
230 "Y",
231 "Z",
232 "left-square-bracket",
233 "backslash",
234 "right-square-bracket",
235 "circumflex",
236 "underscore",
237 "grave-accent",
238 "a",
239 "b",
240 "c",
241 "d",
242 "e",
243 "f",
244 "g",
245 "h",
246 "i",
247 "j",
248 "k",
249 "l",
250 "m",
251 "n",
252 "o",
253 "p",
254 "q",
255 "r",
256 "s",
257 "t",
258 "u",
259 "v",
260 "w",
261 "x",
262 "y",
263 "z",
264 "left-curly-bracket",
265 "vertical-line",
266 "right-curly-bracket",
267 "tilde",
268 "DEL",
269 ""
270 };
271
272 // same as boost
273 //static const char* __digraphs[] =
274 // {
275 // "ae",
276 // "Ae",
277 // "AE",
278 // "ch",
279 // "Ch",
280 // "CH",
281 // "ll",
282 // "Ll",
283 // "LL",
284 // "ss",
285 // "Ss",
286 // "SS",
287 // "nj",
288 // "Nj",
289 // "NJ",
290 // "dz",
291 // "Dz",
292 // "DZ",
293 // "lj",
294 // "Lj",
295 // "LJ",
296 // ""
297 // };
298
299 std::string __s(__last - __first, '?');
300 __fctyp.narrow(__first, __last, '?', &*__s.begin());
301
302 for (unsigned int __i = 0; *__collatenames[__i]; __i++)
303 if (__s == __collatenames[__i])
304 return string_type(1, __fctyp.widen(static_cast<char>(__i)));
305
306 //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
307 // {
308 // const char* __now = __digraphs[__i];
309 // if (__s == __now)
310 // {
311 // string_type ret(__s.size(), __fctyp.widen('?'));
312 // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
313 // return ret;
314 // }
315 // }
316 return string_type();
317 }
318
319 template<typename _Ch_type>
320 template<typename _Fwd_iter>
321 typename regex_traits<_Ch_type>::char_class_type
322 regex_traits<_Ch_type>::
323 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
324 {
325 typedef std::ctype<char_type> __ctype_type;
326 typedef std::ctype<char> __cctype_type;
327 typedef const pair<const char*, char_class_type> _ClassnameEntry;
328 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
330
331 static _ClassnameEntry __classnames[] =
332 {
333 {"d", ctype_base::digit},
334 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
335 {"s", ctype_base::space},
336 {"alnum", ctype_base::alnum},
337 {"alpha", ctype_base::alpha},
338 {"blank", ctype_base::blank},
339 {"cntrl", ctype_base::cntrl},
340 {"digit", ctype_base::digit},
341 {"graph", ctype_base::graph},
342 {"lower", ctype_base::lower},
343 {"print", ctype_base::print},
344 {"punct", ctype_base::punct},
345 {"space", ctype_base::space},
346 {"upper", ctype_base::upper},
347 {"xdigit", ctype_base::xdigit},
348 };
349
350 std::string __s(__last - __first, '?');
351 __fctyp.narrow(__first, __last, '?', &__s[0]);
352 __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
353 for (_ClassnameEntry* __it = __classnames;
354 __it < *(&__classnames + 1);
355 ++__it)
356 {
357 if (__s == __it->first)
358 {
359 if (__icase
360 && ((__it->second
361 & (ctype_base::lower | ctype_base::upper)) != 0))
362 return ctype_base::alpha;
363 return __it->second;
364 }
365 }
366 return 0;
367 }
368
369 template<typename _Ch_type>
370 bool
371 regex_traits<_Ch_type>::
372 isctype(_Ch_type __c, char_class_type __f) const
373 {
374 typedef std::ctype<char_type> __ctype_type;
375 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
376
377 return __fctyp.is(__f._M_base, __c)
378 // [[:w:]]
379 || ((__f._M_extended & _RegexMask::_S_under)
380 && __c == __fctyp.widen('_'));
381 }
382
383 template<typename _Ch_type>
384 int
385 regex_traits<_Ch_type>::
386 value(_Ch_type __ch, int __radix) const
387 {
388 std::basic_istringstream<char_type> __is(string_type(1, __ch));
389 long __v;
390 if (__radix == 8)
391 __is >> std::oct;
392 else if (__radix == 16)
393 __is >> std::hex;
394 __is >> __v;
395 return __is.fail() ? -1 : __v;
396 }
397
398 template<typename _Bi_iter, typename _Alloc>
399 template<typename _Out_iter>
400 _Out_iter match_results<_Bi_iter, _Alloc>::
401 format(_Out_iter __out,
402 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
403 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
404 match_flag_type __flags) const
405 {
406 _GLIBCXX_DEBUG_ASSERT( ready() );
407 regex_traits<char_type> __traits;
408 typedef std::ctype<char_type> __ctype_type;
409 const __ctype_type&
410 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
411
412 auto __output = [&](size_t __idx)
413 {
414 auto& __sub = _Base_type::operator[](__idx);
415 if (__sub.matched)
416 __out = std::copy(__sub.first, __sub.second, __out);
417 };
418
419 if (__flags & regex_constants::format_sed)
420 {
421 for (; __fmt_first != __fmt_last;)
422 if (*__fmt_first == '&')
423 {
424 __output(0);
425 ++__fmt_first;
426 }
427 else if (*__fmt_first == '\\')
428 {
429 if (++__fmt_first != __fmt_last
430 && __fctyp.is(__ctype_type::digit, *__fmt_first))
431 __output(__traits.value(*__fmt_first++, 10));
432 else
433 *__out++ = '\\';
434 }
435 else
436 *__out++ = *__fmt_first++;
437 }
438 else
439 {
440 while (1)
441 {
442 auto __next = std::find(__fmt_first, __fmt_last, '$');
443 if (__next == __fmt_last)
444 break;
445
446 __out = std::copy(__fmt_first, __next, __out);
447
448 auto __eat = [&](char __ch) -> bool
449 {
450 if (*__next == __ch)
451 {
452 ++__next;
453 return true;
454 }
455 return false;
456 };
457
458 if (++__next == __fmt_last)
459 *__out++ = '$';
460 else if (__eat('$'))
461 *__out++ = '$';
462 else if (__eat('&'))
463 __output(0);
464 else if (__eat('`'))
465 __output(_Base_type::size()-2);
466 else if (__eat('\''))
467 __output(_Base_type::size()-1);
468 else if (__fctyp.is(__ctype_type::digit, *__next))
469 {
470 long __num = __traits.value(*__next, 10);
471 if (++__next != __fmt_last
472 && __fctyp.is(__ctype_type::digit, *__next))
473 {
474 __num *= 10;
475 __num += __traits.value(*__next++, 10);
476 }
477 if (0 <= __num && __num < this->size())
478 __output(__num);
479 }
480 else
481 *__out++ = '$';
482 __fmt_first = __next;
483 }
484 __out = std::copy(__fmt_first, __fmt_last, __out);
485 }
486 return __out;
487 }
488
489 template<typename _Out_iter, typename _Bi_iter,
490 typename _Rx_traits, typename _Ch_type>
491 _Out_iter
492 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
493 const basic_regex<_Ch_type, _Rx_traits>& __e,
494 const _Ch_type* __fmt,
495 regex_constants::match_flag_type __flags)
496 {
497 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
498 _IterT __i(__first, __last, __e, __flags);
499 _IterT __end;
500 if (__i == __end)
501 {
502 if (!(__flags & regex_constants::format_no_copy))
503 __out = std::copy(__first, __last, __out);
504 }
505 else
506 {
507 sub_match<_Bi_iter> __last;
508 auto __len = char_traits<_Ch_type>::length(__fmt);
509 for (; __i != __end; ++__i)
510 {
511 if (!(__flags & regex_constants::format_no_copy))
512 __out = std::copy(__i->prefix().first, __i->prefix().second,
513 __out);
514 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
515 __last = __i->suffix();
516 if (__flags & regex_constants::format_first_only)
517 break;
518 }
519 if (!(__flags & regex_constants::format_no_copy))
520 __out = std::copy(__last.first, __last.second, __out);
521 }
522 return __out;
523 }
524
525 template<typename _Bi_iter,
526 typename _Ch_type,
527 typename _Rx_traits>
528 bool
529 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
530 operator==(const regex_iterator& __rhs) const
531 {
532 return (_M_match.empty() && __rhs._M_match.empty())
533 || (_M_begin == __rhs._M_begin
534 && _M_end == __rhs._M_end
535 && _M_pregex == __rhs._M_pregex
536 && _M_flags == __rhs._M_flags
537 && _M_match[0] == __rhs._M_match[0]);
538 }
539
540 template<typename _Bi_iter,
541 typename _Ch_type,
542 typename _Rx_traits>
543 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
544 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
545 operator++()
546 {
547 // In all cases in which the call to regex_search returns true,
548 // match.prefix().first shall be equal to the previous value of
549 // match[0].second, and for each index i in the half-open range
550 // [0, match.size()) for which match[i].matched is true,
551 // match[i].position() shall return distance(begin, match[i].first).
552 // [28.12.1.4.5]
553 if (_M_match[0].matched)
554 {
555 auto __start = _M_match[0].second;
556 auto __prefix_first = _M_match[0].second;
557 if (_M_match[0].first == _M_match[0].second)
558 {
559 if (__start == _M_end)
560 {
561 _M_match = value_type();
562 return *this;
563 }
564 else
565 {
566 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
567 _M_flags
568 | regex_constants::match_not_null
569 | regex_constants::match_continuous))
570 {
571 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
572 _M_match.at(_M_match.size()).first = __prefix_first;
573 _M_match._M_in_iterator = true;
574 _M_match._M_begin = _M_begin;
575 return *this;
576 }
577 else
578 ++__start;
579 }
580 }
581 _M_flags |= regex_constants::match_prev_avail;
582 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
583 {
584 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
585 _M_match.at(_M_match.size()).first = __prefix_first;
586 _M_match._M_in_iterator = true;
587 _M_match._M_begin = _M_begin;
588 }
589 else
590 _M_match = value_type();
591 }
592 return *this;
593 }
594
595 template<typename _Bi_iter,
596 typename _Ch_type,
597 typename _Rx_traits>
598 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
599 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
600 operator=(const regex_token_iterator& __rhs)
601 {
602 _M_position = __rhs._M_position;
603 _M_subs = __rhs._M_subs;
604 _M_n = __rhs._M_n;
605 _M_result = __rhs._M_result;
606 _M_suffix = __rhs._M_suffix;
607 _M_has_m1 = __rhs._M_has_m1;
608 if (__rhs._M_result == &__rhs._M_suffix)
609 _M_result = &_M_suffix;
610 return *this;
611 }
612
613 template<typename _Bi_iter,
614 typename _Ch_type,
615 typename _Rx_traits>
616 bool
617 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
618 operator==(const regex_token_iterator& __rhs) const
619 {
620 if (_M_end_of_seq() && __rhs._M_end_of_seq())
621 return true;
622 if (_M_suffix.matched && __rhs._M_suffix.matched
623 && _M_suffix == __rhs._M_suffix)
624 return true;
625 if (_M_end_of_seq() || _M_suffix.matched
626 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
627 return false;
628 return _M_position == __rhs._M_position
629 && _M_n == __rhs._M_n
630 && _M_subs == __rhs._M_subs;
631 }
632
633 template<typename _Bi_iter,
634 typename _Ch_type,
635 typename _Rx_traits>
636 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
637 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
638 operator++()
639 {
640 _Position __prev = _M_position;
641 if (_M_suffix.matched)
642 *this = regex_token_iterator();
643 else if (_M_n + 1 < _M_subs.size())
644 {
645 _M_n++;
646 _M_result = &_M_current_match();
647 }
648 else
649 {
650 _M_n = 0;
651 ++_M_position;
652 if (_M_position != _Position())
653 _M_result = &_M_current_match();
654 else if (_M_has_m1 && __prev->suffix().length() != 0)
655 {
656 _M_suffix.matched = true;
657 _M_suffix.first = __prev->suffix().first;
658 _M_suffix.second = __prev->suffix().second;
659 _M_result = &_M_suffix;
660 }
661 else
662 *this = regex_token_iterator();
663 }
664 return *this;
665 }
666
667 template<typename _Bi_iter,
668 typename _Ch_type,
669 typename _Rx_traits>
670 void
671 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
672 _M_init(_Bi_iter __a, _Bi_iter __b)
673 {
674 _M_has_m1 = false;
675 for (auto __it : _M_subs)
676 if (__it == -1)
677 {
678 _M_has_m1 = true;
679 break;
680 }
681 if (_M_position != _Position())
682 _M_result = &_M_current_match();
683 else if (_M_has_m1)
684 {
685 _M_suffix.matched = true;
686 _M_suffix.first = __a;
687 _M_suffix.second = __b;
688 _M_result = &_M_suffix;
689 }
690 else
691 _M_result = nullptr;
692 }
693
694 _GLIBCXX_END_NAMESPACE_VERSION
695 } // namespace
696