unordered_map.h (unordered_map, [...]): Add missing constructors.
[gcc.git] / libstdc++-v3 / include / bits / regex_scanner.h
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_scanner.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 /**
38 * @addtogroup regex-detail
39 * @{
40 */
41
/**
 * @brief Character-type independent part of the regex scanner.
 *
 * Holds the token enumeration, the scanner state, the syntax flags and
 * the escape / special-character tables.  The constructor picks the
 * tables that match the syntax flags it is given.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_unknown
  };

protected:
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Scanner states; construction starts in _S_state_normal.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Sets up the scanner state for the given syntax flags.
   * @param __flags  Syntax options selecting the grammar.
   *
   * Escape table: the ECMAScript table when the ECMAScript flag is set,
   * the awk table otherwise.  Special characters: the ECMAScript set
   * when the ECMAScript flag is set, else the basic set when basic or
   * grep is set, else the extended set.
   *
   * The initializers are listed in member declaration order;
   * _M_flags must be initialized before the _M_is_*() calls below.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    // Give the current token a definite value instead of leaving it
    // indeterminate until it is first assigned.
    _M_token(_S_token_unknown),
    _M_escape_tbl(_M_is_ecma()
                  ? _M_ecma_escape_tbl
                  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
                 ? _M_ecma_spec_char
                 : _M_is_basic()
                 ? _M_basic_spec_char
                 : _M_extended_spec_char),
    _M_at_bracket_start(false)
  { }

protected:
  /**
   * @brief Looks up an escape character in the selected escape table.
   * @param __c  Character to look up.
   * @return Pointer to the mapped character inside the table, or
   *         nullptr if @p __c has no entry.
   *
   * The table is walked until the entry whose key is '\0', so '\0'
   * itself is never found.
   */
  const char*
  _M_find_escape(char __c) const
  {
    for (auto __it = _M_escape_tbl; __it->first != '\0'; ++__it)
      if (__it->first == __c)
        return &__it->second;
    return nullptr;
  }

  /// True if the ECMAScript flag is set.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True if the basic or the grep flag is set.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
                       | std::regex_constants::grep);
  }

  /// True if the extended, egrep or awk flag is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
                       | std::regex_constants::egrep
                       | std::regex_constants::awk);
  }

  /// True if the grep or the egrep flag is set.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
                       | std::regex_constants::egrep);
  }

  /// True if the awk flag is set.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // The tables below are per-object members with default member
  // initializers.  Their declaration order is significant: they are
  // declared before _M_escape_tbl and _M_spec_char, which point into
  // them.

  /// Maps single characters to the token they stand for.
  // NOTE(review): the final '\0' entry looks like a terminator, as in
  // the escape tables; the code walking this table is not in this header.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  /// Escape table used with the ECMAScript flag; ends at the '\0' key.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Escape table used without the ECMAScript flag; ends at the '\0' key.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Special-character sets, one per grammar family.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT                       _M_state;
  _FlagT                        _M_flags;
  _TokenT                       _M_token;      // _S_token_unknown after construction
  const std::pair<char, char>*  _M_escape_tbl; // one of the escape tables above
  const char*                   _M_spec_char;  // one of the *_spec_char strings above
  bool                          _M_at_bracket_start;
};
188
189 /**
190 * @brief Scans an input range for regex tokens.
191 *
192 * The %_Scanner class interprets the regular expression pattern in
193 * the input range passed to its constructor as a sequence of parse
194 * tokens passed to the regular expression compiler. The sequence
195 * of tokens provided depends on the flag settings passed to the
196 * constructor: different regular expression grammars will interpret
197 * the same input pattern in syntactically different ways.
198 */
199 template<typename _CharT>
200 class _Scanner
201 : public _ScannerBase
202 {
203 public:
204 typedef const _CharT* _IterT;
205 typedef std::basic_string<_CharT> _StringT;
206 typedef regex_constants::syntax_option_type _FlagT;
207 typedef const std::ctype<_CharT> _CtypeT;
208
209 _Scanner(_IterT __begin, _IterT __end,
210 _FlagT __flags, std::locale __loc);
211
212 void
213 _M_advance();
214
215 _TokenT
216 _M_get_token() const
217 { return _M_token; }
218
219 const _StringT&
220 _M_get_value() const
221 { return _M_value; }
222
223 #ifdef _GLIBCXX_DEBUG
224 std::ostream&
225 _M_print(std::ostream&);
226 #endif
227
228 private:
229 void
230 _M_scan_normal();
231
232 void
233 _M_scan_in_bracket();
234
235 void
236 _M_scan_in_brace();
237
238 void
239 _M_eat_escape_ecma();
240
241 void
242 _M_eat_escape_posix();
243
244 void
245 _M_eat_escape_awk();
246
247 void
248 _M_eat_class(char);
249
250 _IterT _M_current;
251 _IterT _M_end;
252 _CtypeT& _M_ctype;
253 _StringT _M_value;
254 void (_Scanner::* _M_eat_escape)();
255 };
256
257 //@} regex-detail
258 _GLIBCXX_END_NAMESPACE_VERSION
259 } // namespace __detail
260 } // namespace std
261
262 #include <bits/regex_scanner.tcc>