Makefile.am: Adjust to new files.
[gcc.git] / libstdc++-v3 / include / bits / regex_compiler.h
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_compiler.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 /**
38 * @addtogroup regex-detail
39 * @{
40 */
41
42 /// Matches a character range (bracket expression)
43 template<typename _CharT, typename _TraitsT>
44 struct _BracketMatcher
45 {
46 typedef typename _TraitsT::char_class_type _CharClassT;
47 typedef typename _TraitsT::string_type _StringT;
48 typedef regex_constants::syntax_option_type _FlagT;
49
50 explicit
51 _BracketMatcher(bool __is_non_matching,
52 const _TraitsT& __t,
53 _FlagT __flags)
54 : _M_is_non_matching(__is_non_matching), _M_traits(__t),
55 _M_flags(__flags), _M_class_set(0)
56 { }
57
58 bool
59 operator()(_CharT) const;
60
61 void
62 _M_add_char(_CharT __c)
63 {
64 if (_M_flags & regex_constants::collate)
65 if (_M_is_icase())
66 _M_char_set.push_back(_M_traits.translate_nocase(__c));
67 else
68 _M_char_set.push_back(_M_traits.translate(__c));
69 else
70 _M_char_set.push_back(__c);
71 }
72
73 void
74 _M_add_collating_element(const _StringT& __s)
75 {
76 auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
77 if (__st.empty())
78 __throw_regex_error(regex_constants::error_collate);
79 // TODO: digraph
80 _M_char_set.push_back(__st[0]);
81 }
82
83 void
84 _M_add_equivalence_class(const _StringT& __s)
85 {
86 _M_add_character_class(
87 _M_traits.transform_primary(&*__s.begin(), &*__s.end()));
88 }
89
90 void
91 _M_add_character_class(const _StringT& __s)
92 {
93 auto __st = _M_traits.
94 lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase());
95 if (__st == 0)
96 __throw_regex_error(regex_constants::error_ctype);
97 _M_class_set |= __st;
98 }
99
100 void
101 _M_make_range(_CharT __l, _CharT __r)
102 { _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); }
103
104 bool
105 _M_is_icase() const
106 { return _M_flags & regex_constants::icase; }
107
108 _StringT
109 _M_get_str(_CharT __c) const
110 {
111 auto __s = _StringT(1,
112 _M_is_icase()
113 ? _M_traits.translate_nocase(__c)
114 : _M_traits.translate(__c));
115 return _M_traits.transform(__s.begin(), __s.end());
116 }
117
118 _TraitsT _M_traits;
119 _FlagT _M_flags;
120 bool _M_is_non_matching;
121 std::vector<_CharT> _M_char_set;
122 std::vector<pair<_StringT, _StringT>> _M_range_set;
123 _CharClassT _M_class_set;
124 };
125
126 /**
127 * @brief struct _Scanner. Scans an input range for regex tokens.
128 *
129 * The %_Scanner class interprets the regular expression pattern in
130 * the input range passed to its constructor as a sequence of parse
131 * tokens passed to the regular expression compiler. The sequence
132 * of tokens provided depends on the flag settings passed to the
133 * constructor: different regular expression grammars will interpret
134 * the same input pattern in syntactically different ways.
135 */
136 template<typename _InputIter>
137 class _Scanner
138 {
139 public:
140 typedef unsigned int _StateT;
141 typedef typename std::iterator_traits<_InputIter>::value_type _CharT;
142 typedef std::basic_string<_CharT> _StringT;
143 typedef regex_constants::syntax_option_type _FlagT;
144 typedef const std::ctype<_CharT> _CtypeT;
145
146 /// Token types returned from the scanner.
147 enum _TokenT
148 {
149 _S_token_anychar,
150 _S_token_backref,
151 _S_token_bracket_begin,
152 _S_token_bracket_inverse_begin,
153 _S_token_bracket_end,
154 _S_token_char_class_name,
155 _S_token_closure0,
156 _S_token_closure1,
157 _S_token_collelem_multi,
158 _S_token_collelem_single,
159 _S_token_collsymbol,
160 _S_token_comma,
161 _S_token_dash,
162 _S_token_dup_count,
163 _S_token_eof,
164 _S_token_equiv_class_name,
165 _S_token_interval_begin,
166 _S_token_interval_end,
167 _S_token_line_begin,
168 _S_token_line_end,
169 _S_token_opt,
170 _S_token_or,
171 _S_token_ord_char,
172 _S_token_subexpr_begin,
173 _S_token_subexpr_end,
174 _S_token_word_begin,
175 _S_token_word_end,
176 _S_token_unknown
177 };
178
179 _Scanner(_InputIter __begin, _InputIter __end,
180 _FlagT __flags, std::locale __loc)
181 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
182 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
183 { _M_advance(); }
184
185 void
186 _M_advance();
187
188 _TokenT
189 _M_token() const
190 { return _M_curToken; }
191
192 const _StringT&
193 _M_value() const
194 { return _M_curValue; }
195
196 #ifdef _GLIBCXX_DEBUG
197 std::ostream&
198 _M_print(std::ostream&);
199 #endif
200
201 private:
202 void
203 _M_eat_escape();
204
205 void
206 _M_scan_in_brace();
207
208 void
209 _M_scan_in_bracket();
210
211 void
212 _M_eat_charclass();
213
214 void
215 _M_eat_equivclass();
216
217 void
218 _M_eat_collsymbol();
219
220 static constexpr _StateT _S_state_in_brace = 1 << 0;
221 static constexpr _StateT _S_state_in_bracket = 1 << 1;
222 _InputIter _M_current;
223 _InputIter _M_end;
224 _FlagT _M_flags;
225 _CtypeT& _M_ctype;
226 _TokenT _M_curToken;
227 _StringT _M_curValue;
228 _StateT _M_state;
229 };
230
231 /// Builds an NFA from an input iterator interval.
232 template<typename _InputIter, typename _CharT, typename _TraitsT>
233 class _Compiler
234 {
235 public:
236 typedef typename _TraitsT::string_type _StringT;
237 typedef _NFA<_CharT, _TraitsT> _RegexT;
238 typedef regex_constants::syntax_option_type _FlagT;
239
240 _Compiler(_InputIter __b, _InputIter __e,
241 const _TraitsT& __traits, _FlagT __flags);
242
243 std::shared_ptr<_RegexT>
244 _M_get_nfa() const
245 { return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
246
247 private:
248 typedef _Scanner<_InputIter> _ScannerT;
249 typedef typename _ScannerT::_TokenT _TokenT;
250 typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
251 typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
252 typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT;
253
254 // accepts a specific token or returns false.
255 bool
256 _M_match_token(_TokenT __token);
257
258 void
259 _M_disjunction();
260
261 void
262 _M_alternative();
263
264 bool
265 _M_term();
266
267 bool
268 _M_assertion();
269
270 void
271 _M_quantifier();
272
273 bool
274 _M_atom();
275
276 bool
277 _M_bracket_expression();
278
279 bool
280 _M_bracket_list(_BMatcherT& __matcher);
281
282 bool
283 _M_follow_list(_BMatcherT& __matcher);
284
285 void
286 _M_expression_term(_BMatcherT& __matcher);
287
288 bool
289 _M_range_expression(_BMatcherT& __matcher);
290
291 bool
292 _M_start_range(_BMatcherT& __matcher);
293
294 bool
295 _M_collating_symbol(_BMatcherT& __matcher);
296
297 bool
298 _M_equivalence_class(_BMatcherT& __matcher);
299
300 bool
301 _M_character_class(_BMatcherT& __matcher);
302
303 int
304 _M_cur_int_value(int __radix);
305
306 const _TraitsT& _M_traits;
307 _ScannerT _M_scanner;
308 _StringT _M_cur_value;
309 _RegexT _M_state_store;
310 _StackT _M_stack;
311 _FlagT _M_flags;
312 };
313
314 //@} regex-detail
315 _GLIBCXX_END_NAMESPACE_VERSION
316 } // namespace __detail
317 } // namespace std
318
319 #include <bits/regex_compiler.tcc>