Update copyright years in libstdc++-v3/
[gcc.git] / libstdc++-v3 / include / bits / regex_automaton.h
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_automaton.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 /**
38 * @defgroup regex-detail Base and Implementation Classes
39 * @ingroup regex
40 * @{
41 */
42
43 typedef long _StateIdT;
44 typedef std::set<_StateIdT> _StateSet;
45 static const _StateIdT _S_invalid_state_id = -1;
46
47 template<typename _CharT>
48 using _Matcher = std::function<bool (_CharT)>;
49
50 /// Operation codes that define the type of transitions within the base NFA
51 /// that represents the regular expression.
52 enum _Opcode : int
53 {
54 _S_opcode_unknown,
55 _S_opcode_alternative,
56 _S_opcode_backref,
57 _S_opcode_line_begin_assertion,
58 _S_opcode_line_end_assertion,
59 _S_opcode_word_boundary,
60 _S_opcode_subexpr_lookahead,
61 _S_opcode_subexpr_begin,
62 _S_opcode_subexpr_end,
63 _S_opcode_dummy,
64 _S_opcode_match,
65 _S_opcode_accept,
66 };
67
68 struct _State_base
69 {
70 _Opcode _M_opcode; // type of outgoing transition
71 _StateIdT _M_next; // outgoing transition
72 union // Since they are mutually exclusive.
73 {
74 size_t _M_subexpr; // for _S_opcode_subexpr_*
75 size_t _M_backref_index; // for _S_opcode_backref
76 struct
77 {
78 // for _S_opcode_alternative.
79 _StateIdT _M_quant_index;
80 // for _S_opcode_alternative or _S_opcode_subexpr_lookahead
81 _StateIdT _M_alt;
82 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
83 // quantifiers (ungreedy if set true)
84 bool _M_neg;
85 };
86 };
87
88 explicit _State_base(_Opcode __opcode)
89 : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
90 { }
91
92 protected:
93 ~_State_base() = default;
94
95 public:
96 #ifdef _GLIBCXX_DEBUG
97 std::ostream&
98 _M_print(std::ostream& ostr) const;
99
100 // Prints graphviz dot commands for state.
101 std::ostream&
102 _M_dot(std::ostream& __ostr, _StateIdT __id) const;
103 #endif
104 };
105
106 template<typename _TraitsT>
107 struct _State : _State_base
108 {
109 typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
110
111 _MatcherT _M_matches; // for _S_opcode_match
112
113 explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
114 };
115
116 struct _NFA_base
117 {
118 typedef size_t _SizeT;
119 typedef regex_constants::syntax_option_type _FlagT;
120
121 explicit
122 _NFA_base(_FlagT __f)
123 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
124 _M_quant_count(0), _M_has_backref(false)
125 { }
126
127 _NFA_base(_NFA_base&&) = default;
128
129 protected:
130 ~_NFA_base() = default;
131
132 public:
133 _FlagT
134 _M_options() const
135 { return _M_flags; }
136
137 _StateIdT
138 _M_start() const
139 { return _M_start_state; }
140
141 const _StateSet&
142 _M_final_states() const
143 { return _M_accepting_states; }
144
145 _SizeT
146 _M_sub_count() const
147 { return _M_subexpr_count; }
148
149 std::vector<size_t> _M_paren_stack;
150 _StateSet _M_accepting_states;
151 _FlagT _M_flags;
152 _StateIdT _M_start_state;
153 _SizeT _M_subexpr_count;
154 _SizeT _M_quant_count;
155 bool _M_has_backref;
156 };
157
158 template<typename _TraitsT>
159 struct _NFA
160 : _NFA_base, std::vector<_State<_TraitsT>>
161 {
162 typedef _State<_TraitsT> _StateT;
163 typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
164
165 using _NFA_base::_NFA_base;
166
167 // for performance reasons _NFA objects should only be moved not copied
168 _NFA(const _NFA&) = delete;
169 _NFA(_NFA&&) = default;
170
171 _StateIdT
172 _M_insert_accept()
173 {
174 auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
175 this->_M_accepting_states.insert(__ret);
176 return __ret;
177 }
178
179 _StateIdT
180 _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
181 {
182 _StateT __tmp(_S_opcode_alternative);
183 // It labels every quantifier to make greedy comparison easier in BFS
184 // approach.
185 __tmp._M_quant_index = this->_M_quant_count++;
186 __tmp._M_next = __next;
187 __tmp._M_alt = __alt;
188 __tmp._M_neg = __neg;
189 return _M_insert_state(std::move(__tmp));
190 }
191
192 _StateIdT
193 _M_insert_matcher(_MatcherT __m)
194 {
195 _StateT __tmp(_S_opcode_match);
196 __tmp._M_matches = std::move(__m);
197 return _M_insert_state(std::move(__tmp));
198 }
199
200 _StateIdT
201 _M_insert_subexpr_begin()
202 {
203 auto __id = this->_M_subexpr_count++;
204 this->_M_paren_stack.push_back(__id);
205 _StateT __tmp(_S_opcode_subexpr_begin);
206 __tmp._M_subexpr = __id;
207 return _M_insert_state(std::move(__tmp));
208 }
209
210 _StateIdT
211 _M_insert_subexpr_end()
212 {
213 _StateT __tmp(_S_opcode_subexpr_end);
214 __tmp._M_subexpr = this->_M_paren_stack.back();
215 this->_M_paren_stack.pop_back();
216 return _M_insert_state(std::move(__tmp));
217 }
218
219 _StateIdT
220 _M_insert_backref(size_t __index);
221
222 _StateIdT
223 _M_insert_line_begin()
224 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
225
226 _StateIdT
227 _M_insert_line_end()
228 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
229
230 _StateIdT
231 _M_insert_word_bound(bool __neg)
232 {
233 _StateT __tmp(_S_opcode_word_boundary);
234 __tmp._M_neg = __neg;
235 return _M_insert_state(std::move(__tmp));
236 }
237
238 _StateIdT
239 _M_insert_lookahead(_StateIdT __alt, bool __neg)
240 {
241 _StateT __tmp(_S_opcode_subexpr_lookahead);
242 __tmp._M_alt = __alt;
243 __tmp._M_neg = __neg;
244 return _M_insert_state(std::move(__tmp));
245 }
246
247 _StateIdT
248 _M_insert_dummy()
249 { return _M_insert_state(_StateT(_S_opcode_dummy)); }
250
251 _StateIdT
252 _M_insert_state(_StateT __s)
253 {
254 this->push_back(std::move(__s));
255 return this->size()-1;
256 }
257
258 // Eliminate dummy node in this NFA to make it compact.
259 void
260 _M_eliminate_dummy();
261
262 #ifdef _GLIBCXX_DEBUG
263 std::ostream&
264 _M_dot(std::ostream& __ostr) const;
265 #endif
266 };
267
268 /// Describes a sequence of one or more %_State, its current start
269 /// and end(s). This structure contains fragments of an NFA during
270 /// construction.
271 template<typename _TraitsT>
272 class _StateSeq
273 {
274 public:
275 typedef _NFA<_TraitsT> _RegexT;
276
277 public:
278 _StateSeq(_RegexT& __nfa, _StateIdT __s)
279 : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
280 { }
281
282 _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
283 : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
284 { }
285
286 // Append a state on *this and change *this to the new sequence.
287 void
288 _M_append(_StateIdT __id)
289 {
290 _M_nfa[_M_end]._M_next = __id;
291 _M_end = __id;
292 }
293
294 // Append a sequence on *this and change *this to the new sequence.
295 void
296 _M_append(const _StateSeq& __s)
297 {
298 _M_nfa[_M_end]._M_next = __s._M_start;
299 _M_end = __s._M_end;
300 }
301
302 // Clones an entire sequence.
303 _StateSeq
304 _M_clone();
305
306 public:
307 _RegexT& _M_nfa;
308 _StateIdT _M_start;
309 _StateIdT _M_end;
310 };
311
312 //@} regex-detail
313 _GLIBCXX_END_NAMESPACE_VERSION
314 } // namespace __detail
315 } // namespace std
316
317 #include <bits/regex_automaton.tcc>