1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_automaton.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 // This macro defines the maximal state number a NFA can have.
32 #ifndef _GLIBCXX_REGEX_STATE_LIMIT
33 #define _GLIBCXX_REGEX_STATE_LIMIT 100000
36 namespace std
_GLIBCXX_VISIBILITY(default)
40 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43 * @defgroup regex-detail Base and Implementation Classes
48 typedef long _StateIdT
;
49 static const _StateIdT _S_invalid_state_id
= -1;
51 template<typename _CharT
>
52 using _Matcher
= std::function
<bool (_CharT
)>;
54 /// Operation codes that define the type of transitions within the base NFA
55 /// that represents the regular expression.
59 _S_opcode_alternative
,
62 _S_opcode_line_begin_assertion
,
63 _S_opcode_line_end_assertion
,
64 _S_opcode_word_boundary
,
65 _S_opcode_subexpr_lookahead
,
66 _S_opcode_subexpr_begin
,
67 _S_opcode_subexpr_end
,
75 _Opcode _M_opcode
; // type of outgoing transition
76 _StateIdT _M_next
; // outgoing transition
77 union // Since they are mutually exclusive.
79 size_t _M_subexpr
; // for _S_opcode_subexpr_*
80 size_t _M_backref_index
; // for _S_opcode_backref
83 // for _S_opcode_alternative, _S_opcode_repeat and
84 // _S_opcode_subexpr_lookahead
86 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
87 // quantifiers (ungreedy if set true)
92 explicit _State_base(_Opcode __opcode
)
93 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
)
97 ~_State_base() = default;
100 #ifdef _GLIBCXX_DEBUG
102 _M_print(std::ostream
& ostr
) const;
104 // Prints graphviz dot commands for state.
106 _M_dot(std::ostream
& __ostr
, _StateIdT __id
) const;
110 template<typename _TraitsT
>
111 struct _State
: _State_base
113 typedef _Matcher
<typename
_TraitsT::char_type
> _MatcherT
;
115 _MatcherT _M_matches
; // for _S_opcode_match
117 explicit _State(_Opcode __opcode
) : _State_base(__opcode
) { }
122 typedef size_t _SizeT
;
123 typedef regex_constants::syntax_option_type _FlagT
;
126 _NFA_base(_FlagT __f
)
127 : _M_flags(__f
), _M_start_state(0), _M_subexpr_count(0),
128 _M_has_backref(false)
131 _NFA_base(_NFA_base
&&) = default;
134 ~_NFA_base() = default;
143 { return _M_start_state
; }
147 { return _M_subexpr_count
; }
149 std::vector
<size_t> _M_paren_stack
;
151 _StateIdT _M_start_state
;
152 _SizeT _M_subexpr_count
;
156 template<typename _TraitsT
>
158 : _NFA_base
, std::vector
<_State
<_TraitsT
>>
160 typedef _State
<_TraitsT
> _StateT
;
161 typedef _Matcher
<typename
_TraitsT::char_type
> _MatcherT
;
163 _NFA(const typename
_TraitsT::locale_type
& __loc
, _FlagT __flags
)
165 { _M_traits
.imbue(__loc
); }
167 // for performance reasons _NFA objects should only be moved not copied
168 _NFA(const _NFA
&) = delete;
169 _NFA(_NFA
&&) = default;
174 auto __ret
= _M_insert_state(_StateT(_S_opcode_accept
));
179 _M_insert_alt(_StateIdT __next
, _StateIdT __alt
, bool __neg
)
181 _StateT
__tmp(_S_opcode_alternative
);
182 // It labels every quantifier to make greedy comparison easier in BFS
184 __tmp
._M_next
= __next
;
185 __tmp
._M_alt
= __alt
;
186 return _M_insert_state(std::move(__tmp
));
190 _M_insert_repeat(_StateIdT __next
, _StateIdT __alt
, bool __neg
)
192 _StateT
__tmp(_S_opcode_repeat
);
193 // It labels every quantifier to make greedy comparison easier in BFS
195 __tmp
._M_next
= __next
;
196 __tmp
._M_alt
= __alt
;
197 __tmp
._M_neg
= __neg
;
198 return _M_insert_state(std::move(__tmp
));
202 _M_insert_matcher(_MatcherT __m
)
204 _StateT
__tmp(_S_opcode_match
);
205 __tmp
._M_matches
= std::move(__m
);
206 return _M_insert_state(std::move(__tmp
));
210 _M_insert_subexpr_begin()
212 auto __id
= this->_M_subexpr_count
++;
213 this->_M_paren_stack
.push_back(__id
);
214 _StateT
__tmp(_S_opcode_subexpr_begin
);
215 __tmp
._M_subexpr
= __id
;
216 return _M_insert_state(std::move(__tmp
));
220 _M_insert_subexpr_end()
222 _StateT
__tmp(_S_opcode_subexpr_end
);
223 __tmp
._M_subexpr
= this->_M_paren_stack
.back();
224 this->_M_paren_stack
.pop_back();
225 return _M_insert_state(std::move(__tmp
));
229 _M_insert_backref(size_t __index
);
232 _M_insert_line_begin()
233 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion
)); }
237 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion
)); }
240 _M_insert_word_bound(bool __neg
)
242 _StateT
__tmp(_S_opcode_word_boundary
);
243 __tmp
._M_neg
= __neg
;
244 return _M_insert_state(std::move(__tmp
));
248 _M_insert_lookahead(_StateIdT __alt
, bool __neg
)
250 _StateT
__tmp(_S_opcode_subexpr_lookahead
);
251 __tmp
._M_alt
= __alt
;
252 __tmp
._M_neg
= __neg
;
253 return _M_insert_state(std::move(__tmp
));
258 { return _M_insert_state(_StateT(_S_opcode_dummy
)); }
261 _M_insert_state(_StateT __s
)
263 this->push_back(std::move(__s
));
264 if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT
)
265 __throw_regex_error(regex_constants::error_space
);
266 return this->size()-1;
269 // Eliminate dummy node in this NFA to make it compact.
271 _M_eliminate_dummy();
273 #ifdef _GLIBCXX_DEBUG
275 _M_dot(std::ostream
& __ostr
) const;
281 /// Describes a sequence of one or more %_State, its current start
282 /// and end(s). This structure contains fragments of an NFA during
284 template<typename _TraitsT
>
288 typedef _NFA
<_TraitsT
> _RegexT
;
291 _StateSeq(_RegexT
& __nfa
, _StateIdT __s
)
292 : _M_nfa(__nfa
), _M_start(__s
), _M_end(__s
)
295 _StateSeq(_RegexT
& __nfa
, _StateIdT __s
, _StateIdT __end
)
296 : _M_nfa(__nfa
), _M_start(__s
), _M_end(__end
)
299 // Append a state on *this and change *this to the new sequence.
301 _M_append(_StateIdT __id
)
303 _M_nfa
[_M_end
]._M_next
= __id
;
307 // Append a sequence on *this and change *this to the new sequence.
309 _M_append(const _StateSeq
& __s
)
311 _M_nfa
[_M_end
]._M_next
= __s
._M_start
;
315 // Clones an entire sequence.
326 _GLIBCXX_END_NAMESPACE_VERSION
327 } // namespace __detail
330 #include <bits/regex_automaton.tcc>