// class template regex -*- C++ -*-

// Copyright (C) 2010-2014 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
/**
 * @file bits/regex_compiler.h
 * This is an internal header file, included by other library headers.
 * Do not attempt to use it directly. @headername{regex}
 */
31 namespace std
_GLIBCXX_VISIBILITY(default)
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
 * @addtogroup regex-detail
 */
// Forward declaration: _Compiler::_M_expression_term takes a
// _BracketMatcher by reference before the template itself is defined.
template<typename, bool, bool>
  struct _BracketMatcher;
/**
 * @brief Builds an NFA from an input iterator range.
 *
 * The %_TraitsT type should fulfill requirements [28.3].
 */
// NOTE(review): this text comes from a line-numbered listing.  The leading
// numbers are listing line numbers, and lines that held only braces, access
// specifiers or return types appear to be missing.  The line naming the
// class is absent too; the constructor below shows it is _Compiler.
50 template<typename _TraitsT
>
// Character, pattern-iterator, automaton and syntax-flag types.
54 typedef typename
_TraitsT::char_type _CharT
;
55 typedef const _CharT
* _IterT
;
56 typedef _NFA
<_TraitsT
> _RegexT
;
57 typedef regex_constants::syntax_option_type _FlagT
;
// Constructor (declaration only): takes the pattern as a pair of _CharT
// pointers, a locale (the parameter is named __traits but its type is
// _TraitsT::locale_type) and the syntax flags.
59 _Compiler(_IterT __b
, _IterT __e
,
60 const typename
_TraitsT::locale_type
& __traits
, _FlagT __flags
);
// Accessor returning the automaton by moving it out of _M_nfa.  Its name
// line is missing here; presumably _M_get_nfa, which __compile_nfa calls.
62 std::shared_ptr
<_RegexT
>
64 { return std::move(_M_nfa
); }
// Helper types: scanner, string, token, state sequence, stack of state
// sequences and the ctype facet for _CharT.
67 typedef _Scanner
<_CharT
> _ScannerT
;
68 typedef typename
_TraitsT::string_type _StringT
;
69 typedef typename
_ScannerT::_TokenT _TokenT
;
70 typedef _StateSeq
<_TraitsT
> _StateSeqT
;
71 typedef std::stack
<_StateSeqT
> _StackT
;
72 typedef std::ctype
<_CharT
> _CtypeT
;
74 // accepts a specific token or returns false.
76 _M_match_token(_TokenT __token
);
// Declaration only.  The listing numbers jump from 76 to 97, so further
// declarations are presumably missing in between.
97 _M_bracket_expression();
// The matcher-insertion helpers below are member templates parameterised
// on two compile-time flags, __icase and __collate.  Declarations only.
99 template<bool __icase
, bool __collate
>
101 _M_insert_any_matcher_ecma();
103 template<bool __icase
, bool __collate
>
105 _M_insert_any_matcher_posix();
107 template<bool __icase
, bool __collate
>
109 _M_insert_char_matcher();
111 template<bool __icase
, bool __collate
>
113 _M_insert_character_class_matcher();
115 template<bool __icase
, bool __collate
>
117 _M_insert_bracket_matcher(bool __neg
);
// Takes a pair<bool, _CharT> and a _BracketMatcher, both by reference.
// The second parameter's name and the closing ");" are missing here.
119 template<bool __icase
, bool __collate
>
121 _M_expression_term(pair
<bool, _CharT
>& __last_char
,
122 _BracketMatcher
<_TraitsT
, __icase
, __collate
>&
// Takes a numeric radix.  Presumably converts the current token text to an
// int; TODO confirm against the out-of-line definition.
126 _M_cur_int_value(int __radix
);
// Fragment of a helper that copies the top of _M_stack into a local; the
// rest of that function is missing from this listing.
134 auto ret
= _M_stack
.top();
// Data members visible in this listing.  _M_stack is used above but its
// declaration is on a missing line.
140 _ScannerT _M_scanner
;
141 shared_ptr
<_RegexT
> _M_nfa
;
144 const _TraitsT
& _M_traits
;
145 const _CtypeT
& _M_ctype
;
148 template<typename _TraitsT
>
149 inline std::shared_ptr
<_NFA
<_TraitsT
>>
150 __compile_nfa(const typename
_TraitsT::char_type
* __first
,
151 const typename
_TraitsT::char_type
* __last
,
152 const typename
_TraitsT::locale_type
& __loc
,
153 regex_constants::syntax_option_type __flags
)
155 using _Cmplr
= _Compiler
<_TraitsT
>;
156 return _Cmplr(__first
, __last
, __loc
, __flags
)._M_get_nfa();
// Character translation helper parameterised on the __icase and __collate
// flags.
// NOTE(review): this text comes from a line-numbered listing.  The leading
// numbers are listing line numbers, and lines holding only braces, return
// types or conditions appear to be missing.
160 template<typename _TraitsT
, bool __icase
, bool __collate
>
161 class _RegexTranslator
164 typedef typename
_TraitsT::char_type _CharT
;
165 typedef typename
_TraitsT::string_type _StringT
;
// _StrTransT is chosen by std::conditional on __collate.  The type used
// when __collate is true sits on a missing line (presumably _StringT);
// the type used otherwise is _CharT.
166 typedef typename
std::conditional
<__collate
,
168 _CharT
>::type _StrTransT
;
// Keeps a reference to the traits object; it is not copied.
171 _RegexTranslator(const _TraitsT
& __traits
)
172 : _M_traits(__traits
)
// Two return statements are visible: traits.translate_nocase(__ch) and
// traits.translate(__ch).  The conditions choosing between them are not
// in this listing (presumably __icase and __collate); TODO confirm.
176 _M_translate(_CharT __ch
) const
179 return _M_traits
.translate_nocase(__ch
);
181 return _M_traits
.translate(__ch
);
// Forwards to _M_transform_impl with an integral_constant<bool, ...> tag.
// The tag's value argument is on a missing line.
187 _M_transform(_CharT __ch
) const
189 return _M_transform_impl(__ch
, typename integral_constant
<bool,
// false_type overload; its body is missing from this listing.
195 _M_transform_impl(_CharT __ch
, false_type
) const
// true_type overload: builds a one-element _StrTransT holding the
// translated character and returns traits.transform() over that range.
199 _M_transform_impl(_CharT __ch
, true_type
) const
201 _StrTransT __str
= _StrTransT(1, _M_translate(__ch
));
202 return _M_traits
.transform(__str
.begin(), __str
.end());
// Reference member: the traits object must outlive the translator.
205 const _TraitsT
& _M_traits
;
// Specialization for __icase == false and __collate == false.
// _StrTransT is simply _CharT here, and the constructor's traits
// parameter is unnamed, so it cannot be used.
// NOTE(review): the bodies of _M_translate and _M_transform are missing
// from this listing.  Presumably both return __ch unchanged; TODO confirm.
208 template<typename _TraitsT
>
209 class _RegexTranslator
<_TraitsT
, false, false>
212 typedef typename
_TraitsT::char_type _CharT
;
213 typedef _CharT _StrTransT
;
216 _RegexTranslator(const _TraitsT
&)
220 _M_translate(_CharT __ch
) const
224 _M_transform(_CharT __ch
) const
// Primary template, declared only.  The partial specializations that
// follow fix __is_ecma to false and to true respectively.
template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
  struct _AnyMatcher;
231 template<typename _TraitsT
, bool __icase
, bool __collate
>
232 struct _AnyMatcher
<_TraitsT
, false, __icase
, __collate
>
234 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
235 typedef typename
_TransT::_CharT _CharT
;
238 _AnyMatcher(const _TraitsT
& __traits
)
239 : _M_translator(__traits
)
243 operator()(_CharT __ch
) const
245 static auto __nul
= _M_translator
._M_translate('\0');
246 return _M_translator
._M_translate(__ch
) != __nul
;
249 _TransT _M_translator
;
252 template<typename _TraitsT
, bool __icase
, bool __collate
>
253 struct _AnyMatcher
<_TraitsT
, true, __icase
, __collate
>
255 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
256 typedef typename
_TransT::_CharT _CharT
;
259 _AnyMatcher(const _TraitsT
& __traits
)
260 : _M_translator(__traits
)
264 operator()(_CharT __ch
) const
265 { return _M_apply(__ch
, typename is_same
<_CharT
, char>::type()); }
268 _M_apply(_CharT __ch
, true_type
) const
270 auto __c
= _M_translator
._M_translate(__ch
);
271 auto __n
= _M_translator
._M_translate('\n');
272 auto __r
= _M_translator
._M_translate('\r');
273 return __c
!= __n
&& __c
!= __r
;
277 _M_apply(_CharT __ch
, false_type
) const
279 auto __c
= _M_translator
._M_translate(__ch
);
280 auto __n
= _M_translator
._M_translate('\n');
281 auto __r
= _M_translator
._M_translate('\r');
282 auto __u2028
= _M_translator
._M_translate(u
'\u2028');
283 auto __u2029
= _M_translator
._M_translate(u
'\u2029');
284 return __c
!= __n
&& __c
!= __r
&& __c
!= __u2028
&& __c
!= __u2029
;
287 _TransT _M_translator
;
290 template<typename _TraitsT
, bool __icase
, bool __collate
>
293 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
294 typedef typename
_TransT::_CharT _CharT
;
296 _CharMatcher(_CharT __ch
, const _TraitsT
& __traits
)
297 : _M_translator(__traits
), _M_ch(_M_translator
._M_translate(__ch
))
301 operator()(_CharT __ch
) const
302 { return _M_ch
== _M_translator
._M_translate(__ch
); }
304 _TransT _M_translator
;
// NOTE(review): this text comes from a line-numbered listing.  The leading
// numbers are listing line numbers.  Lines holding only braces, return
// types, conditions or the bodies of the _GLIBCXX_DEBUG blocks appear to
// be missing, so several statements below are shown without their guards.
308 /// Matches a character range (bracket expression)
309 template<typename _TraitsT
, bool __icase
, bool __collate
>
310 struct _BracketMatcher
// Translator type, plus the character, transformed-string, string and
// class-mask types taken from the translator and the traits.
313 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
314 typedef typename
_TransT::_CharT _CharT
;
315 typedef typename
_TransT::_StrTransT _StrTransT
;
316 typedef typename
_TraitsT::string_type _StringT
;
317 typedef typename
_TraitsT::char_class_type _CharClassT
;
// Starts with an empty class mask (_M_class_set(0)), builds the translator
// from the traits, keeps a reference to the traits and records the
// __is_non_matching flag.
320 _BracketMatcher(bool __is_non_matching
,
321 const _TraitsT
& __traits
)
322 : _M_class_set(0), _M_translator(__traits
), _M_traits(__traits
),
323 _M_is_non_matching(__is_non_matching
)
324 #ifdef _GLIBCXX_DEBUG
// Match test: asserts _M_is_ready in debug builds, then calls the
// _M_apply overload selected by the _UseCache tag.
330 operator()(_CharT __ch
) const
332 _GLIBCXX_DEBUG_ASSERT(_M_is_ready
);
333 return _M_apply(__ch
, _UseCache());
// Appends the translated form of __c to _M_char_set.
337 _M_add_char(_CharT __c
)
339 _M_char_set
.push_back(_M_translator
._M_translate(__c
));
340 #ifdef _GLIBCXX_DEBUG
// Looks __s up as a collating-element name.  The condition guarding the
// error_collate throw is on a missing line (presumably an empty lookup
// result).  Otherwise the translated first character of the result is
// appended to _M_char_set.
346 _M_add_collating_element(const _StringT
& __s
)
348 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
349 __s
.data() + __s
.size());
351 __throw_regex_error(regex_constants::error_collate
);
352 _M_char_set
.push_back(_M_translator
._M_translate(__st
[0]));
353 #ifdef _GLIBCXX_DEBUG
// Looks __s up as a collating-element name (throw guard missing, as in
// the function before), converts the result with transform_primary and
// appends it to _M_equiv_set.
359 _M_add_equivalence_class(const _StringT
& __s
)
361 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
362 __s
.data() + __s
.size());
364 __throw_regex_error(regex_constants::error_collate
);
365 __st
= _M_traits
.transform_primary(__st
.data(),
366 __st
.data() + __st
.size());
367 _M_equiv_set
.push_back(__st
);
368 #ifdef _GLIBCXX_DEBUG
373 // __neg should be true for \D, \S and \W only.
// Resolves the class name with lookup_classname; the call's last argument
// is on a missing line.  The visible statements either OR the mask into
// _M_class_set or append it to _M_neg_class_set.  The branch choosing
// between them and the guard of the error_ctype throw are missing here
// (presumably a test on __neg and on an empty mask); TODO confirm.
375 _M_add_character_class(const _StringT
& __s
, bool __neg
)
377 auto __mask
= _M_traits
.lookup_classname(__s
.data(),
378 __s
.data() + __s
.size(),
381 __throw_regex_error(regex_constants::error_ctype
);
383 _M_class_set
|= __mask
;
385 _M_neg_class_set
.push_back(__mask
);
386 #ifdef _GLIBCXX_DEBUG
// Appends the pair (transform(__l), transform(__r)) to _M_range_set.  The
// condition that triggers the error_range throw is on a missing line.
392 _M_make_range(_CharT __l
, _CharT __r
)
395 __throw_regex_error(regex_constants::error_range
);
396 _M_range_set
.push_back(make_pair(_M_translator
._M_transform(__l
),
397 _M_translator
._M_transform(__r
)));
398 #ifdef _GLIBCXX_DEBUG
// Body of a function whose name line is missing from this listing: sorts
// _M_char_set, removes duplicates, then builds the cache through the
// _M_make_cache overload selected by _UseCache.
406 std::sort(_M_char_set
.begin(), _M_char_set
.end());
407 auto __end
= std::unique(_M_char_set
.begin(), _M_char_set
.end());
408 _M_char_set
.erase(__end
, _M_char_set
.end());
409 _M_make_cache(_UseCache());
410 #ifdef _GLIBCXX_DEBUG
416 // Currently we only use the cache for char
417 typedef typename
std::is_same
<_CharT
, char>::type _UseCache
;
// constexpr size helper; its name line is missing, but it is used below
// as _S_cache_size().  With the cache enabled the shift count is the bit
// width of _CharT; otherwise the count is 0 and the result is 1.
419 static constexpr size_t
422 return 1ul << (sizeof(_CharT
) * __CHAR_BIT__
* int(_UseCache::value
));
// _CacheT is a bitset of that size when the cache is used, otherwise the
// placeholder type _Dummy (whose definition is on missing lines).
426 typedef typename
std::conditional
<_UseCache::value
,
427 std::bitset
<_S_cache_size()>,
428 _Dummy
>::type _CacheT
;
429 typedef typename
std::make_unsigned
<_CharT
>::type _UnsignedCharT
;
// Uncached match; declared here only.
432 _M_apply(_CharT __ch
, false_type
) const;
// Cached match: reads the bit for __ch, indexing with the character
// converted to its unsigned type so the index is never negative.
435 _M_apply(_CharT __ch
, true_type
) const
436 { return _M_cache
[static_cast<_UnsignedCharT
>(__ch
)]; }
// Fills every cache slot with the result of the uncached _M_apply for the
// character value equal to the slot index.
439 _M_make_cache(true_type
)
441 for (unsigned __i
= 0; __i
< _M_cache
.size(); __i
++)
442 _M_cache
[__i
] = _M_apply(static_cast<_CharT
>(__i
), false_type());
// No-cache overload; its body is missing from this listing.
446 _M_make_cache(false_type
)
// Data members.  _M_cache and _M_is_ready are used above but their
// declarations are on missing lines.
450 std::vector
<_CharT
> _M_char_set
;
451 std::vector
<_StringT
> _M_equiv_set
;
452 std::vector
<pair
<_StrTransT
, _StrTransT
>> _M_range_set
;
453 std::vector
<_CharClassT
> _M_neg_class_set
;
454 _CharClassT _M_class_set
;
455 _TransT _M_translator
;
456 const _TraitsT
& _M_traits
;
457 bool _M_is_non_matching
;
459 #ifdef _GLIBCXX_DEBUG
465 _GLIBCXX_END_NAMESPACE_VERSION
466 } // namespace __detail
469 #include <bits/regex_compiler.tcc>