pair<bool, _CharT> __last_char; // Optional<_CharT>
__last_char.first = false;
if (!(_M_flags & regex_constants::ECMAScript))
- if (_M_try_char())
- {
- __matcher._M_add_char(_M_value[0]);
- __last_char.first = true;
- __last_char.second = _M_value[0];
- }
+ {
+ if (_M_try_char())
+ {
+ __last_char.first = true;
+ __last_char.second = _M_value[0];
+ }
+ else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
+ {
+ __last_char.first = true;
+ __last_char.second = '-';
+ }
+ }
while (_M_expression_term(__last_char, __matcher));
+ if (__last_char.first)
+ __matcher._M_add_char(__last_char.second);
__matcher._M_ready();
_M_stack.push(_StateSeqT(
*_M_nfa,
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return false;
+ const auto __push_char = [&](_CharT __ch)
+ {
+ if (__last_char.first)
+ __matcher._M_add_char(__last_char.second);
+ else
+ __last_char.first = true;
+ __last_char.second = __ch;
+ };
+ const auto __flush = [&]
+ {
+ if (__last_char.first)
+ {
+ __matcher._M_add_char(__last_char.second);
+ __last_char.first = false;
+ }
+ };
+
if (_M_match_token(_ScannerT::_S_token_collsymbol))
{
auto __symbol = __matcher._M_add_collate_element(_M_value);
if (__symbol.size() == 1)
- {
- __last_char.first = true;
- __last_char.second = __symbol[0];
- }
+ __push_char(__symbol[0]);
+ else
+ __flush();
}
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
- __matcher._M_add_equivalence_class(_M_value);
+ {
+ __flush();
+ __matcher._M_add_equivalence_class(_M_value);
+ }
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
- __matcher._M_add_character_class(_M_value, false);
+ {
+ __flush();
+ __matcher._M_add_character_class(_M_value, false);
+ }
+ else if (_M_try_char())
+ __push_char(_M_value[0]);
// POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
// except when the '-' is the first or last character in the bracket
// expression ([--0]). ECMAScript treats all '-' after a range as a
// Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
//
// It turns out that no one reads BNFs ;)
- else if (_M_try_char())
+ else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
{
if (!__last_char.first)
{
- __matcher._M_add_char(_M_value[0]);
- if (_M_value[0] == '-'
- && !(_M_flags & regex_constants::ECMAScript))
+ if (!(_M_flags & regex_constants::ECMAScript))
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
- return false;
+ {
+ __push_char('-');
+ return false;
+ }
__throw_regex_error(
regex_constants::error_range,
"Unexpected dash in bracket expression. For POSIX syntax, "
"a dash is not treated literally only when it is at "
"beginning or end.");
}
- __last_char.first = true;
- __last_char.second = _M_value[0];
+ __push_char('-');
}
else
{
- if (_M_value[0] == '-')
+ if (_M_try_char())
{
- if (_M_try_char())
- {
- __matcher._M_make_range(__last_char.second , _M_value[0]);
- __last_char.first = false;
- }
- else
- {
- if (_M_scanner._M_get_token()
- != _ScannerT::_S_token_bracket_end)
- __throw_regex_error(
- regex_constants::error_range,
- "Unexpected end of bracket expression.");
- __matcher._M_add_char(_M_value[0]);
- }
+ __matcher._M_make_range(__last_char.second, _M_value[0]);
+ __last_char.first = false;
+ }
+ else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
+ {
+ __matcher._M_make_range(__last_char.second, '-');
+ __last_char.first = false;
}
else
{
- __matcher._M_add_char(_M_value[0]);
- __last_char.second = _M_value[0];
+ if (_M_scanner._M_get_token()
+ != _ScannerT::_S_token_bracket_end)
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Character is expected after a dash.");
+ __push_char('-');
}
}
}
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
- __matcher._M_add_character_class(_M_value,
- _M_ctype.is(_CtypeT::upper,
- _M_value[0]));
+ {
+ __flush();
+ __matcher._M_add_character_class(_M_value,
+ _M_ctype.is(_CtypeT::upper,
+ _M_value[0]));
+ }
else
__throw_regex_error(regex_constants::error_brack,
"Unexpected character in bracket expression.");