/* CPP Library - lexical analysis.
- Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ Copyright (C) 2000-2020 Free Software Foundation, Inc.
Contributed by Per Bothner, 1994-95.
Based on CCCP program by Paul Rubin, June 1986
Adapted to ANSI C, Richard Stallman, Jan 1987
search_line_fast = impl;
}
-#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
+#elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
/* A vection of the fast scanner using AltiVec vectorized byte compares
and VSX unaligned loads (when VSX is available). This is otherwise
- the same as the pre-GCC 5 version. */
+ the same as the AltiVec version. */
ATTRIBUTE_NO_SANITIZE_UNDEFINED
static const uchar *
{
vc m_nl, m_cr, m_bs, m_qm;
- data = *((const vc *)s);
+ data = __builtin_vec_vsx_ld (0, s);
s += 16;
m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
d = (uchar *) s;
/* Handle DOS line endings. */
- if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
+ if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
s++;
}
skip_line_comment (cpp_reader *pfile)
{
cpp_buffer *buffer = pfile->buffer;
- source_location orig_line = pfile->line_table->highest_line;
+ location_t orig_line = pfile->line_table->highest_line;
while (*buffer->cur != '\n')
buffer->cur++;
}
}
-/* Returns TRUE if the sequence starting at buffer->cur is invalid in
+static const cppchar_t utf8_signifier = 0xC0;
+
+/* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. FIRST is TRUE if this starts an identifier. */
static bool
forms_identifier_p (cpp_reader *pfile, int first,
return true;
}
- /* Is this a syntactically valid UCN? */
- if (CPP_OPTION (pfile, extended_identifiers)
- && *buffer->cur == '\\'
- && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+ /* Is this a syntactically valid UCN or a valid UTF-8 char? */
+ if (CPP_OPTION (pfile, extended_identifiers))
{
cppchar_t s;
- buffer->cur += 2;
- if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
- state, &s, NULL, NULL))
- return true;
- buffer->cur -= 2;
+ if (*buffer->cur >= utf8_signifier)
+ {
+ if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+ state, &s))
+ return true;
+ }
+ else if (*buffer->cur == '\\'
+ && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+ {
+ buffer->cur += 2;
+ if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+ state, &s, NULL, NULL))
+ return true;
+ buffer->cur -= 2;
+ }
}
return false;
{
/* __VA_OPT__ should not be accepted at all, but allow it in
system headers. */
- if (!cpp_in_system_header (pfile))
+ if (!_cpp_in_system_header (pfile))
cpp_error (pfile, CPP_DL_PEDWARN,
- "__VA_OPT__ is not available until C++2a");
+ "__VA_OPT__ is not available until C++20");
}
else if (!pfile->state.va_args_ok)
{
variadic macro. */
cpp_error (pfile, CPP_DL_PEDWARN,
"__VA_OPT__ can only appear in the expansion"
- " of a C++2a variadic macro");
+ " of a C++20 variadic macro");
}
}
pfile->buffer->cur = cur;
if (starts_ucn || forms_identifier_p (pfile, false, nst))
{
- /* Slower version for identifiers containing UCNs (or $). */
+ /* Slower version for identifiers containing UCNs
+ or extended chars (including $). */
do {
while (ISIDNUM (*pfile->buffer->cur))
{
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
unsigned int len, enum cpp_ttype type)
{
- uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
-
- memcpy (dest, base, len);
- dest[len] = '\0';
token->type = type;
token->val.str.len = len;
- token->val.str.text = dest;
+ token->val.str.text = cpp_alloc_token_string (pfile, base, len);
+}
+
+const uchar *
+cpp_alloc_token_string (cpp_reader *pfile,
+ const unsigned char *ptr, unsigned len)
+{
+ uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
+
+ dest[len] = 0;
+ memcpy (dest, ptr, len);
+ return dest;
}
+/* A pair of raw buffer pointers. The currently open one is [1], the
+ first one is [0]. Used for string literal lexing. */
+struct lit_accum {
+ _cpp_buff *first;
+ _cpp_buff *last;
+ const uchar *rpos;
+ size_t accum;
+
+ lit_accum ()
+ : first (NULL), last (NULL), rpos (0), accum (0)
+ {
+ }
+
+ void append (cpp_reader *, const uchar *, size_t);
+
+ void read_begin (cpp_reader *);
+ bool reading_p () const
+ {
+ return rpos != NULL;
+ }
+ char read_char ()
+ {
+ char c = *rpos++;
+ if (rpos == BUFF_FRONT (last))
+ rpos = NULL;
+ return c;
+ }
+};
+
/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
-static void
-bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
- _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+void
+lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
{
- _cpp_buff *first_buff = *first_buff_p;
- _cpp_buff *last_buff = *last_buff_p;
-
- if (first_buff == NULL)
- first_buff = last_buff = _cpp_get_buff (pfile, len);
- else if (len > BUFF_ROOM (last_buff))
+ if (!last)
+ /* Starting. */
+ first = last = _cpp_get_buff (pfile, len);
+ else if (len > BUFF_ROOM (last))
{
- size_t room = BUFF_ROOM (last_buff);
- memcpy (BUFF_FRONT (last_buff), base, room);
- BUFF_FRONT (last_buff) += room;
+ /* There is insufficient room in the buffer. Copy what we can,
+ and then either extend or create a new one. */
+ size_t room = BUFF_ROOM (last);
+ memcpy (BUFF_FRONT (last), base, room);
+ BUFF_FRONT (last) += room;
base += room;
len -= room;
- last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
- }
+ accum += room;
- memcpy (BUFF_FRONT (last_buff), base, len);
- BUFF_FRONT (last_buff) += len;
+ gcc_checking_assert (!rpos);
- *first_buff_p = first_buff;
- *last_buff_p = last_buff;
+ last = _cpp_append_extend_buff (pfile, last, len);
+ }
+
+ memcpy (BUFF_FRONT (last), base, len);
+ BUFF_FRONT (last) += len;
+ accum += len;
}
+void
+lit_accum::read_begin (cpp_reader *pfile)
+{
+ /* We never accumulate more than 4 chars to read. */
+ if (BUFF_ROOM (last) < 4)
+
+ last = _cpp_append_extend_buff (pfile, last, 4);
+ rpos = BUFF_FRONT (last);
+}
/* Returns true if a macro has been defined.
This might not work if compile with -save-temps,
cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
base, cur - base, hash, HT_NO_INSERT));
- return !result ? false : (result->type == NT_MACRO);
+ return result && cpp_macro_p (result);
}
+/* Returns true if a literal suffix does not have the expected form
+ and is defined as a macro. */
-/* Lexes a raw string. The stored string contains the spelling, including
- double quotes, delimiter string, '(' and ')', any leading
- 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
- literal, or CPP_OTHER if it was not properly terminated.
+static bool
+is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
+{
+ /* User-defined literals outside of namespace std must start with a single
+ underscore, so assume anything of that form really is a UDL suffix.
+ We don't need to worry about UDLs defined inside namespace std because
+ their names are reserved, so cannot be used as macro names in valid
+ programs. */
+ if (base[0] == '_' && base[1] != '_')
+ return false;
+ return is_macro (pfile, base);
+}
+
+/* Lexes a raw string. The stored string contains the spelling,
+ including double quotes, delimiter string, '(' and ')', any leading
+ 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains
+ the type of the literal, or CPP_OTHER if it was not properly
+ terminated.
+
+ BASE is the start of the token. Updates pfile->buffer->cur to just
+ after the lexed string.
The spelling is NUL-terminated, but it is not guaranteed that this
is the first NUL since embedded NULs are preserved. */
static void
-lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
- const uchar *cur)
-{
- uchar raw_prefix[17];
- uchar temp_buffer[18];
- const uchar *orig_base;
- unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
- enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
- raw_str_phase phase = RAW_STR_PREFIX;
- enum cpp_ttype type;
- size_t total_len = 0;
- /* Index into temp_buffer during phases other than RAW_STR,
- during RAW_STR phase 17 to tell BUF_APPEND that nothing should
- be appended to temp_buffer. */
- size_t temp_buffer_len = 0;
- _cpp_buff *first_buff = NULL, *last_buff = NULL;
- size_t raw_prefix_start;
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
+{
+ const uchar *pos = base;
+
+ /* 'tis a pity this information isn't passed down from the lexer's
+ initial categorization of the token. */
+ enum cpp_ttype type = CPP_STRING;
+
+ if (*pos == 'L')
+ {
+ type = CPP_WSTRING;
+ pos++;
+ }
+ else if (*pos == 'U')
+ {
+ type = CPP_STRING32;
+ pos++;
+ }
+ else if (*pos == 'u')
+ {
+ if (pos[1] == '8')
+ {
+ type = CPP_UTF8STRING;
+ pos++;
+ }
+ else
+ type = CPP_STRING16;
+ pos++;
+ }
+
+ gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
+ pos += 2;
+
_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
- type = (*base == 'L' ? CPP_WSTRING :
- *base == 'U' ? CPP_STRING32 :
- *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
- : CPP_STRING);
-
-#define BUF_APPEND(STR,LEN) \
- do { \
- bufring_append (pfile, (const uchar *)(STR), (LEN), \
- &first_buff, &last_buff); \
- total_len += (LEN); \
- if (__builtin_expect (temp_buffer_len < 17, 0) \
- && (const uchar *)(STR) != base \
- && (LEN) <= 2) \
- { \
- memcpy (temp_buffer + temp_buffer_len, \
- (const uchar *)(STR), (LEN)); \
- temp_buffer_len += (LEN); \
- } \
- } while (0)
-
- orig_base = base;
- ++cur;
- raw_prefix_start = cur - base;
+ /* Skip notes before the ". */
+ while (note->pos < pos)
+ ++note;
+
+ lit_accum accum;
+
+ uchar prefix[17];
+ unsigned prefix_len = 0;
+ enum Phase
+ {
+ PHASE_PREFIX = -2,
+ PHASE_NONE = -1,
+ PHASE_SUFFIX = 0
+ } phase = PHASE_PREFIX;
+
for (;;)
{
- cppchar_t c;
+ gcc_checking_assert (note->pos >= pos);
- /* If we previously performed any trigraph or line splicing
- transformations, undo them in between the opening and closing
- double quote. */
- while (note->pos < cur)
- ++note;
- for (; note->pos == cur; ++note)
- {
- switch (note->type)
- {
- case '\\':
- case ' ':
- /* Restore backslash followed by newline. */
- BUF_APPEND (base, cur - base);
- base = cur;
- BUF_APPEND ("\\", 1);
- after_backslash:
- if (note->type == ' ')
- {
- /* GNU backslash whitespace newline extension. FIXME
- could be any sequence of non-vertical space. When we
- can properly restore any such sequence, we should mark
- this note as handled so _cpp_process_line_notes
- doesn't warn. */
- BUF_APPEND (" ", 1);
- }
-
- BUF_APPEND ("\n", 1);
- break;
+ /* Undo any escaped newlines and trigraphs. */
+ if (!accum.reading_p () && note->pos == pos)
+ switch (note->type)
+ {
+ case '\\':
+ case ' ':
+ /* Restore backslash followed by newline. */
+ accum.append (pfile, base, pos - base);
+ base = pos;
+ accum.read_begin (pfile);
+ accum.append (pfile, UC"\\", 1);
+
+ after_backslash:
+ if (note->type == ' ')
+ /* GNU backslash whitespace newline extension. FIXME
+ could be any sequence of non-vertical space. When we
+ can properly restore any such sequence, we should
+ mark this note as handled so _cpp_process_line_notes
+ doesn't warn. */
+ accum.append (pfile, UC" ", 1);
+
+ accum.append (pfile, UC"\n", 1);
+ note++;
+ break;
- case 0:
- /* Already handled. */
- break;
+ case '\n':
+ /* This can happen for ??/<NEWLINE> when trigraphs are not
+ being interpretted. */
+ gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
+ note->type = 0;
+ note++;
+ break;
- default:
- if (_cpp_trigraph_map[note->type])
- {
- /* Don't warn about this trigraph in
- _cpp_process_line_notes, since trigraphs show up as
- trigraphs in raw strings. */
- uchar type = note->type;
- note->type = 0;
-
- if (!CPP_OPTION (pfile, trigraphs))
- /* If we didn't convert the trigraph in the first
- place, don't do anything now either. */
- break;
+ default:
+ gcc_checking_assert (_cpp_trigraph_map[note->type]);
+
+ /* Don't warn about this trigraph in
+ _cpp_process_line_notes, since trigraphs show up as
+ trigraphs in raw strings. */
+ uchar type = note->type;
+ note->type = 0;
+
+ if (CPP_OPTION (pfile, trigraphs))
+ {
+ accum.append (pfile, base, pos - base);
+ base = pos;
+ accum.read_begin (pfile);
+ accum.append (pfile, UC"??", 2);
+ accum.append (pfile, &type, 1);
+
+ /* ??/ followed by newline gets two line notes, one for
+ the trigraph and one for the backslash/newline. */
+ if (type == '/' && note[1].pos == pos)
+ {
+ note++;
+ gcc_assert (note->type == '\\' || note->type == ' ');
+ goto after_backslash;
+ }
+ /* Skip the replacement character. */
+ base = ++pos;
+ }
+
+ note++;
+ break;
+ }
- BUF_APPEND (base, cur - base);
- base = cur;
- BUF_APPEND ("??", 2);
+ /* Now get a char to process. Either from an expanded note, or
+ from the line buffer. */
+ bool read_note = accum.reading_p ();
+ char c = read_note ? accum.read_char () : *pos++;
- /* ??/ followed by newline gets two line notes, one for
- the trigraph and one for the backslash/newline. */
- if (type == '/' && note[1].pos == cur)
- {
- if (note[1].type != '\\'
- && note[1].type != ' ')
- abort ();
- BUF_APPEND ("/", 1);
- ++note;
- goto after_backslash;
- }
- else
- {
- /* Skip the replacement character. */
- base = ++cur;
- BUF_APPEND (&type, 1);
- c = type;
- goto check_c;
- }
- }
+ if (phase == PHASE_PREFIX)
+ {
+ if (c == '(')
+ {
+ /* Done. */
+ phase = PHASE_NONE;
+ prefix[prefix_len++] = '"';
+ }
+ else if (prefix_len < 16
+ /* Prefix chars are any of the basic character set,
+ [lex.charset] except for '
+ ()\\\t\v\f\n'. Optimized for a contiguous
+ alphabet. */
+ /* Unlike a switch, this collapses down to one or
+ two shift and bitmask operations on an ASCII
+ system, with an outlier or two. */
+ && (('Z' - 'A' == 25
+ ? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ : ISIDST (c))
+ || (c >= '0' && c <= '9')
+ || c == '_' || c == '{' || c == '}'
+ || c == '[' || c == ']' || c == '#'
+ || c == '<' || c == '>' || c == '%'
+ || c == ':' || c == ';' || c == '.' || c == '?'
+ || c == '*' || c == '+' || c == '-' || c == '/'
+ || c == '^' || c == '&' || c == '|' || c == '~'
+ || c == '!' || c == '=' || c == ','
+ || c == '"' || c == '\''))
+ prefix[prefix_len++] = c;
+ else
+ {
+ /* Something is wrong. */
+ int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
+ if (prefix_len == 16)
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+ col, "raw string delimiter longer "
+ "than 16 characters");
+ else if (c == '\n')
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+ col, "invalid new-line in raw "
+ "string delimiter");
else
- abort ();
- break;
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+ col, "invalid character '%c' in "
+ "raw string delimiter", c);
+ type = CPP_OTHER;
+ phase = PHASE_NONE;
+ /* Continue until we get a close quote, that's probably
+ the best failure mode. */
+ prefix_len = 0;
}
+ if (c != '\n')
+ continue;
}
- c = *cur++;
- if (__builtin_expect (temp_buffer_len < 17, 0))
- temp_buffer[temp_buffer_len++] = c;
- check_c:
- if (phase == RAW_STR_PREFIX)
+ if (phase != PHASE_NONE)
{
- while (raw_prefix_len < temp_buffer_len)
+ if (prefix[phase] != c)
+ phase = PHASE_NONE;
+ else if (unsigned (phase + 1) == prefix_len)
+ break;
+ else
{
- raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
- switch (raw_prefix[raw_prefix_len])
- {
- case ' ': case '(': case ')': case '\\': case '\t':
- case '\v': case '\f': case '\n': default:
- break;
- /* Basic source charset except the above chars. */
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case '_': case '{': case '}': case '#': case '[': case ']':
- case '<': case '>': case '%': case ':': case ';': case '.':
- case '?': case '*': case '+': case '-': case '/': case '^':
- case '&': case '|': case '~': case '!': case '=': case ',':
- case '"': case '\'':
- if (raw_prefix_len < 16)
- {
- raw_prefix_len++;
- continue;
- }
- break;
- }
-
- if (raw_prefix[raw_prefix_len] != '(')
- {
- int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
- if (raw_prefix_len == 16)
- cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
- col, "raw string delimiter longer "
- "than 16 characters");
- else if (raw_prefix[raw_prefix_len] == '\n')
- cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
- col, "invalid new-line in raw "
- "string delimiter");
- else
- cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
- col, "invalid character '%c' in "
- "raw string delimiter",
- (int) raw_prefix[raw_prefix_len]);
- pfile->buffer->cur = orig_base + raw_prefix_start - 1;
- create_literal (pfile, token, orig_base,
- raw_prefix_start - 1, CPP_OTHER);
- if (first_buff)
- _cpp_release_buff (pfile, first_buff);
- return;
- }
- raw_prefix[raw_prefix_len] = '"';
- phase = RAW_STR;
- /* Nothing should be appended to temp_buffer during
- RAW_STR phase. */
- temp_buffer_len = 17;
- break;
+ phase = Phase (phase + 1);
+ continue;
}
- continue;
}
- else if (phase == RAW_STR_SUFFIX)
- {
- while (raw_suffix_len <= raw_prefix_len
- && raw_suffix_len < temp_buffer_len
- && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
- raw_suffix_len++;
- if (raw_suffix_len > raw_prefix_len)
- break;
- if (raw_suffix_len == temp_buffer_len)
- continue;
- phase = RAW_STR;
- /* Nothing should be appended to temp_buffer during
- RAW_STR phase. */
- temp_buffer_len = 17;
- }
- if (c == ')')
- {
- phase = RAW_STR_SUFFIX;
- raw_suffix_len = 0;
- temp_buffer_len = 0;
- }
- else if (c == '\n')
+
+ if (!prefix_len && c == '"')
+ /* Failure mode lexing. */
+ goto out;
+ else if (prefix_len && c == ')')
+ phase = PHASE_SUFFIX;
+ else if (!read_note && c == '\n')
{
+ pos--;
+ pfile->buffer->cur = pos;
if (pfile->state.in_directive
|| (pfile->state.parsing_args
&& pfile->buffer->next_line >= pfile->buffer->rlimit))
{
- cur--;
- type = CPP_OTHER;
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
"unterminated raw string");
- break;
+ type = CPP_OTHER;
+ goto out;
}
- BUF_APPEND (base, cur - base);
+ accum.append (pfile, base, pos - base + 1);
+ _cpp_process_line_notes (pfile, false);
- if (pfile->buffer->cur < pfile->buffer->rlimit)
+ if (pfile->buffer->next_line < pfile->buffer->rlimit)
CPP_INCREMENT_LINE (pfile, 0);
pfile->buffer->need_line = true;
- pfile->buffer->cur = cur-1;
- _cpp_process_line_notes (pfile, false);
if (!_cpp_get_fresh_line (pfile))
{
- source_location src_loc = token->src_loc;
+ /* We ran out of file and failed to get a line. */
+ location_t src_loc = token->src_loc;
token->type = CPP_EOF;
/* Tell the compiler the line number of the EOF token. */
token->src_loc = pfile->line_table->highest_line;
token->flags = BOL;
- if (first_buff != NULL)
- _cpp_release_buff (pfile, first_buff);
+ if (accum.first)
+ _cpp_release_buff (pfile, accum.first);
cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
"unterminated raw string");
+ /* Now pop the buffer that _cpp_get_fresh_line did not. */
+ _cpp_pop_buffer (pfile);
return;
}
- cur = base = pfile->buffer->cur;
+ pos = base = pfile->buffer->cur;
note = &pfile->buffer->notes[pfile->buffer->cur_note];
}
}
{
/* If a string format macro, say from inttypes.h, is placed touching
a string literal it could be parsed as a C++11 user-defined string
- literal thus breaking the program.
- Try to identify macros with is_macro. A warning is issued.
- The macro name should not start with '_' for this warning. */
- if ((*cur != '_') && is_macro (pfile, cur))
+ literal thus breaking the program. */
+ if (is_macro_not_literal_suffix (pfile, pos))
{
/* Raise a warning, but do not consume subsequent tokens. */
if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
"a space between literal and string macro");
}
/* Grab user defined literal suffix. */
- else if (ISIDST (*cur))
+ else if (ISIDST (*pos))
{
type = cpp_userdef_string_add_type (type);
- ++cur;
+ ++pos;
- while (ISIDNUM (*cur))
- ++cur;
+ while (ISIDNUM (*pos))
+ ++pos;
}
}
- pfile->buffer->cur = cur;
- if (first_buff == NULL)
- create_literal (pfile, token, base, cur - base, type);
+ out:
+ pfile->buffer->cur = pos;
+ if (!accum.accum)
+ create_literal (pfile, token, base, pos - base, type);
else
{
- uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+ size_t extra_len = pos - base;
+ uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);
token->type = type;
- token->val.str.len = total_len + (cur - base);
+ token->val.str.len = accum.accum + extra_len;
token->val.str.text = dest;
- last_buff = first_buff;
- while (last_buff != NULL)
+ for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
{
- memcpy (dest, last_buff->base,
- BUFF_FRONT (last_buff) - last_buff->base);
- dest += BUFF_FRONT (last_buff) - last_buff->base;
- last_buff = last_buff->next;
+ size_t len = BUFF_FRONT (buf) - buf->base;
+ memcpy (dest, buf->base, len);
+ dest += len;
}
- _cpp_release_buff (pfile, first_buff);
- memcpy (dest, base, cur - base);
- dest[cur - base] = '\0';
+ _cpp_release_buff (pfile, accum.first);
+ memcpy (dest, base, extra_len);
+ dest[extra_len] = '\0';
}
}
}
if (terminator == 'R')
{
- lex_raw_string (pfile, token, base, cur);
+ lex_raw_string (pfile, token, base);
return;
}
if (terminator == '"')
{
/* If a string format macro, say from inttypes.h, is placed touching
a string literal it could be parsed as a C++11 user-defined string
- literal thus breaking the program.
- Try to identify macros with is_macro. A warning is issued.
- The macro name should not start with '_' for this warning. */
- if ((*cur != '_') && is_macro (pfile, cur))
+ literal thus breaking the program. */
+ if (is_macro_not_literal_suffix (pfile, cur))
{
/* Raise a warning, but do not consume subsequent tokens. */
if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
index--;
break;
}
+ else if (peektok->type == CPP_PRAGMA)
+ {
+ /* Don't peek past a pragma. */
+ if (peektok == &pfile->directive_result)
+ /* Save the pragma in the buffer. */
+ *pfile->cur_token++ = *peektok;
+ index--;
+ break;
+ }
}
while (index--);
return result;
}
+/* We're at the beginning of a logical line (so not in
+ directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See
+ if we should enter deferred_pragma mode to tokenize the rest of the
+ line as a module control-line. */
+
+static void
+cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
+{
+ unsigned backup = 0; /* Tokens we peeked. */
+ cpp_hashnode *node = result->val.node.node;
+ cpp_token *peek = result;
+ cpp_token *keyword = peek;
+ cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
+ int header_count = 0;
+
+ /* Make sure the incoming state is as we expect it. This way we
+ can restore it using constants. */
+ gcc_checking_assert (!pfile->state.in_deferred_pragma
+ && !pfile->state.skipping
+ && !pfile->state.parsing_args
+ && !pfile->state.angled_headers
+ && (pfile->state.save_comments
+ == !CPP_OPTION (pfile, discard_comments)));
+
+ /* Enter directives mode sufficiently for peeking. We don't have
+ to actually set in_directive. */
+ pfile->state.in_deferred_pragma = true;
+
+ /* These two fields are needed to process tokenization in deferred
+ pragma mode. They are not used outside deferred pragma mode or
+ directives mode. */
+ pfile->state.pragma_allow_expansion = true;
+ pfile->directive_line = result->src_loc;
+
+ /* Saving comments is incompatible with directives mode. */
+ pfile->state.save_comments = 0;
+
+ if (node == n_modules[spec_nodes::M_EXPORT][0])
+ {
+ peek = _cpp_lex_direct (pfile);
+ keyword = peek;
+ backup++;
+ if (keyword->type != CPP_NAME)
+ goto not_module;
+ node = keyword->val.node.node;
+ if (!(node->flags & NODE_MODULE))
+ goto not_module;
+ }
+
+ if (node == n_modules[spec_nodes::M__IMPORT][0])
+ /* __import */
+ header_count = backup + 2 + 16;
+ else if (node == n_modules[spec_nodes::M_IMPORT][0])
+ /* import */
+ header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
+ else if (node == n_modules[spec_nodes::M_MODULE][0])
+ ; /* module */
+ else
+ goto not_module;
+
+ /* We've seen [export] {module|import|__import}. Check the next token. */
+ if (header_count)
+ /* After '{,__}import' a header name may appear. */
+ pfile->state.angled_headers = true;
+ peek = _cpp_lex_direct (pfile);
+ backup++;
+
+ /* ... import followed by identifier, ':', '<' or
+ header-name preprocessing tokens, or module
+ followed by cpp-identifier, ':' or ';' preprocessing
+ tokens. C++ keywords are not yet relevant. */
+ if (peek->type == CPP_NAME
+ || peek->type == CPP_COLON
+ || (header_count
+ ? (peek->type == CPP_LESS
+ || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
+ || peek->type == CPP_HEADER_NAME)
+ : peek->type == CPP_SEMICOLON))
+ {
+ pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
+ if (!pfile->state.pragma_allow_expansion)
+ pfile->state.prevent_expansion++;
+
+ if (!header_count && linemap_included_from
+ (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
+ cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
+ "module control-line cannot be in included file");
+
+ /* The first one or two tokens cannot be macro names. */
+ for (int ix = backup; ix--;)
+ {
+ cpp_token *tok = ix ? keyword : result;
+ cpp_hashnode *node = tok->val.node.node;
+
+ /* Don't attempt to expand the token. */
+ tok->flags |= NO_EXPAND;
+ if (_cpp_defined_macro_p (node)
+ && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
+ && !cpp_fun_like_macro_p (node))
+ cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
+ "module control-line \"%s\" cannot be"
+ " an object-like macro",
+ NODE_NAME (node));
+ }
+
+ /* Map to underbar variants. */
+ keyword->val.node.node = n_modules[header_count
+ ? spec_nodes::M_IMPORT
+ : spec_nodes::M_MODULE][1];
+ if (backup != 1)
+ result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
+
+ /* Maybe tell the tokenizer we expect a header-name down the
+ road. */
+ pfile->state.directive_file_token = header_count;
+ }
+ else
+ {
+ not_module:
+ /* Drop out of directive mode. */
+ /* We aaserted save_comments had this value upon entry. */
+ pfile->state.save_comments
+ = !CPP_OPTION (pfile, discard_comments);
+ pfile->state.in_deferred_pragma = false;
+ /* Do not let this remain on. */
+ pfile->state.angled_headers = false;
+ }
+
+ /* In either case we want to backup the peeked tokens. */
+ if (backup)
+ {
+ /* If we saw EOL, we should drop it, because this isn't a module
+ control-line after all. */
+ bool eol = peek->type == CPP_PRAGMA_EOL;
+ if (!eol || backup > 1)
+ {
+ /* Put put the peeked tokens back */
+ _cpp_backup_tokens_direct (pfile, backup);
+ /* But if the last one was an EOL, forget it. */
+ if (eol)
+ pfile->lookaheads--;
+ }
+ }
+}
+
/* Lex a token into RESULT (external interface). Takes care of issues
like directive handling, token lookahead, multiple include
optimization and skipping. */
}
else if (pfile->state.in_deferred_pragma)
result = &pfile->directive_result;
+ else if (result->type == CPP_NAME
+ && (result->val.node.node->flags & NODE_MODULE)
+ && !pfile->state.skipping
+ /* Unlike regular directives, we do not deal with
+ tokenizing module directives as macro arguments.
+ That's not permitted. */
+ && !pfile->state.parsing_args)
+ {
+ /* P1857. Before macro expansion, At start of logical
+ line ... */
+ /* We don't have to consider lookaheads at this point. */
+ gcc_checking_assert (!pfile->lookaheads);
+
+ cpp_maybe_module_directive (pfile, result);
+ }
if (pfile->cb.line_change && !pfile->state.skipping)
pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
bool
_cpp_get_fresh_line (cpp_reader *pfile)
{
- int return_at_eof;
-
/* We can't get a new line until we leave the current directive. */
if (pfile->state.in_directive)
return false;
buffer->next_line = buffer->rlimit;
}
- return_at_eof = buffer->return_at_eof;
- _cpp_pop_buffer (pfile);
- if (pfile->buffer == NULL || return_at_eof)
- return false;
+ if (buffer->prev && !buffer->return_at_eof)
+ _cpp_pop_buffer (pfile);
+ else
+ {
+ /* End of translation. Do not pop the buffer yet. Increment
+ line number so that the EOF token is on a line of its own
+ (_cpp_lex_direct doesn't increment in that case, because
+ it's hard for it to distinguish this special case). */
+ CPP_INCREMENT_LINE (pfile, 0);
+ return false;
+ }
}
}
buffer = pfile->buffer;
if (buffer->need_line)
{
- if (pfile->state.in_deferred_pragma)
- {
- result->type = CPP_PRAGMA_EOL;
- pfile->state.in_deferred_pragma = false;
- if (!pfile->state.pragma_allow_expansion)
- pfile->state.prevent_expansion--;
- return result;
- }
+ gcc_assert (!pfile->state.in_deferred_pragma);
if (!_cpp_get_fresh_line (pfile))
{
result->type = CPP_EOF;
- if (!pfile->state.in_directive)
+ /* Not a real EOF in a directive or arg parsing -- we refuse
+ to advance to the next file now, and will once we're out
+ of those modes. */
+ if (!pfile->state.in_directive && !pfile->state.parsing_args)
{
/* Tell the compiler the line number of the EOF token. */
result->src_loc = pfile->line_table->highest_line;
result->flags = BOL;
+ /* Now pop the buffer that _cpp_get_fresh_line did not. */
+ _cpp_pop_buffer (pfile);
}
return result;
}
}
c = *buffer->cur++;
- if (pfile->forced_token_location_p)
- result->src_loc = *pfile->forced_token_location_p;
+ if (pfile->forced_token_location)
+ result->src_loc = pfile->forced_token_location;
else
result->src_loc = linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (buffer, buffer->cur));
goto skipped_white;
case '\n':
- if (buffer->cur < buffer->rlimit)
+ /* Increment the line, unless this is the last line ... */
+ if (buffer->cur < buffer->rlimit
+ /* ... or this is a #include, (where _cpp_stack_file needs to
+ unwind by one line) ... */
+ || (pfile->state.in_directive > 1
+ /* ... except traditional-cpp increments this elsewhere. */
+ && !CPP_OPTION (pfile, traditional)))
CPP_INCREMENT_LINE (pfile, 0);
buffer->need_line = true;
+ if (pfile->state.in_deferred_pragma)
+ {
+ /* Produce the PRAGMA_EOL on this line. File reading
+ ensures there is always a \n at end of the buffer, thus
+ in a deferred pragma we always see CPP_PRAGMA_EOL before
+ any CPP_EOF. */
+ result->type = CPP_PRAGMA_EOL;
+ result->flags &= ~PREV_WHITE;
+ pfile->state.in_deferred_pragma = false;
+ if (!pfile->state.pragma_allow_expansion)
+ pfile->state.prevent_expansion--;
+ return result;
+ }
goto fresh_line;
case '0': case '1': case '2': case '3': case '4':
else if (c == '/' && ! CPP_OPTION (pfile, traditional))
{
/* Don't warn for system headers. */
- if (cpp_in_system_header (pfile))
+ if (_cpp_in_system_header (pfile))
;
/* Warn about comments if pedantically GNUC89, and not
in system headers. */
&& CPP_PEDANTIC (pfile)
&& ! buffer->warned_cplusplus_comments)
{
- cpp_error (pfile, CPP_DL_PEDWARN,
- "C++ style comments are not allowed in ISO C90");
- cpp_error (pfile, CPP_DL_PEDWARN,
- "(this will be reported only once per input file)");
+ if (cpp_error (pfile, CPP_DL_PEDWARN,
+ "C++ style comments are not allowed in ISO C90"))
+ cpp_error (pfile, CPP_DL_NOTE,
+ "(this will be reported only once per input file)");
buffer->warned_cplusplus_comments = 1;
}
/* Or if specifically desired via -Wc90-c99-compat. */
&& ! CPP_OPTION (pfile, cplusplus)
&& ! buffer->warned_cplusplus_comments)
{
- cpp_error (pfile, CPP_DL_WARNING,
- "C++ style comments are incompatible with C90");
- cpp_error (pfile, CPP_DL_WARNING,
- "(this will be reported only once per input file)");
+ if (cpp_error (pfile, CPP_DL_WARNING,
+ "C++ style comments are incompatible with C90"))
+ cpp_error (pfile, CPP_DL_NOTE,
+ "(this will be reported only once per input file)");
buffer->warned_cplusplus_comments = 1;
}
/* In C89/C94, C++ style comments are forbidden. */
}
else if (! buffer->warned_cplusplus_comments)
{
- cpp_error (pfile, CPP_DL_ERROR,
- "C++ style comments are not allowed in ISO C90");
- cpp_error (pfile, CPP_DL_ERROR,
- "(this will be reported only once per input "
- "file)");
+ if (cpp_error (pfile, CPP_DL_ERROR,
+ "C++ style comments are not allowed in "
+ "ISO C90"))
+ cpp_error (pfile, CPP_DL_NOTE,
+ "(this will be reported only once per input "
+ "file)");
buffer->warned_cplusplus_comments = 1;
}
}
result->type = CPP_LESS;
if (*buffer->cur == '=')
- buffer->cur++, result->type = CPP_LESS_EQ;
+ {
+ buffer->cur++, result->type = CPP_LESS_EQ;
+ if (*buffer->cur == '>'
+ && CPP_OPTION (pfile, cplusplus)
+ && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
+ buffer->cur++, result->type = CPP_SPACESHIP;
+ }
else if (*buffer->cur == '<')
{
buffer->cur++;
case ':':
result->type = CPP_COLON;
- if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
+ if (*buffer->cur == ':' && CPP_OPTION (pfile, scope))
buffer->cur++, result->type = CPP_SCOPE;
else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
{
/* @ is a punctuator in Objective-C. */
case '@': result->type = CPP_ATSIGN; break;
- case '$':
- case '\\':
+ default:
{
const uchar *base = --buffer->cur;
- struct normalize_state nst = INITIAL_NORMALIZE_STATE;
+ /* Check for an extended identifier ($ or UCN or UTF-8). */
+ struct normalize_state nst = INITIAL_NORMALIZE_STATE;
if (forms_identifier_p (pfile, true, &nst))
{
result->type = CPP_NAME;
warn_about_normalization (pfile, result, &nst);
break;
}
+
+ /* Otherwise this will form a CPP_OTHER token. Parse valid UTF-8 as a
+ single token. */
buffer->cur++;
+ if (c >= utf8_signifier)
+ {
+ const uchar *pstr = base;
+ cppchar_t s;
+ if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
+ buffer->cur = pstr;
+ }
+ create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
+ break;
}
- /* FALLTHRU */
- default:
- create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
- break;
}
/* Potentially convert the location of the token to a range. */
break;
case SPELL_LITERAL:
+ if (token->type == CPP_HEADER_NAME)
+ fputc ('"', fp);
fwrite (token->val.str.text, 1, token->val.str.len, fp);
+ if (token->type == CPP_HEADER_NAME)
+ fputc ('"', fp);
break;
case SPELL_NONE:
|| (CPP_OPTION (pfile, objc)
&& token1->val.str.text[0] == '@'
&& (b == CPP_NAME || b == CPP_STRING)));
+ case CPP_LESS_EQ: return c == '>';
case CPP_STRING:
case CPP_WSTRING:
case CPP_UTF8STRING:
return result;
}
+/* Commit or allocate storage from a buffer. */
+
+void *
+_cpp_commit_buff (cpp_reader *pfile, size_t size)
+{
+ void *ptr = BUFF_FRONT (pfile->a_buff);
+
+ if (pfile->hash_table->alloc_subobject)
+ {
+ void *copy = pfile->hash_table->alloc_subobject (size);
+ memcpy (copy, ptr, size);
+ ptr = copy;
+ }
+ else
+ BUFF_FRONT (pfile->a_buff) += size;
+
+ return ptr;
+}
+
/* Say which field of TOK is in use. */
enum cpp_token_fld_kind
case SPELL_LITERAL:
return CPP_TOKEN_FLD_STR;
case SPELL_OPERATOR:
- if (tok->type == CPP_PASTE)
+ /* Operands which were originally spelled as ident keep around
+ the node for the exact spelling. */
+ if (tok->flags & NAMED_OP)
+ return CPP_TOKEN_FLD_NODE;
+ else if (tok->type == CPP_PASTE)
return CPP_TOKEN_FLD_TOKEN_NO;
else
return CPP_TOKEN_FLD_NONE;
}
}
-/* All tokens lexed in R after calling this function will be forced to have
- their source_location the same as the location referenced by P, until
+/* All tokens lexed in R after calling this function will be forced to
+ have their location_t to be P, until
cpp_stop_forcing_token_locations is called for R. */
void
-cpp_force_token_locations (cpp_reader *r, source_location *p)
+cpp_force_token_locations (cpp_reader *r, location_t loc)
{
- r->forced_token_location_p = p;
+ r->forced_token_location = loc;
}
/* Go back to assigning locations naturally for lexed tokens. */
void
cpp_stop_forcing_token_locations (cpp_reader *r)
{
- r->forced_token_location_p = NULL;
+ r->forced_token_location = 0;
+}
+
+/* We're looking at \, if it's escaping EOL, look past it. If at
+ LIMIT, don't advance. */
+
+static const unsigned char *
+do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
+{
+ const unsigned char *probe = peek;
+
+ if (__builtin_expect (peek[1] == '\n', true))
+ {
+ eol:
+ probe += 2;
+ if (__builtin_expect (probe < limit, true))
+ {
+ peek = probe;
+ if (*peek == '\\')
+ /* The user might be perverse. */
+ return do_peek_backslash (peek, limit);
+ }
+ }
+ else if (__builtin_expect (peek[1] == '\r', false))
+ {
+ if (probe[2] == '\n')
+ probe++;
+ goto eol;
+ }
+
+ return peek;
+}
+
+static const unsigned char *
+do_peek_next (const unsigned char *peek, const unsigned char *limit)
+{
+ if (__builtin_expect (*peek == '\\', false))
+ peek = do_peek_backslash (peek, limit);
+ return peek;
+}
+
+static const unsigned char *
+do_peek_prev (const unsigned char *peek, const unsigned char *bound)
+{
+ if (peek == bound)
+ return NULL;
+
+ unsigned char c = *--peek;
+ if (__builtin_expect (c == '\n', false)
+ || __builtin_expect (c == 'r', false))
+ {
+ if (peek == bound)
+ return peek;
+ int ix = -1;
+ if (c == '\n' && peek[ix] == '\r')
+ {
+ if (peek + ix == bound)
+ return peek;
+ ix--;
+ }
+
+ if (peek[ix] == '\\')
+ return do_peek_prev (peek + ix, bound);
+
+ return peek;
+ }
+ else
+ return peek;
+}
+
+/* If PEEK[-1] is identifier MATCH, scan past it and trailing white
+ space. Otherwise return NULL. */
+
+static const unsigned char *
+do_peek_ident (const char *match, const unsigned char *peek,
+ const unsigned char *limit)
+{
+ for (; *++match; peek++)
+ if (*peek != *match)
+ {
+ peek = do_peek_next (peek, limit);
+ if (*peek != *match)
+ return NULL;
+ }
+
+ /* Must now not be looking at an identifier char. */
+ peek = do_peek_next (peek, limit);
+ if (ISIDNUM (*peek))
+ return NULL;
+
+ /* Skip control-line whitespace. */
+ ws:
+ while (*peek == ' ' || *peek == '\t')
+ peek++;
+ if (__builtin_expect (*peek == '\\', false))
+ {
+ peek = do_peek_backslash (peek, limit);
+ if (*peek != '\\')
+ goto ws;
+ }
+
+ return peek;
+}
+
+/* Are we looking at a module control line starting as PEEK - 1? */
+
+static bool
+do_peek_module (cpp_reader *pfile, unsigned char c,
+ const unsigned char *peek, const unsigned char *limit)
+{
+ bool import = false;
+
+ if (__builtin_expect (c == 'e', false))
+ {
+ if (!((peek[0] == 'x' || peek[0] == '\\')
+ && (peek = do_peek_ident ("export", peek, limit))))
+ return false;
+
+ /* export, peek for import or module. No need to peek __import
+ here. */
+ if (peek[0] == 'i')
+ {
+ if (!((peek[1] == 'm' || peek[1] == '\\')
+ && (peek = do_peek_ident ("import", peek + 1, limit))))
+ return false;
+ import = true;
+ }
+ else if (peek[0] == 'm')
+ {
+ if (!((peek[1] == 'o' || peek[1] == '\\')
+ && (peek = do_peek_ident ("module", peek + 1, limit))))
+ return false;
+ }
+ else
+ return false;
+ }
+ else if (__builtin_expect (c == 'i', false))
+ {
+ if (!((peek[0] == 'm' || peek[0] == '\\')
+ && (peek = do_peek_ident ("import", peek, limit))))
+ return false;
+ import = true;
+ }
+ else if (__builtin_expect (c == '_', false))
+ {
+ /* Needed for translated includes. */
+ if (!((peek[0] == '_' || peek[0] == '\\')
+ && (peek = do_peek_ident ("__import", peek, limit))))
+ return false;
+ import = true;
+ }
+ else if (__builtin_expect (c == 'm', false))
+ {
+ if (!((peek[0] == 'o' || peek[0] == '\\')
+ && (peek = do_peek_ident ("module", peek, limit))))
+ return false;
+ }
+ else
+ return false;
+
+ /* Peek the next character to see if it's good enough. We'll be at
+ the first non-whitespace char, including skipping an escaped
+ newline. */
+ /* ... import followed by identifier, ':', '<' or header-name
+ preprocessing tokens, or module followed by identifier, ':' or
+ ';' preprocessing tokens. */
+ unsigned char p = *peek++;
+
+ /* A character literal is ... single quotes, ... optionally preceded
+ by u8, u, U, or L */
+ /* A string-literal is a ... double quotes, optionally prefixed by
+ R, u8, u8R, u, uR, U, UR, L, or LR */
+ if (p == 'u')
+ {
+ peek = do_peek_next (peek, limit);
+ if (*peek == '8')
+ {
+ peek++;
+ goto peek_u8;
+ }
+ goto peek_u;
+ }
+ else if (p == 'U' || p == 'L')
+ {
+ peek_u8:
+ peek = do_peek_next (peek, limit);
+ peek_u:
+ if (*peek == '\"' || *peek == '\'')
+ return false;
+
+ if (*peek == 'R')
+ goto peek_R;
+ /* Identifier. Ok. */
+ }
+ else if (p == 'R')
+ {
+ peek_R:
+ if (CPP_OPTION (pfile, rliterals))
+ {
+ peek = do_peek_next (peek, limit);
+ if (*peek == '\"')
+ return false;
+ }
+ /* Identifier. Ok. */
+ }
+ else if ('Z' - 'A' == 25
+ ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
+ : ISIDST (p))
+ {
+ /* Identifier. Ok. */
+ }
+ else if (p == '<')
+ {
+ /* Maybe angle header, ok for import. Reject
+ '<=', '<<' digraph:'<:'. */
+ if (!import)
+ return false;
+ peek = do_peek_next (peek, limit);
+ if (*peek == '=' || *peek == '<'
+ || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
+ return false;
+ }
+ else if (p == ';')
+ {
+ /* SEMICOLON, ok for module. */
+ if (import)
+ return false;
+ }
+ else if (p == '"')
+ {
+ /* STRING, ok for import. */
+ if (!import)
+ return false;
+ }
+ else if (p == ':')
+ {
+ /* Maybe COLON, ok. Reject '::', digraph:':>'. */
+ peek = do_peek_next (peek, limit);
+ if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
+ return false;
+ }
+ else
+ /* FIXME: Detect a unicode character, excluding those not
+ permitted as the initial character. [lex.name]/1. I presume
+ we need to check the \[uU] spellings, and directly using
+ Unicode in say UTF8 form? Or perhaps we do the phase-1
+ conversion of UTF8 to universal-character-names? */
+ return false;
+
+ return true;
+}
+
+/* Directives-only scanning. Somewhat more relaxed than correct
+ parsing -- some ill-formed programs will not be rejected. */
+
+void
+cpp_directive_only_process (cpp_reader *pfile,
+ void *data,
+ void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
+{
+ bool module_p = CPP_OPTION (pfile, module_directives);
+
+ do
+ {
+ restart:
+ /* Buffer initialization, but no line cleaning. */
+ cpp_buffer *buffer = pfile->buffer;
+ buffer->cur_note = buffer->notes_used = 0;
+ buffer->cur = buffer->line_base = buffer->next_line;
+ buffer->need_line = false;
+ /* Files always end in a newline. We rely on this for
+ character peeking safety. */
+ gcc_assert (buffer->rlimit[-1] == '\n');
+
+ const unsigned char *base = buffer->cur;
+ unsigned line_count = 0;
+ const unsigned char *line_start = base;
+
+ bool bol = true;
+ bool raw = false;
+
+ const unsigned char *lwm = base;
+ for (const unsigned char *pos = base, *limit = buffer->rlimit;
+ pos < limit;)
+ {
+ unsigned char c = *pos++;
+ /* This matches the switch in _cpp_lex_direct. */
+ switch (c)
+ {
+ case ' ': case '\t': case '\f': case '\v':
+ /* Whitespace, do nothing. */
+ break;
+
+ case '\r': /* MAC line ending, or Windows \r\n */
+ if (*pos == '\n')
+ pos++;
+ /* FALLTHROUGH */
+
+ case '\n':
+ bol = true;
+
+ next_line:
+ CPP_INCREMENT_LINE (pfile, 0);
+ line_count++;
+ line_start = pos;
+ break;
+
+ case '\\':
+ /* <backslash><newline> is removed, and doesn't undo any
+ preceeding escape or whatnot. */
+ if (*pos == '\n')
+ {
+ pos++;
+ goto next_line;
+ }
+ else if (*pos == '\r')
+ {
+ if (pos[1] == '\n')
+ pos++;
+ pos++;
+ goto next_line;
+ }
+ goto dflt;
+
+ case '#':
+ if (bol)
+ {
+ /* Line directive. */
+ if (pos - 1 > base && !pfile->state.skipping)
+ cb (pfile, CPP_DO_print, data,
+ line_count, base, pos - 1 - base);
+
+ /* Prep things for directive handling. */
+ buffer->next_line = pos;
+ buffer->need_line = true;
+ bool ok = _cpp_get_fresh_line (pfile);
+ gcc_checking_assert (ok);
+
+ /* Ensure proper column numbering for generated
+ error messages. */
+ buffer->line_base -= pos - line_start;
+
+ _cpp_handle_directive (pfile, line_start + 1 != pos);
+
+ /* Sanitize the line settings. Duplicate #include's can
+ mess things up. */
+ // FIXME: Necessary?
+ pfile->line_table->highest_location
+ = pfile->line_table->highest_line;
+
+ if (!pfile->state.skipping
+ && pfile->buffer->next_line < pfile->buffer->rlimit)
+ cb (pfile, CPP_DO_location, data,
+ pfile->line_table->highest_line);
+
+ goto restart;
+ }
+ goto dflt;
+
+ case '/':
+ {
+ const unsigned char *peek = do_peek_next (pos, limit);
+ if (!(*peek == '/' || *peek == '*'))
+ goto dflt;
+
+ /* Line or block comment */
+ bool is_block = *peek == '*';
+ bool star = false;
+ bool esc = false;
+ location_t sloc
+ = linemap_position_for_column (pfile->line_table,
+ pos - line_start);
+
+ while (pos < limit)
+ {
+ char c = *pos++;
+ switch (c)
+ {
+ case '\\':
+ esc = true;
+ break;
+
+ case '\r':
+ if (*pos == '\n')
+ pos++;
+ /* FALLTHROUGH */
+
+ case '\n':
+ {
+ CPP_INCREMENT_LINE (pfile, 0);
+ line_count++;
+ line_start = pos;
+ if (!esc && !is_block)
+ {
+ bol = true;
+ goto done_comment;
+ }
+ }
+ if (!esc)
+ star = false;
+ esc = false;
+ break;
+
+ case '*':
+ if (pos > peek && !esc)
+ star = is_block;
+ esc = false;
+ break;
+
+ case '/':
+ if (star)
+ goto done_comment;
+ /* FALLTHROUGH */
+
+ default:
+ star = false;
+ esc = false;
+ break;
+ }
+ }
+ cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
+ "unterminated comment");
+ done_comment:
+ lwm = pos;
+ break;
+ }
+
+ case '\'':
+ if (!CPP_OPTION (pfile, digit_separators))
+ goto delimited_string;
+
+ /* Possibly a number punctuator. */
+ if (!ISIDNUM (*do_peek_next (pos, limit)))
+ goto delimited_string;
+
+ goto quote_peek;
+
+ case '\"':
+ if (!CPP_OPTION (pfile, rliterals))
+ goto delimited_string;
+
+ quote_peek:
+ {
+ /* For ' see if it's a number punctuator
+ \.?<digit>(<digit>|<identifier-nondigit>
+ |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
+ /* For " see if it's a raw string
+ {U,L,u,u8}R. This includes CPP_NUMBER detection,
+ because that could be 0e+R. */
+ const unsigned char *peek = pos - 1;
+ bool quote_first = c == '"';
+ bool quote_eight = false;
+ bool maybe_number_start = false;
+ bool want_number = false;
+
+ while ((peek = do_peek_prev (peek, lwm)))
+ {
+ unsigned char p = *peek;
+ if (quote_first)
+ {
+ if (!raw)
+ {
+ if (p != 'R')
+ break;
+ raw = true;
+ continue;
+ }
+
+ quote_first = false;
+ if (p == 'L' || p == 'U' || p == 'u')
+ ;
+ else if (p == '8')
+ quote_eight = true;
+ else
+ goto second_raw;
+ }
+ else if (quote_eight)
+ {
+ if (p != 'u')
+ {
+ raw = false;
+ break;
+ }
+ quote_eight = false;
+ }
+ else if (c == '"')
+ {
+ second_raw:;
+ if (!want_number && ISIDNUM (p))
+ {
+ raw = false;
+ break;
+ }
+ }
+
+ if (ISDIGIT (p))
+ maybe_number_start = true;
+ else if (p == '.')
+ want_number = true;
+ else if (ISIDNUM (p))
+ maybe_number_start = false;
+ else if (p == '+' || p == '-')
+ {
+ if (const unsigned char *peek_prev
+ = do_peek_prev (peek, lwm))
+ {
+ p = *peek_prev;
+ if (p == 'e' || p == 'E'
+ || p == 'p' || p == 'P')
+ {
+ want_number = true;
+ maybe_number_start = false;
+ }
+ else
+ break;
+ }
+ else
+ break;
+ }
+ else if (p == '\'' || p == '\"')
+ {
+ /* If this is lwm, this must be the end of a
+ previous string. So this is a trailing
+ literal type, (a) if those are allowed,
+ and (b) maybe_start is false. Otherwise
+ this must be a CPP_NUMBER because we've
+ met another ', and we'd have checked that
+ in its own right. */
+ if (peek == lwm && CPP_OPTION (pfile, uliterals))
+ {
+ if (!maybe_number_start && !want_number)
+ /* Must be a literal type. */
+ raw = false;
+ }
+ else if (p == '\''
+ && CPP_OPTION (pfile, digit_separators))
+ maybe_number_start = true;
+ break;
+ }
+ else if (c == '\'')
+ break;
+ else if (!quote_first && !quote_eight)
+ break;
+ }
+
+ if (maybe_number_start)
+ {
+ if (c == '\'')
+ /* A CPP NUMBER. */
+ goto dflt;
+ raw = false;
+ }
+
+ goto delimited_string;
+ }
+
+ delimited_string:
+ {
+ /* (Possibly raw) string or char literal. */
+ unsigned char end = c;
+ int delim_len = -1;
+ const unsigned char *delim = NULL;
+ location_t sloc = linemap_position_for_column (pfile->line_table,
+ pos - line_start);
+ int esc = 0;
+
+ if (raw)
+ {
+ /* There can be no line breaks in the delimiter. */
+ delim = pos;
+ for (delim_len = 0; (c = *pos++) != '('; delim_len++)
+ {
+ if (delim_len == 16)
+ {
+ cpp_error_with_line (pfile, CPP_DL_ERROR,
+ sloc, 0,
+ "raw string delimiter"
+ " longer than %d"
+ " characters",
+ delim_len);
+ raw = false;
+ pos = delim;
+ break;
+ }
+ if (strchr (") \\\t\v\f\n", c))
+ {
+ cpp_error_with_line (pfile, CPP_DL_ERROR,
+ sloc, 0,
+ "invalid character '%c'"
+ " in raw string"
+ " delimiter", c);
+ raw = false;
+ pos = delim;
+ break;
+ }
+ if (pos >= limit)
+ goto bad_string;
+ }
+ }
+
+ while (pos < limit)
+ {
+ char c = *pos++;
+ switch (c)
+ {
+ case '\\':
+ if (!raw)
+ esc++;
+ break;
+
+ case '\r':
+ if (*pos == '\n')
+ pos++;
+ /* FALLTHROUGH */
+
+ case '\n':
+ {
+ CPP_INCREMENT_LINE (pfile, 0);
+ line_count++;
+ line_start = pos;
+ }
+ if (esc)
+ esc--;
+ break;
+
+ case ')':
+ if (raw
+ && pos + delim_len + 1 < limit
+ && pos[delim_len] == end
+ && !memcmp (delim, pos, delim_len))
+ {
+ pos += delim_len + 1;
+ raw = false;
+ goto done_string;
+ }
+ break;
+
+ default:
+ if (!raw && !(esc & 1) && c == end)
+ goto done_string;
+ esc = 0;
+ break;
+ }
+ }
+ bad_string:
+ cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
+ "unterminated literal");
+
+ done_string:
+ raw = false;
+ lwm = pos - 1;
+ }
+ goto dflt;
+
+ case '_':
+ case 'e':
+ case 'i':
+ case 'm':
+ if (bol && module_p && !pfile->state.skipping
+ && do_peek_module (pfile, c, pos, limit))
+ {
+ /* We've seen the start of a module control line.
+ Start up the tokenizer. */
+ pos--; /* Backup over the first character. */
+
+ /* Backup over whitespace to start of line. */
+ while (pos > line_start
+ && (pos[-1] == ' ' || pos[-1] == '\t'))
+ pos--;
+
+ if (pos > base)
+ cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
+
+ /* Prep things for directive handling. */
+ buffer->next_line = pos;
+ buffer->need_line = true;
+
+ /* Now get tokens until the PRAGMA_EOL. */
+ do
+ {
+ location_t spelling;
+ const cpp_token *tok
+ = cpp_get_token_with_location (pfile, &spelling);
+
+ gcc_assert (pfile->state.in_deferred_pragma
+ || tok->type == CPP_PRAGMA_EOL);
+ cb (pfile, CPP_DO_token, data, tok, spelling);
+ }
+ while (pfile->state.in_deferred_pragma);
+
+ if (pfile->buffer->next_line < pfile->buffer->rlimit)
+ cb (pfile, CPP_DO_location, data,
+ pfile->line_table->highest_line);
+
+ pfile->mi_valid = false;
+ goto restart;
+ }
+ goto dflt;
+
+ default:
+ dflt:
+ bol = false;
+ pfile->mi_valid = false;
+ break;
+ }
+ }
+
+ if (buffer->rlimit > base && !pfile->state.skipping)
+ cb (pfile, CPP_DO_print, data, line_count, base, buffer->rlimit - base);
+
+ _cpp_pop_buffer (pfile);
+ }
+ while (pfile->buffer);
}