/* CPP Library - lexical analysis.
- Copyright (C) 2000-2013 Free Software Foundation, Inc.
+ Copyright (C) 2000-2018 Free Software Foundation, Inc.
Contributed by Per Bothner, 1994-95.
Based on CCCP program by Paul Rubin, June 1986
Adapted to ANSI C, Richard Stallman, Jan 1987
}
}
-/* Disable on Solaris 2/x86 until the following problems can be properly
+/* Disable on Solaris 2/x86 until the following problem can be properly
autoconfed:
- The Solaris 9 assembler cannot assemble SSE4.2 insns.
- Before Solaris 9 Update 6, SSE insns cannot be executed.
The Solaris 10+ assembler tags objects with the instruction set
extensions used, so SSE4.2 executables cannot run on machines that
don't support that extension. */
-#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
+#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
/* Replicated character data to be shared between implementations.
Recall that outside of a context with vector support we can't
/* Advance the pointer to an aligned address. We will re-scan a
few bytes, but we no longer need care for reading past the
end of a page, since we're guaranteed a match. */
- s = (const uchar *)((si + 16) & -16);
+ s = (const uchar *)((si + 15) & -16);
}
- /* Main loop, processing 16 bytes at a time. By doing the whole loop
- in inline assembly, we can make proper use of the flags set. */
- __asm ( "sub $16, %1\n"
- " .balign 16\n"
+ /* Main loop, processing 16 bytes at a time. */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+ while (1)
+ {
+ char f;
+
+ /* By using inline assembly instead of the builtin,
+ we can use the result, as well as the flags set. */
+ __asm ("%vpcmpestri\t$0, %2, %3"
+ : "=c"(index), "=@ccc"(f)
+ : "m"(*s), "x"(search), "a"(4), "d"(16));
+ if (f)
+ break;
+
+ s += 16;
+ }
+#else
+ s -= 16;
+ /* By doing the whole loop in inline assembly,
+ we can make proper use of the flags set. */
+ __asm ( ".balign 16\n"
"0: add $16, %1\n"
- " %vpcmpestri $0, (%1), %2\n"
+ " %vpcmpestri\t$0, (%1), %2\n"
" jnc 0b"
: "=&c"(index), "+r"(s)
: "x"(search), "a"(4), "d"(16));
+#endif
found:
return s + index;
search_line_fast = impl;
}
-#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
+#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
+
+/* A version of the fast scanner using AltiVec vectorized byte compares
+ and VSX unaligned loads (when VSX is available). This is otherwise
+ the same as the pre-GCC 5 version.
+
+ Scans forward from S in 16-byte steps and returns a pointer to the
+ first byte that is '\n', '\r', '\\' or '?'. END is unused and the
+ loop has no upper bound.
+ NOTE(review): presumably the caller guarantees that one of these
+ characters is present before the end of the buffer -- confirm. */
+
+ATTRIBUTE_NO_SANITIZE_UNDEFINED
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+ typedef __attribute__((altivec(vector))) unsigned char vc;
+
+ const vc repl_nl = {
+ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
+ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
+ };
+ const vc repl_cr = {
+ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
+ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
+ };
+ const vc repl_bs = {
+ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
+ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
+ };
+ const vc repl_qm = {
+ '?', '?', '?', '?', '?', '?', '?', '?',
+ '?', '?', '?', '?', '?', '?', '?', '?',
+ };
+ const vc zero = { 0 };
+
+ vc data, t;
+
+ /* Main loop processing 16 bytes at a time. S is advanced before the
+ compare, so on exit it points just past the block that matched. */
+ do
+ {
+ vc m_nl, m_cr, m_bs, m_qm;
+
+ data = __builtin_vec_vsx_ld (0, s);
+ s += 16;
+
+ m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
+ m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
+ m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
+ m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
+ t = (m_nl | m_cr) | (m_bs | m_qm);
+
+ /* T now contains 0xff in bytes for which we matched one of the relevant
+ characters. We want to exit the loop if any byte in T is non-zero.
+ Below is the expansion of vec_any_ne(t, zero). */
+ }
+ while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
+
+ /* Restore s to point to the 16 bytes we just processed. */
+ s -= 16;
+
+ {
+#define N (sizeof(vc) / sizeof(long))
+
+ union {
+ vc v;
+ /* Statically assert that N is 2 or 4: any other value yields a
+ negative array size, which does not compile. */
+ unsigned long l[(N == 2 || N == 4) ? N : -1];
+ } u;
+ unsigned long l, i = 0;
+
+ u.v = t;
+
+ /* Find the first word of T that is non-zero, advancing S past each
+ all-zero word that is skipped. The last word is taken without a
+ test because the loop above only exits when T is non-zero. */
+ switch (N)
+ {
+ case 4:
+ l = u.l[i++];
+ if (l != 0)
+ break;
+ s += sizeof(unsigned long);
+ l = u.l[i++];
+ if (l != 0)
+ break;
+ s += sizeof(unsigned long);
+ /* FALLTHRU */
+ case 2:
+ l = u.l[i++];
+ if (l != 0)
+ break;
+ s += sizeof(unsigned long);
+ l = u.l[i];
+ }
+
+ /* L now contains 0xff in bytes for which we matched one of the
+ relevant characters. We can find the byte index by finding
+ its bit index and dividing by 8. The first byte in memory is the
+ most significant on big endian (count leading zeros) and the
+ least significant on little endian (count trailing zeros). */
+#ifdef __BIG_ENDIAN__
+ l = __builtin_clzl(l) >> 3;
+#else
+ l = __builtin_ctzl(l) >> 3;
+#endif
+ return s + l;
+
+#undef N
+ }
+}
+
+#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
-/* A vection of the fast scanner using AltiVec vectorized byte compares. */
+/* A version of the fast scanner using AltiVec vectorized byte compares.
+ This cannot be used for little endian because vec_lvsl/lvsr are
+ deprecated for little endian and the code won't work properly. */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
so we can't compile this function without -maltivec on the command line
(or implied by some other switch). */
if (l != 0)
break;
s += sizeof(unsigned long);
+ /* FALLTHROUGH */
case 2:
l = u.l[i++];
if (l != 0)
}
}
-#elif defined (__ARM_NEON__)
+#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
+#include "arm_neon.h"
+
+/* This doesn't have to be the exact page size, but no system may use
+ a size smaller than this. ARMv8 requires a minimum page size of
+ 4k. The impact of being conservative here is a small number of
+ cases will take the slightly slower entry path into the main
+ loop. */
+
+#define AARCH64_MIN_PAGE_SIZE 4096
+/* AArch64 NEON version of the fast scanner. Returns a pointer to the
+ first byte at or after S that is '\n', '\r', '\\' or '?'. END is
+ unused and the main loop has no upper bound.
+ NOTE(review): presumably the caller guarantees that such a byte
+ exists before the end of the buffer -- confirm. */
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+ const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
+ const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
+ const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
+ const uint8x16_t repl_qm = vdupq_n_u8 ('?');
+ const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
+
+#ifdef __ARM_BIG_ENDIAN
+ const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
+#else
+ const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
+#endif
+
+ unsigned int found;
+ const uint8_t *p;
+ uint8x16_t data;
+ uint8x16_t t;
+ uint16x8_t m;
+ uint8x16_t u, v, w;
+
+ /* Align the source pointer down to a 16-byte boundary. */
+ p = (const uint8_t *)((uintptr_t)s & -16);
+
+ /* Assuming random string start positions, with a 4k page size we'll take
+ the slow path about 0.37% of the time. */
+ if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
+ - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
+ < 16, 0))
+ {
+ /* Slow path: the string starts near a possible page boundary.
+ Load the aligned 16 bytes that contain S instead of reading
+ from S itself, reduce the compare result to one bit per byte,
+ and use MASK to drop matches in the bytes that precede S. */
+ uint32_t misalign, mask;
+
+ misalign = (uintptr_t)s & 15;
+ mask = (-1u << misalign) & 0xffff;
+ data = vld1q_u8 (p);
+ t = vceqq_u8 (data, repl_nl);
+ u = vceqq_u8 (data, repl_cr);
+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+ t = vorrq_u8 (v, w);
+ t = vandq_u8 (t, xmask);
+ m = vpaddlq_u8 (t);
+ m = vshlq_u16 (m, shift);
+ found = vaddvq_u16 (m);
+ found &= mask;
+ if (found)
+ return (const uchar*)p + __builtin_ctz (found);
+ }
+ else
+ {
+ data = vld1q_u8 ((const uint8_t *) s); /* Unaligned load from S. */
+ t = vceqq_u8 (data, repl_nl);
+ u = vceqq_u8 (data, repl_cr);
+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+ t = vorrq_u8 (v, w);
+ if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
+ goto done;
+ }
+
+ do /* Main loop: aligned 16-byte loads starting at P + 16. */
+ {
+ p += 16;
+ data = vld1q_u8 (p);
+ t = vceqq_u8 (data, repl_nl);
+ u = vceqq_u8 (data, repl_cr);
+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+ t = vorrq_u8 (v, w);
+ } while (!vpaddd_u64 ((uint64x2_t)t));
+
+done:
+ /* Now that we've found the terminating substring, work out precisely where
+ we need to stop. T is reduced to one bit per byte and the lowest
+ set bit gives the offset of the first match. P is below S only
+ when we jumped here after the unaligned load at S, in which case
+ the offset is relative to S rather than P. */
+ t = vandq_u8 (t, xmask);
+ m = vpaddlq_u8 (t);
+ m = vshlq_u16 (m, shift);
+ found = vaddvq_u16 (m);
+ return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
+ + __builtin_ctz (found));
+}
+
+#elif defined (__ARM_NEON)
#include "arm_neon.h"
static const uchar *
#else
-/* We only have one accellerated alternative. Use a direct call so that
+/* We only have one accelerated alternative. Use a direct call so that
we encourage inlining. */
#define search_line_fast search_line_acc_char
&& *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
+ cppchar_t s;
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
- state))
+ state, &s, NULL, NULL))
return true;
buffer->cur -= 2;
}
return false;
}
+/* Helper function to issue error about improper __VA_OPT__ use.
+ Reports at most one pedwarn through PFILE. When pedantic and the
+ va_opt option is off, the complaint is that __VA_OPT__ is not
+ available (suppressed inside system headers). Otherwise, when
+ PFILE->state.va_args_ok is clear, the complaint is that __VA_OPT__
+ appears outside the expansion of a variadic macro. */
+static void
+maybe_va_opt_error (cpp_reader *pfile)
+{
+ if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
+ {
+ /* __VA_OPT__ should not be accepted at all, but allow it in
+ system headers. */
+ if (!cpp_in_system_header (pfile))
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_OPT__ is not available until C++2a");
+ }
+ else if (!pfile->state.va_args_ok)
+ {
+ /* __VA_OPT__ should only appear in the replacement list of a
+ variadic macro. */
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_OPT__ can only appear in the expansion"
+ " of a C++2a variadic macro");
+ }
+}
+
/* Helper function to get the cpp_hashnode of the identifier BASE. */
static cpp_hashnode *
lex_identifier_intern (cpp_reader *pfile, const uchar *base)
replacement list of a variadic macro. */
if (result == pfile->spec_nodes.n__VA_ARGS__
&& !pfile->state.va_args_ok)
- cpp_error (pfile, CPP_DL_PEDWARN,
- "__VA_ARGS__ can only appear in the expansion"
- " of a C99 variadic macro");
+ {
+ if (CPP_OPTION (pfile, cplusplus))
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_ARGS__ can only appear in the expansion"
+ " of a C++11 variadic macro");
+ else
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_ARGS__ can only appear in the expansion"
+ " of a C99 variadic macro");
+ }
+
+ if (result == pfile->spec_nodes.n__VA_OPT__)
+ maybe_va_opt_error (pfile);
/* For -Wc++-compat, warn about use of C++ named operators. */
if (result->flags & NODE_WARN_OPERATOR)
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
- struct normalize_state *nst)
+ struct normalize_state *nst, cpp_hashnode **spelling)
{
cpp_hashnode *result;
const uchar *cur;
cur = pfile->buffer->cur;
if (! starts_ucn)
- while (ISIDNUM (*cur))
- {
- hash = HT_HASHSTEP (hash, *cur);
- cur++;
- }
+ {
+ while (ISIDNUM (*cur))
+ {
+ hash = HT_HASHSTEP (hash, *cur);
+ cur++;
+ }
+ NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
+ }
pfile->buffer->cur = cur;
if (starts_ucn || forms_identifier_p (pfile, false, nst))
{
do {
while (ISIDNUM (*pfile->buffer->cur))
{
+ NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
pfile->buffer->cur++;
- NORMALIZE_STATE_UPDATE_IDNUM (nst);
}
} while (forms_identifier_p (pfile, false, nst));
result = _cpp_interpret_identifier (pfile, base,
pfile->buffer->cur - base);
+ *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
}
else
{
result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC));
+ *spelling = result;
}
/* Rarely, identifiers require diagnostics when lexed. */
replacement list of a variadic macro. */
if (result == pfile->spec_nodes.n__VA_ARGS__
&& !pfile->state.va_args_ok)
- cpp_error (pfile, CPP_DL_PEDWARN,
- "__VA_ARGS__ can only appear in the expansion"
- " of a C99 variadic macro");
+ {
+ if (CPP_OPTION (pfile, cplusplus))
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_ARGS__ can only appear in the expansion"
+ " of a C++11 variadic macro");
+ else
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_ARGS__ can only appear in the expansion"
+ " of a C99 variadic macro");
+ }
+
+ /* __VA_OPT__ should only appear in the replacement list of a
+ variadic macro. */
+ if (result == pfile->spec_nodes.n__VA_OPT__)
+ maybe_va_opt_error (pfile);
/* For -Wc++-compat, warn about use of C++ named operators. */
if (result->flags & NODE_WARN_OPERATOR)
while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
|| VALID_SIGN (*cur, cur[-1]))
{
+ NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
cur++;
- NORMALIZE_STATE_UPDATE_IDNUM (nst);
}
+ /* A number can't end with a digit separator. */
+ while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
+ --cur;
pfile->buffer->cur = cur;
}
cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
base, cur - base, hash, HT_NO_INSERT));
- return !result ? false : (result->type == NT_MACRO);
+ return result && cpp_macro_p (result);
}
+/* Returns true if a literal suffix does not have the expected form
+ and is defined as a macro. BASE points at the first character of
+ the candidate suffix. A suffix that starts with exactly one
+ underscore is never reported; anything else is passed to is_macro
+ and its answer is returned. */
+
+static bool
+is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
+{
+ /* User-defined literals outside of namespace std must start with a single
+ underscore, so assume anything of that form really is a UDL suffix.
+ We don't need to worry about UDLs defined inside namespace std because
+ their names are reserved, so cannot be used as macro names in valid
+ programs. */
+ if (base[0] == '_' && base[1] != '_')
+ return false;
+ return is_macro (pfile, base);
+}
/* Lexes a raw string. The stored string contains the spelling, including
double quotes, delimiter string, '(' and ')', any leading
(const uchar *)(STR), (LEN)); \
temp_buffer_len += (LEN); \
} \
- } while (0);
+ } while (0)
orig_base = base;
++cur;
{
/* If a string format macro, say from inttypes.h, is placed touching
a string literal it could be parsed as a C++11 user-defined string
- literal thus breaking the program.
- Try to identify macros with is_macro. A warning is issued. */
- if (is_macro (pfile, cur))
+ literal thus breaking the program. */
+ if (is_macro_not_literal_suffix (pfile, cur))
{
/* Raise a warning, but do not consume subsequent tokens. */
- if (CPP_OPTION (pfile, warn_literal_suffix))
+ if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
token->src_loc, 0,
"invalid suffix on literal; C++11 requires "
else if (terminator == '\'')
type = (*base == 'L' ? CPP_WCHAR :
*base == 'U' ? CPP_CHAR32 :
- *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
+ : CPP_CHAR);
else
terminator = '>', type = CPP_HEADER_NAME;
{
/* If a string format macro, say from inttypes.h, is placed touching
a string literal it could be parsed as a C++11 user-defined string
- literal thus breaking the program.
- Try to identify macros with is_macro. A warning is issued. */
- if (is_macro (pfile, cur))
+ literal thus breaking the program. */
+ if (is_macro_not_literal_suffix (pfile, cur))
{
/* Raise a warning, but do not consume subsequent tokens. */
- if (CPP_OPTION (pfile, warn_literal_suffix))
+ if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
token->src_loc, 0,
"invalid suffix on literal; C++11 requires "
++cur;
}
}
+ else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
+ && is_macro (pfile, cur)
+ && !pfile->state.skipping)
+ cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
+ token->src_loc, 0, "C++11 requires a space "
+ "between string literal and macro");
pfile->buffer->cur = cur;
create_literal (pfile, token, base, cur - base, type);
store_comment (pfile, token);
}
+/* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
+ comment. *COMMENT_START is '*' for a C block comment; any other
+ value is treated as a C++ line comment. Scanning starts at
+ COMMENT_START + 1 and remaining lengths are measured against
+ PFILE->buffer->cur. Which spellings are accepted depends on the
+ value of the cpp_warn_implicit_fallthrough option: levels 0 and 5
+ accept nothing, level 1 accepts every comment, level 2 searches for
+ a loose pattern anywhere in the comment, and levels 3 and 4 require
+ the whole comment to match one of the forms listed below. */
+
+static bool
+fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
+{
+ const unsigned char *from = comment_start + 1;
+
+ switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
+ {
+ /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
+ don't recognize any comments. The latter only checks attributes,
+ the former doesn't warn. */
+ case 0:
+ default:
+ return false;
+ /* -Wimplicit-fallthrough=1 considers any comment, no matter what
+ content it has. */
+ case 1:
+ return true;
+ case 2:
+ /* -Wimplicit-fallthrough=2 looks for (case insensitive)
+ .*falls?[ \t-]*thr(u|ough).* regex. */
+ for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
+ from++)
+ {
+ /* Is there anything like strpbrk with upper boundary, or
+ memchr looking for 2 characters rather than just one? */
+ if (from[0] != 'f' && from[0] != 'F')
+ continue;
+ if (from[1] != 'a' && from[1] != 'A')
+ continue;
+ if (from[2] != 'l' && from[2] != 'L')
+ continue;
+ if (from[3] != 'l' && from[3] != 'L')
+ continue;
+ from += sizeof "fall" - 1;
+ if (from[0] == 's' || from[0] == 'S')
+ from++;
+ while (*from == ' ' || *from == '\t' || *from == '-')
+ from++;
+ if (from[0] != 't' && from[0] != 'T')
+ continue;
+ if (from[1] != 'h' && from[1] != 'H')
+ continue;
+ if (from[2] != 'r' && from[2] != 'R')
+ continue;
+ if (from[3] == 'u' || from[3] == 'U')
+ return true;
+ if (from[3] != 'o' && from[3] != 'O')
+ continue;
+ if (from[4] != 'u' && from[4] != 'U')
+ continue;
+ if (from[5] != 'g' && from[5] != 'G')
+ continue;
+ if (from[6] != 'h' && from[6] != 'H')
+ continue;
+ return true;
+ }
+ return false;
+ case 3:
+ case 4:
+ break;
+ }
+
+ /* Whole comment contents:
+ -fallthrough
+ @fallthrough@
+ */
+ if (*from == '-' || *from == '@')
+ {
+ size_t len = sizeof "fallthrough" - 1;
+ if ((size_t) (pfile->buffer->cur - from - 1) < len)
+ return false;
+ if (memcmp (from + 1, "fallthrough", len))
+ return false;
+ if (*from == '@')
+ {
+ if (from[len + 1] != '@')
+ return false;
+ len++;
+ }
+ from += 1 + len;
+ }
+ /* Whole comment contents (regex):
+ lint -fallthrough[ \t]*
+ */
+ else if (*from == 'l')
+ {
+ size_t len = sizeof "int -fallthrough" - 1;
+ if ((size_t) (pfile->buffer->cur - from - 1) < len)
+ return false;
+ if (memcmp (from + 1, "int -fallthrough", len))
+ return false;
+ from += 1 + len;
+ while (*from == ' ' || *from == '\t')
+ from++;
+ }
+ /* Whole comment contents (regex):
+ [ \t]*FALLTHR(U|OUGH)[ \t]*
+ */
+ else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
+ {
+ while (*from == ' ' || *from == '\t')
+ from++;
+ if ((size_t) (pfile->buffer->cur - from) < sizeof "FALLTHRU" - 1)
+ return false;
+ if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
+ return false;
+ from += sizeof "FALLTHR" - 1;
+ if (*from == 'U')
+ from++;
+ else if ((size_t) (pfile->buffer->cur - from) < sizeof "OUGH" - 1)
+ return false;
+ else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
+ return false;
+ else
+ from += sizeof "OUGH" - 1;
+ while (*from == ' ' || *from == '\t')
+ from++;
+ }
+ /* Whole comment contents (regex):
+ [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
+ [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
+ [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
+ */
+ else
+ {
+ while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
+ from++;
+ unsigned char f = *from;
+ bool all_upper = false;
+ if (f == 'E' || f == 'e')
+ {
+ if ((size_t) (pfile->buffer->cur - from)
+ < sizeof "else fallthru" - 1)
+ return false;
+ if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
+ all_upper = true;
+ else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
+ return false;
+ from += sizeof "else" - 1;
+ if (*from == ',')
+ from++;
+ if (*from != ' ')
+ return false;
+ from++;
+ if (all_upper && *from == 'f')
+ return false;
+ if (f == 'e' && *from == 'F')
+ return false;
+ f = *from;
+ }
+ else if (f == 'I' || f == 'i')
+ {
+ if ((size_t) (pfile->buffer->cur - from)
+ < sizeof "intentional fallthru" - 1)
+ return false;
+ if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
+ sizeof "NTENTIONAL" - 1) == 0)
+ all_upper = true;
+ else if (memcmp (from + 1, "ntentional",
+ sizeof "ntentional" - 1))
+ return false;
+ from += sizeof "intentional" - 1;
+ if (*from == ' ')
+ {
+ from++;
+ if (all_upper && *from == 'f')
+ return false;
+ }
+ else if (all_upper)
+ {
+ if (memcmp (from, "LY F", sizeof "LY F" - 1))
+ return false;
+ from += sizeof "LY " - 1;
+ }
+ else
+ {
+ if (memcmp (from, "ly ", sizeof "ly " - 1))
+ return false;
+ from += sizeof "ly " - 1;
+ }
+ if (f == 'i' && *from == 'F')
+ return false;
+ f = *from;
+ }
+ if (f != 'F' && f != 'f')
+ return false;
+ if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
+ return false;
+ if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
+ all_upper = true;
+ else if (all_upper)
+ return false;
+ else if (memcmp (from + 1, "all", sizeof "all" - 1))
+ return false;
+ from += sizeof "fall" - 1;
+ if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
+ from += 2;
+ else if (*from == ' ' || *from == '-')
+ from++;
+ else if (*from != (all_upper ? 'T' : 't'))
+ return false;
+ if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
+ return false;
+ if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
+ return false;
+ if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
+ {
+ if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
+ return false;
+ if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
+ sizeof "hrough" - 1))
+ return false;
+ from += sizeof "through" - 1;
+ }
+ else
+ from += sizeof "thru" - 1;
+ while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
+ from++;
+ if (*from == '-')
+ {
+ from++;
+ if (*comment_start == '*')
+ {
+ do
+ {
+ while (*from && *from != '*'
+ && *from != '\n' && *from != '\r')
+ from++;
+ if (*from != '*' || from[1] == '/')
+ break;
+ from++;
+ }
+ while (1);
+ }
+ else
+ while (*from && *from != '\n' && *from != '\r')
+ from++;
+ }
+ }
+ /* C block comment. After a successful match FROM must be at the
+ closing delimiter. */
+ if (*comment_start == '*')
+ {
+ if (*from != '*' || from[1] != '/')
+ return false;
+ }
+ /* C++ line comment. FROM must be at the terminating newline. */
+ else if (*from != '\n')
+ return false;
+
+ return true;
+}
+
/* Allocate COUNT tokens for RUN. */
void
_cpp_init_tokenrun (tokenrun *run, unsigned int count)
count = index;
pfile->keep_tokens++;
+ /* For peeked tokens temporarily disable line_change reporting,
+ until the tokens are parsed for real. */
+ void (*line_change) (cpp_reader *, const cpp_token *, int)
+ = pfile->cb.line_change;
+ pfile->cb.line_change = NULL;
+
do
{
peektok = _cpp_lex_token (pfile);
if (peektok->type == CPP_EOF)
- return peektok;
+ {
+ index--;
+ break;
+ }
}
while (index--);
- _cpp_backup_tokens_direct (pfile, count + 1);
+ _cpp_backup_tokens_direct (pfile, count - index);
pfile->keep_tokens--;
+ pfile->cb.line_change = line_change;
return peektok;
}
cppchar_t c;
cpp_buffer *buffer;
const unsigned char *comment_start;
+ bool fallthrough_comment = false;
cpp_token *result = pfile->cur_token++;
fresh_line:
}
return result;
}
+ if (buffer != pfile->buffer)
+ fallthrough_comment = false;
if (!pfile->keep_tokens)
{
pfile->cur_run = &pfile->base_run;
&& CPP_OPTION (pfile, rliterals))
|| (*buffer->cur == '8'
&& c == 'u'
- && (buffer->cur[1] == '"'
+ && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
+ && CPP_OPTION (pfile, utf8_char_literals)))
|| (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
&& CPP_OPTION (pfile, rliterals)))))
{
{
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
- &nst);
+ &nst,
+ &result->val.node.spelling);
warn_about_normalization (pfile, result, &nst);
}
result->flags |= NAMED_OP;
result->type = (enum cpp_ttype) result->val.node.node->directive_index;
}
+
+ /* Signal FALLTHROUGH comment followed by another token. */
+ if (fallthrough_comment)
+ result->flags |= PREV_FALLTHROUGH;
break;
case '\'':
if (_cpp_skip_block_comment (pfile))
cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
}
- else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
- || cpp_in_system_header (pfile)))
+ else if (c == '/' && ! CPP_OPTION (pfile, traditional))
{
- /* Warn about comments only if pedantically GNUC89, and not
+ /* Don't warn for system headers. */
+ if (cpp_in_system_header (pfile))
+ ;
+ /* Warn about comments if pedantically GNUC89, and not
in system headers. */
- if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
- && ! buffer->warned_cplusplus_comments)
+ else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
+ && CPP_PEDANTIC (pfile)
+ && ! buffer->warned_cplusplus_comments)
{
- cpp_error (pfile, CPP_DL_PEDWARN,
- "C++ style comments are not allowed in ISO C90");
- cpp_error (pfile, CPP_DL_PEDWARN,
- "(this will be reported only once per input file)");
+ if (cpp_error (pfile, CPP_DL_PEDWARN,
+ "C++ style comments are not allowed in ISO C90"))
+ cpp_error (pfile, CPP_DL_NOTE,
+ "(this will be reported only once per input file)");
buffer->warned_cplusplus_comments = 1;
}
-
+ /* Or if specifically desired via -Wc90-c99-compat. */
+ else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
+ && ! CPP_OPTION (pfile, cplusplus)
+ && ! buffer->warned_cplusplus_comments)
+ {
+ if (cpp_error (pfile, CPP_DL_WARNING,
+ "C++ style comments are incompatible with C90"))
+ cpp_error (pfile, CPP_DL_NOTE,
+ "(this will be reported only once per input file)");
+ buffer->warned_cplusplus_comments = 1;
+ }
+ /* In C89/C94, C++ style comments are forbidden. */
+ else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
+ || CPP_OPTION (pfile, lang) == CLK_STDC94))
+ {
+ /* But don't be confused about valid code such as
+ - // immediately followed by *,
+ - // in a preprocessing directive,
+ - // in an #if 0 block. */
+ if (buffer->cur[1] == '*'
+ || pfile->state.in_directive
+ || pfile->state.skipping)
+ {
+ result->type = CPP_DIV;
+ break;
+ }
+ else if (! buffer->warned_cplusplus_comments)
+ {
+ if (cpp_error (pfile, CPP_DL_ERROR,
+ "C++ style comments are not allowed in "
+ "ISO C90"))
+ cpp_error (pfile, CPP_DL_NOTE,
+ "(this will be reported only once per input "
+ "file)");
+ buffer->warned_cplusplus_comments = 1;
+ }
+ }
if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
}
break;
}
+ if (fallthrough_comment_p (pfile, comment_start))
+ fallthrough_comment = true;
+
+ if (pfile->cb.comment)
+ {
+ size_t len = pfile->buffer->cur - comment_start;
+ pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
+ len + 1);
+ }
+
if (!pfile->state.save_comments)
{
result->flags |= PREV_WHITE;
goto update_tokens_line;
}
+ if (fallthrough_comment)
+ result->flags |= PREV_FALLTHROUGH;
+
/* Save the comment as a token in its own right. */
save_comment (pfile, result, comment_start, c);
break;
if (forms_identifier_p (pfile, true, &nst))
{
result->type = CPP_NAME;
- result->val.node.node = lex_identifier (pfile, base, true, &nst);
+ result->val.node.node = lex_identifier (pfile, base, true, &nst,
+ &result->val.node.spelling);
warn_about_normalization (pfile, result, &nst);
break;
}
buffer->cur++;
}
+ /* FALLTHRU */
default:
create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
break;
}
+ /* Potentially convert the location of the token to a range. */
+ if (result->src_loc >= RESERVED_LOCATION_COUNT
+ && result->type != CPP_EOF)
+ {
+ /* Ensure that any line notes are processed, so that we have the
+ correct physical line/column for the end-point of the token even
+ when a logical line is split via one or more backslashes. */
+ if (buffer->cur >= buffer->notes[buffer->cur_note].pos
+ && !pfile->overlaid_buffer)
+ _cpp_process_line_notes (pfile, false);
+
+ source_range tok_range;
+ tok_range.m_start = result->src_loc;
+ tok_range.m_finish
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (buffer, buffer->cur));
+
+ result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
+ result->src_loc,
+ tok_range, NULL);
+ }
+
return result;
}
return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
}
+/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
+ The buffer must already contain enough space to hold the
+ token's spelling. Returns a pointer to the character after the
+ last character written.
+
+ Bytes of the name with the high bit clear are copied unchanged. A
+ byte with the high bit set is handed to utf8_to_ucn, after which
+ the output pointer is advanced by 10 bytes.
+ NOTE(review): the index arithmetic assumes utf8_to_ucn returns the
+ number of input bytes it consumed -- confirm. */
+unsigned char *
+_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
+{
+ size_t i;
+ const unsigned char *name = NODE_NAME (ident);
+
+ for (i = 0; i < NODE_LEN (ident); i++)
+ if (name[i] & ~0x7F)
+ {
+ i += utf8_to_ucn (buffer, name + i) - 1;
+ buffer += 10;
+ }
+ else
+ *buffer++ = name[i];
+
+ return buffer;
+}
+
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
already contain the enough space to hold the token's spelling.
Returns a pointer to the character after the last character written.
FORSTRING is true if this is to be the spelling after translation
- phase 1 (this is different for UCNs).
+ phase 1 (with the original spelling of extended identifiers), false
+ if extended identifiers should always be written using UCNs (there is
+ no option for always writing them in the internal UTF-8 form).
FIXME: Would be nice if we didn't need the PFILE argument. */
unsigned char *
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
case SPELL_IDENT:
if (forstring)
{
- memcpy (buffer, NODE_NAME (token->val.node.node),
- NODE_LEN (token->val.node.node));
- buffer += NODE_LEN (token->val.node.node);
+ memcpy (buffer, NODE_NAME (token->val.node.spelling),
+ NODE_LEN (token->val.node.spelling));
+ buffer += NODE_LEN (token->val.node.spelling);
}
else
- {
- size_t i;
- const unsigned char * name = NODE_NAME (token->val.node.node);
-
- for (i = 0; i < NODE_LEN (token->val.node.node); i++)
- if (name[i] & ~0x7F)
- {
- i += utf8_to_ucn (buffer, name + i) - 1;
- buffer += 10;
- }
- else
- *buffer++ = NODE_NAME (token->val.node.node)[i];
- }
+ buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
break;
case SPELL_LITERAL:
return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
case SPELL_NONE:
return (a->type != CPP_MACRO_ARG
- || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
+ || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
+ && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
case SPELL_IDENT:
- return a->val.node.node == b->val.node.node;
+ return (a->val.node.node == b->val.node.node
+ && a->val.node.spelling == b->val.node.spelling);
case SPELL_LITERAL:
return (a->val.str.len == b->val.str.len
&& !memcmp (a->val.str.text, b->val.str.text,
len = MIN_BUFF_SIZE;
len = CPP_ALIGN (len);
-#ifdef ENABLE_VALGRIND_CHECKING
+#ifdef ENABLE_VALGRIND_ANNOTATIONS
/* Valgrind warns about uses of interior pointers, so put _cpp_buff
struct first. */
size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
for (; buff; buff = next)
{
next = buff->next;
-#ifdef ENABLE_VALGRIND_CHECKING
+#ifdef ENABLE_VALGRIND_ANNOTATIONS
free (buff);
#else
free (buff->base);
return result;
}
+/* Commit or allocate storage from a buffer. Returns a pointer to
+ SIZE bytes of data that currently sit at the front of
+ PFILE->a_buff. If the hash table provides an alloc_subobject hook,
+ the data is copied into storage obtained from that hook and the
+ buffer front is left where it was; otherwise the buffer front is
+ advanced by SIZE and the original front pointer is returned. */
+
+void *
+_cpp_commit_buff (cpp_reader *pfile, size_t size)
+{
+ void *ptr = BUFF_FRONT (pfile->a_buff);
+
+ if (pfile->hash_table->alloc_subobject)
+ {
+ void *copy = pfile->hash_table->alloc_subobject (size);
+ memcpy (copy, ptr, size);
+ ptr = copy;
+ }
+ else
+ BUFF_FRONT (pfile->a_buff) += size;
+
+ return ptr;
+}
+
/* Say which field of TOK is in use. */
enum cpp_token_fld_kind
return CPP_TOKEN_FLD_SOURCE;
else if (tok->type == CPP_PRAGMA)
return CPP_TOKEN_FLD_PRAGMA;
- /* else fall through */
+ /* fall through */
default:
return CPP_TOKEN_FLD_NONE;
}