testsuite: C++ module tests

[gcc.git] / libcpp / lex.c
diff --git a/libcpp/lex.c b/libcpp/lex.c

index 5cc2224329e0fc4c4eea132076f7c4c015bdf916..07d5a4ff4668853a230c00f53dd48b0691276b09 100644 (file)
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1,5 +1,5 @@
  /* CPP Library - lexical analysis.
-   Copyright (C) 2000-2014 Free Software Foundation, Inc.
+   Copyright (C) 2000-2020 Free Software Foundation, Inc.
     Contributed by Per Bothner, 1994-95.
     Based on CCCP program by Paul Rubin, June 1986
     Adapted to ANSI C, Richard Stallman, Jan 1987
@@ -270,7 +270,7 @@ search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
     extensions used, so SSE4.2 executables cannot run on machines that
     don't support that extension.  */
  
-#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
+#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
  
  /* Replicated character data to be shared between implementations.
     Recall that outside of a context with vector support we can't
@@ -447,18 +447,36 @@ search_line_sse42 (const uchar *s, const uchar *end)
        /* Advance the pointer to an aligned address.  We will re-scan a
          few bytes, but we no longer need care for reading past the
          end of a page, since we're guaranteed a match.  */
-      s = (const uchar *)((si + 16) & -16);
+      s = (const uchar *)((si + 15) & -16);
      }
  
-  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
-     in inline assembly, we can make proper use of the flags set.  */
-  __asm (      "sub $16, %1\n"
-       "       .balign 16\n"
+  /* Main loop, processing 16 bytes at a time.  */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+  while (1)
+    {
+      char f;
+
+      /* By using inline assembly instead of the builtin,
+        we can use the result, as well as the flags set.  */
+      __asm ("%vpcmpestri\t$0, %2, %3"
+            : "=c"(index), "=@ccc"(f)
+            : "m"(*s), "x"(search), "a"(4), "d"(16));
+      if (f)
+       break;
+      
+      s += 16;
+    }
+#else
+  s -= 16;
+  /* By doing the whole loop in inline assembly,
+     we can make proper use of the flags set.  */
+  __asm (      ".balign 16\n"
         "0:     add $16, %1\n"
-       "       %vpcmpestri $0, (%1), %2\n"
+       "       %vpcmpestri\t$0, (%1), %2\n"
         "       jnc 0b"
         : "=&c"(index), "+r"(s)
         : "x"(search), "a"(4), "d"(16));
+#endif
  
   found:
    return s + index;
@@ -513,9 +531,113 @@ init_vectorized_lexer (void)
    search_line_fast = impl;
  }
  
-#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
+#elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
+
+/* A version of the fast scanner using AltiVec vectorized byte compares
+   and VSX unaligned loads (when VSX is available).  This is otherwise
+   the same as the AltiVec version.  */
+
+ATTRIBUTE_NO_SANITIZE_UNDEFINED
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+  typedef __attribute__((altivec(vector))) unsigned char vc;
+
+  const vc repl_nl = {
+    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 
+    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
+  };
+  const vc repl_cr = {
+    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', 
+    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
+  };
+  const vc repl_bs = {
+    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', 
+    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
+  };
+  const vc repl_qm = {
+    '?', '?', '?', '?', '?', '?', '?', '?', 
+    '?', '?', '?', '?', '?', '?', '?', '?', 
+  };
+  const vc zero = { 0 };
+
+  vc data, t;
+
+  /* Main loop processing 16 bytes at a time.  */
+  do
+    {
+      vc m_nl, m_cr, m_bs, m_qm;
+
+      data = __builtin_vec_vsx_ld (0, s);
+      s += 16;
  
-/* A vection of the fast scanner using AltiVec vectorized byte compares.  */
+      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
+      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
+      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
+      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
+      t = (m_nl | m_cr) | (m_bs | m_qm);
+
+      /* T now contains 0xff in bytes for which we matched one of the relevant
+        characters.  We want to exit the loop if any byte in T is non-zero.
+        Below is the expansion of vec_any_ne(t, zero).  */
+    }
+  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
+
+  /* Restore s to point to the 16 bytes we just processed.  */
+  s -= 16;
+
+  {
+#define N  (sizeof(vc) / sizeof(long))
+
+    union {
+      vc v;
+      /* Statically assert that N is 2 or 4.  */
+      unsigned long l[(N == 2 || N == 4) ? N : -1];
+    } u;
+    unsigned long l, i = 0;
+
+    u.v = t;
+
+    /* Find the first word of T that is non-zero.  */
+    switch (N)
+      {
+      case 4:
+       l = u.l[i++];
+       if (l != 0)
+         break;
+       s += sizeof(unsigned long);
+       l = u.l[i++];
+       if (l != 0)
+         break;
+       s += sizeof(unsigned long);
+       /* FALLTHRU */
+      case 2:
+       l = u.l[i++];
+       if (l != 0)
+         break;
+       s += sizeof(unsigned long);
+       l = u.l[i];
+      }
+
+    /* L now contains 0xff in bytes for which we matched one of the
+       relevant characters.  We can find the byte index by finding
+       its bit index and dividing by 8.  */
+#ifdef __BIG_ENDIAN__
+    l = __builtin_clzl(l) >> 3;
+#else
+    l = __builtin_ctzl(l) >> 3;
+#endif
+    return s + l;
+
+#undef N
+  }
+}
+
+#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
+
+/* A version of the fast scanner using AltiVec vectorized byte compares.
+   This cannot be used for little endian because vec_lvsl/lvsr are
+   deprecated for little endian and the code won't work properly.  */
  /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
     so we can't compile this function without -maltivec on the command line
     (or implied by some other switch).  */
@@ -557,13 +679,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
       beginning with all ones and shifting in zeros according to the
       mis-alignment.  The LVSR instruction pulls the exact shift we
       want from the address.  */
-#ifdef __BIG_ENDIAN__
    mask = __builtin_vec_lvsr(0, s);
    mask = __builtin_vec_perm(zero, ones, mask);
-#else
-  mask = __builtin_vec_lvsl(0, s);
-  mask = __builtin_vec_perm(ones, zero, mask);
-#endif
    data &= mask;
  
    /* While altivec loads mask addresses, we still need to align S so
@@ -616,6 +733,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
         if (l != 0)
           break;
         s += sizeof(unsigned long);
+       /* FALLTHROUGH */
        case 2:
         l = u.l[i++];
         if (l != 0)
@@ -627,18 +745,109 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
      /* L now contains 0xff in bytes for which we matched one of the
         relevant characters.  We can find the byte index by finding
         its bit index and dividing by 8.  */
-#ifdef __BIG_ENDIAN__
      l = __builtin_clzl(l) >> 3;
-#else
-    l = __builtin_ctzl(l) >> 3;
-#endif
      return s + l;
  
  #undef N
    }
  }
  
-#elif defined (__ARM_NEON__)
+#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
+#include "arm_neon.h"
+
+/* This doesn't have to be the exact page size, but no system may use
+   a size smaller than this.  ARMv8 requires a minimum page size of
+   4k.  The impact of being conservative here is a small number of
+   cases will take the slightly slower entry path into the main
+   loop.  */
+
+#define AARCH64_MIN_PAGE_SIZE 4096
+
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+  const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
+  const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
+  const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
+  const uint8x16_t repl_qm = vdupq_n_u8 ('?');
+  const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
+
+#ifdef __ARM_BIG_ENDIAN
+  const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
+#else
+  const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
+#endif
+
+  unsigned int found;
+  const uint8_t *p;
+  uint8x16_t data;
+  uint8x16_t t;
+  uint16x8_t m;
+  uint8x16_t u, v, w;
+
+  /* Align the source pointer.  */
+  p = (const uint8_t *)((uintptr_t)s & -16);
+
+  /* Assuming random string start positions, with a 4k page size we'll take
+     the slow path about 0.37% of the time.  */
+  if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
+                        - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
+                       < 16, 0))
+    {
+      /* Slow path: the string starts near a possible page boundary.  */
+      uint32_t misalign, mask;
+
+      misalign = (uintptr_t)s & 15;
+      mask = (-1u << misalign) & 0xffff;
+      data = vld1q_u8 (p);
+      t = vceqq_u8 (data, repl_nl);
+      u = vceqq_u8 (data, repl_cr);
+      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+      t = vorrq_u8 (v, w);
+      t = vandq_u8 (t, xmask);
+      m = vpaddlq_u8 (t);
+      m = vshlq_u16 (m, shift);
+      found = vaddvq_u16 (m);
+      found &= mask;
+      if (found)
+       return (const uchar*)p + __builtin_ctz (found);
+    }
+  else
+    {
+      data = vld1q_u8 ((const uint8_t *) s);
+      t = vceqq_u8 (data, repl_nl);
+      u = vceqq_u8 (data, repl_cr);
+      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+      t = vorrq_u8 (v, w);
+      if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
+       goto done;
+    }
+
+  do
+    {
+      p += 16;
+      data = vld1q_u8 (p);
+      t = vceqq_u8 (data, repl_nl);
+      u = vceqq_u8 (data, repl_cr);
+      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+      t = vorrq_u8 (v, w);
+    } while (!vpaddd_u64 ((uint64x2_t)t));
+
+done:
+  /* Now that we've found the terminating substring, work out precisely where
+     we need to stop.  */
+  t = vandq_u8 (t, xmask);
+  m = vpaddlq_u8 (t);
+  m = vshlq_u16 (m, shift);
+  found = vaddvq_u16 (m);
+  return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
+         + __builtin_ctz (found));
+}
+
+#elif defined (__ARM_NEON)
  #include "arm_neon.h"
  
  static const uchar *
@@ -703,7 +912,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
  
  #else
  
-/* We only have one accellerated alternative.  Use a direct call so that
+/* We only have one accelerated alternative.  Use a direct call so that
     we encourage inlining.  */
  
  #define search_line_fast  search_line_acc_char
@@ -853,7 +1062,7 @@ _cpp_clean_line (cpp_reader *pfile)
        d = (uchar *) s;
  
        /* Handle DOS line endings.  */
-      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
+      if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
         s++;
      }
  
@@ -1023,7 +1232,7 @@ static int
  skip_line_comment (cpp_reader *pfile)
  {
    cpp_buffer *buffer = pfile->buffer;
-  source_location orig_line = pfile->line_table->highest_line;
+  location_t orig_line = pfile->line_table->highest_line;
  
    while (*buffer->cur != '\n')
      buffer->cur++;
@@ -1104,7 +1313,9 @@ warn_about_normalization (cpp_reader *pfile,
      }
  }
  
-/* Returns TRUE if the sequence starting at buffer->cur is invalid in
+static const cppchar_t utf8_signifier = 0xC0;
+
+/* Returns TRUE if the sequence starting at buffer->cur is valid in
     an identifier.  FIRST is TRUE if this starts an identifier.  */
  static bool
  forms_identifier_p (cpp_reader *pfile, int first,
@@ -1127,21 +1338,52 @@ forms_identifier_p (cpp_reader *pfile, int first,
        return true;
      }
  
-  /* Is this a syntactically valid UCN?  */
-  if (CPP_OPTION (pfile, extended_identifiers)
-      && *buffer->cur == '\\'
-      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+  /* Is this a syntactically valid UCN or a valid UTF-8 char?  */
+  if (CPP_OPTION (pfile, extended_identifiers))
      {
-      buffer->cur += 2;
-      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
-                         state))
-       return true;
-      buffer->cur -= 2;
+      cppchar_t s;
+      if (*buffer->cur >= utf8_signifier)
+       {
+         if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+                              state, &s))
+           return true;
+       }
+      else if (*buffer->cur == '\\'
+              && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+       {
+         buffer->cur += 2;
+         if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+                             state, &s, NULL, NULL))
+           return true;
+         buffer->cur -= 2;
+       }
      }
  
    return false;
  }
  
+/* Helper function to issue error about improper __VA_OPT__ use.  */
+static void
+maybe_va_opt_error (cpp_reader *pfile)
+{
+  if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
+    {
+      /* __VA_OPT__ should not be accepted at all, but allow it in
+        system headers.  */
+      if (!_cpp_in_system_header (pfile))
+       cpp_error (pfile, CPP_DL_PEDWARN,
+                  "__VA_OPT__ is not available until C++20");
+    }
+  else if (!pfile->state.va_args_ok)
+    {
+      /* __VA_OPT__ should only appear in the replacement list of a
+        variadic macro.  */
+      cpp_error (pfile, CPP_DL_PEDWARN,
+                "__VA_OPT__ can only appear in the expansion"
+                " of a C++20 variadic macro");
+    }
+}
+
  /* Helper function to get the cpp_hashnode of the identifier BASE.  */
  static cpp_hashnode *
  lex_identifier_intern (cpp_reader *pfile, const uchar *base)
@@ -1186,6 +1428,9 @@ lex_identifier_intern (cpp_reader *pfile, const uchar *base)
                        " of a C99 variadic macro");
         }
  
+      if (result == pfile->spec_nodes.n__VA_OPT__)
+       maybe_va_opt_error (pfile);
+
        /* For -Wc++-compat, warn about use of C++ named operators.  */
        if (result->flags & NODE_WARN_OPERATOR)
         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
@@ -1209,7 +1454,7 @@ _cpp_lex_identifier (cpp_reader *pfile, const char *name)
  /* Lex an identifier starting at BUFFER->CUR - 1.  */
  static cpp_hashnode *
  lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
-               struct normalize_state *nst)
+               struct normalize_state *nst, cpp_hashnode **spelling)
  {
    cpp_hashnode *result;
    const uchar *cur;
@@ -1229,7 +1474,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
    pfile->buffer->cur = cur;
    if (starts_ucn || forms_identifier_p (pfile, false, nst))
      {
-      /* Slower version for identifiers containing UCNs (or $).  */
+      /* Slower version for identifiers containing UCNs
+        or extended chars (including $).  */
        do {
         while (ISIDNUM (*pfile->buffer->cur))
           {
@@ -1239,6 +1485,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
        } while (forms_identifier_p (pfile, false, nst));
        result = _cpp_interpret_identifier (pfile, base,
                                           pfile->buffer->cur - base);
+      *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
      }
    else
      {
@@ -1247,6 +1494,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
  
        result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
                                                   base, len, hash, HT_ALLOC));
+      *spelling = result;
      }
  
    /* Rarely, identifiers require diagnostics when lexed.  */
@@ -1273,6 +1521,11 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
                        " of a C99 variadic macro");
         }
  
+      /* __VA_OPT__ should only appear in the replacement list of a
+        variadic macro.  */
+      if (result == pfile->spec_nodes.n__VA_OPT__)
+       maybe_va_opt_error (pfile);
+
        /* For -Wc++-compat, warn about use of C++ named operators.  */
        if (result->flags & NODE_WARN_OPERATOR)
         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
@@ -1304,6 +1557,9 @@ lex_number (cpp_reader *pfile, cpp_string *number,
           NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
           cur++;
         }
+      /* A number can't end with a digit separator.  */
+      while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
+       --cur;
  
        pfile->buffer->cur = cur;
      }
@@ -1321,44 +1577,90 @@ static void
  create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
                 unsigned int len, enum cpp_ttype type)
  {
-  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
-
-  memcpy (dest, base, len);
-  dest[len] = '\0';
    token->type = type;
    token->val.str.len = len;
-  token->val.str.text = dest;
+  token->val.str.text = cpp_alloc_token_string (pfile, base, len);
  }
  
+const uchar *
+cpp_alloc_token_string (cpp_reader *pfile,
+                       const unsigned char *ptr, unsigned len)
+{
+  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
+
+  dest[len] = 0;
+  memcpy (dest, ptr, len);
+  return dest;
+}
+
+/* The raw buffers accumulating a literal.  FIRST is the first buffer,
+   LAST the currently open one.  Used for string literal lexing.  */
+struct lit_accum {
+  _cpp_buff *first;
+  _cpp_buff *last;
+  const uchar *rpos;
+  size_t accum;
+
+  lit_accum ()
+    : first (NULL), last (NULL), rpos (0), accum (0)
+  {
+  }
+
+  void append (cpp_reader *, const uchar *, size_t);
+
+  void read_begin (cpp_reader *);
+  bool reading_p () const
+  {
+    return rpos != NULL;
+  }
+  char read_char ()
+  {
+    char c = *rpos++;
+    if (rpos == BUFF_FRONT (last))
+      rpos = NULL;
+    return c;
+  }
+};
+
  /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
     sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
  
-static void
-bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
-               _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+void
+lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
  {
-  _cpp_buff *first_buff = *first_buff_p;
-  _cpp_buff *last_buff = *last_buff_p;
-
-  if (first_buff == NULL)
-    first_buff = last_buff = _cpp_get_buff (pfile, len);
-  else if (len > BUFF_ROOM (last_buff))
+  if (!last)
+    /* Starting.  */
+    first = last = _cpp_get_buff (pfile, len);
+  else if (len > BUFF_ROOM (last))
      {
-      size_t room = BUFF_ROOM (last_buff);
-      memcpy (BUFF_FRONT (last_buff), base, room);
-      BUFF_FRONT (last_buff) += room;
+      /* There is insufficient room in the buffer.  Copy what we can,
+        and then either extend or create a new one.  */
+      size_t room = BUFF_ROOM (last);
+      memcpy (BUFF_FRONT (last), base, room);
+      BUFF_FRONT (last) += room;
        base += room;
        len -= room;
-      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
-    }
+      accum += room;
+
+      gcc_checking_assert (!rpos);
  
-  memcpy (BUFF_FRONT (last_buff), base, len);
-  BUFF_FRONT (last_buff) += len;
+      last = _cpp_append_extend_buff (pfile, last, len);
+    }
  
-  *first_buff_p = first_buff;
-  *last_buff_p = last_buff;
+  memcpy (BUFF_FRONT (last), base, len);
+  BUFF_FRONT (last) += len;
+  accum += len;
  }
  
+void
+lit_accum::read_begin (cpp_reader *pfile)
+{
+  /* We never accumulate more than 4 chars to read.  */
+  if (BUFF_ROOM (last) < 4)
+
+    last = _cpp_append_extend_buff (pfile, last, 4);
+  rpos = BUFF_FRONT (last);
+}
  
  /* Returns true if a macro has been defined.
     This might not work if compile with -save-temps,
@@ -1382,271 +1684,275 @@ is_macro(cpp_reader *pfile, const uchar *base)
    cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
                                         base, cur - base, hash, HT_NO_INSERT));
  
-  return !result ? false : (result->type == NT_MACRO);
+  return result && cpp_macro_p (result);
  }
  
+/* Returns true if a literal suffix does not have the expected form
+   and is defined as a macro.  */
  
-/* Lexes a raw string.  The stored string contains the spelling, including
-   double quotes, delimiter string, '(' and ')', any leading
-   'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
-   literal, or CPP_OTHER if it was not properly terminated.
+static bool
+is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
+{
+  /* User-defined literals outside of namespace std must start with a single
+     underscore, so assume anything of that form really is a UDL suffix.
+     We don't need to worry about UDLs defined inside namespace std because
+     their names are reserved, so cannot be used as macro names in valid
+     programs.  */
+  if (base[0] == '_' && base[1] != '_')
+    return false;
+  return is_macro (pfile, base);
+}
+
+/* Lexes a raw string.  The stored string contains the spelling,
+   including double quotes, delimiter string, '(' and ')', any leading
+   'L', 'u', 'U' or 'u8' and 'R' modifier.  The created token contains
+   the type of the literal, or CPP_OTHER if it was not properly
+   terminated.
+
+   BASE is the start of the token.  Updates pfile->buffer->cur to just
+   after the lexed string.
  
     The spelling is NUL-terminated, but it is not guaranteed that this
     is the first NUL since embedded NULs are preserved.  */
  
  static void
-lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
-               const uchar *cur)
-{
-  uchar raw_prefix[17];
-  uchar temp_buffer[18];
-  const uchar *orig_base;
-  unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
-  enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
-  raw_str_phase phase = RAW_STR_PREFIX;
-  enum cpp_ttype type;
-  size_t total_len = 0;
-  /* Index into temp_buffer during phases other than RAW_STR,
-     during RAW_STR phase 17 to tell BUF_APPEND that nothing should
-     be appended to temp_buffer.  */
-  size_t temp_buffer_len = 0;
-  _cpp_buff *first_buff = NULL, *last_buff = NULL;
-  size_t raw_prefix_start;
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
+{
+  const uchar *pos = base;
+
+  /* 'tis a pity this information isn't passed down from the lexer's
+     initial categorization of the token.  */
+  enum cpp_ttype type = CPP_STRING;
+
+  if (*pos == 'L')
+    {
+      type = CPP_WSTRING;
+      pos++;
+    }
+  else if (*pos == 'U')
+    {
+      type = CPP_STRING32;
+      pos++;
+    }
+  else if (*pos == 'u')
+    {
+      if (pos[1] == '8')
+       {
+         type = CPP_UTF8STRING;
+         pos++;
+       }
+      else
+       type = CPP_STRING16;
+      pos++;
+    }
+
+  gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
+  pos += 2;
+
    _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
  
-  type = (*base == 'L' ? CPP_WSTRING :
-         *base == 'U' ? CPP_STRING32 :
-         *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
-         : CPP_STRING);
-
-#define BUF_APPEND(STR,LEN)                                    \
-      do {                                                     \
-       bufring_append (pfile, (const uchar *)(STR), (LEN),     \
-                       &first_buff, &last_buff);               \
-       total_len += (LEN);                                     \
-       if (__builtin_expect (temp_buffer_len < 17, 0)          \
-           && (const uchar *)(STR) != base                     \
-           && (LEN) <= 2)                                      \
-         {                                                     \
-           memcpy (temp_buffer + temp_buffer_len,              \
-                   (const uchar *)(STR), (LEN));               \
-           temp_buffer_len += (LEN);                           \
-         }                                                     \
-      } while (0);
-
-  orig_base = base;
-  ++cur;
-  raw_prefix_start = cur - base;
+  /* Skip notes before the ".  */
+  while (note->pos < pos)
+    ++note;
+
+  lit_accum accum;
+  
+  uchar prefix[17];
+  unsigned prefix_len = 0;
+  enum Phase
+  {
+   PHASE_PREFIX = -2,
+   PHASE_NONE = -1,
+   PHASE_SUFFIX = 0
+  } phase = PHASE_PREFIX;
+
    for (;;)
      {
-      cppchar_t c;
+      gcc_checking_assert (note->pos >= pos);
  
-      /* If we previously performed any trigraph or line splicing
-        transformations, undo them in between the opening and closing
-        double quote.  */
-      while (note->pos < cur)
-       ++note;
-      for (; note->pos == cur; ++note)
-       {
-         switch (note->type)
-           {
-           case '\\':
-           case ' ':
-             /* Restore backslash followed by newline.  */
-             BUF_APPEND (base, cur - base);
-             base = cur;
-             BUF_APPEND ("\\", 1);
-           after_backslash:
-             if (note->type == ' ')
-               {
-                 /* GNU backslash whitespace newline extension.  FIXME
-                    could be any sequence of non-vertical space.  When we
-                    can properly restore any such sequence, we should mark
-                    this note as handled so _cpp_process_line_notes
-                    doesn't warn.  */
-                 BUF_APPEND (" ", 1);
-               }
+      /* Undo any escaped newlines and trigraphs.  */
+      if (!accum.reading_p () && note->pos == pos)
+       switch (note->type)
+         {
+         case '\\':
+         case ' ':
+           /* Restore backslash followed by newline.  */
+           accum.append (pfile, base, pos - base);
+           base = pos;
+           accum.read_begin (pfile);
+           accum.append (pfile, UC"\\", 1);
+
+         after_backslash:
+           if (note->type == ' ')
+             /* GNU backslash whitespace newline extension.  FIXME
+                could be any sequence of non-vertical space.  When we
+                can properly restore any such sequence, we should
+                mark this note as handled so _cpp_process_line_notes
+                doesn't warn.  */
+             accum.append (pfile, UC" ", 1);
+
+           accum.append (pfile, UC"\n", 1);
+           note++;
+           break;
  
-             BUF_APPEND ("\n", 1);
-             break;
+         case '\n':
+           /* This can happen for ??/<NEWLINE> when trigraphs are not
+              being interpreted.  */
+           gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
+           note->type = 0;
+           note++;
+           break;
  
-           case 0:
-             /* Already handled.  */
-             break;
+         default:
+           gcc_checking_assert (_cpp_trigraph_map[note->type]);
  
-           default:
-             if (_cpp_trigraph_map[note->type])
-               {
-                 /* Don't warn about this trigraph in
-                    _cpp_process_line_notes, since trigraphs show up as
-                    trigraphs in raw strings.  */
-                 uchar type = note->type;
-                 note->type = 0;
-
-                 if (!CPP_OPTION (pfile, trigraphs))
-                   /* If we didn't convert the trigraph in the first
-                      place, don't do anything now either.  */
-                   break;
+           /* Don't warn about this trigraph in
+              _cpp_process_line_notes, since trigraphs show up as
+              trigraphs in raw strings.  */
+           uchar type = note->type;
+           note->type = 0;
  
-                 BUF_APPEND (base, cur - base);
-                 base = cur;
-                 BUF_APPEND ("??", 2);
+           if (CPP_OPTION (pfile, trigraphs))
+             {
+               accum.append (pfile, base, pos - base);
+               base = pos;
+               accum.read_begin (pfile);
+               accum.append (pfile, UC"??", 2);
+               accum.append (pfile, &type, 1);
+
+               /* ??/ followed by newline gets two line notes, one for
+                  the trigraph and one for the backslash/newline.  */
+               if (type == '/' && note[1].pos == pos)
+                 {
+                   note++;
+                   gcc_assert (note->type == '\\' || note->type == ' ');
+                   goto after_backslash;
+                 }
+               /* Skip the replacement character.  */
+               base = ++pos;
+             }
  
-                 /* ??/ followed by newline gets two line notes, one for
-                    the trigraph and one for the backslash/newline.  */
-                 if (type == '/' && note[1].pos == cur)
-                   {
-                     if (note[1].type != '\\'
-                         && note[1].type != ' ')
-                       abort ();
-                     BUF_APPEND ("/", 1);
-                     ++note;
-                     goto after_backslash;
-                   }
-                 else
-                   {
-                     /* Skip the replacement character.  */
-                     base = ++cur;
-                     BUF_APPEND (&type, 1);
-                     c = type;
-                     goto check_c;
-                   }
-               }
-             else
-               abort ();
-             break;
-           }
-       }
-      c = *cur++;
-      if (__builtin_expect (temp_buffer_len < 17, 0))
-       temp_buffer[temp_buffer_len++] = c;
+           note++;
+           break;
+         }
+
+      /* Now get a char to process.  Either from an expanded note, or
+        from the line buffer.  */
+      bool read_note = accum.reading_p ();
+      char c = read_note ? accum.read_char () : *pos++;
  
-     check_c:
-      if (phase == RAW_STR_PREFIX)
+      if (phase == PHASE_PREFIX)
         {
-         while (raw_prefix_len < temp_buffer_len)
+         if (c == '(')
             {
-             raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
-             switch (raw_prefix[raw_prefix_len])
-               {
-               case ' ': case '(': case ')': case '\\': case '\t':
-               case '\v': case '\f': case '\n': default:
-                 break;
-               /* Basic source charset except the above chars.  */
-               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-               case 'y': case 'z':
-               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-               case 'Y': case 'Z':
-               case '0': case '1': case '2': case '3': case '4': case '5':
-               case '6': case '7': case '8': case '9':
-               case '_': case '{': case '}': case '#': case '[': case ']':
-               case '<': case '>': case '%': case ':': case ';': case '.':
-               case '?': case '*': case '+': case '-': case '/': case '^':
-               case '&': case '|': case '~': case '!': case '=': case ',':
-               case '"': case '\'':
-                 if (raw_prefix_len < 16)
-                   {
-                     raw_prefix_len++;
-                     continue;
-                   }
-                 break;
-               }
-
-             if (raw_prefix[raw_prefix_len] != '(')
-               {
-                 int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
-                 if (raw_prefix_len == 16)
-                   cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
-                                        col, "raw string delimiter longer "
-                                             "than 16 characters");
-                 else if (raw_prefix[raw_prefix_len] == '\n')
-                   cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
-                                        col, "invalid new-line in raw "
-                                             "string delimiter");
-                 else
-                   cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
-                                        col, "invalid character '%c' in "
-                                             "raw string delimiter",
-                                        (int) raw_prefix[raw_prefix_len]);
-                 pfile->buffer->cur = orig_base + raw_prefix_start - 1;
-                 create_literal (pfile, token, orig_base,
-                                 raw_prefix_start - 1, CPP_OTHER);
-                 if (first_buff)
-                   _cpp_release_buff (pfile, first_buff);
-                 return;
-               }
-             raw_prefix[raw_prefix_len] = '"';
-             phase = RAW_STR;
-             /* Nothing should be appended to temp_buffer during
-                RAW_STR phase.  */
-             temp_buffer_len = 17;
-             break;
+             /* Done.  */
+             phase = PHASE_NONE;
+             prefix[prefix_len++] = '"';
             }
-         continue;
-       }
-      else if (phase == RAW_STR_SUFFIX)
-       {
-         while (raw_suffix_len <= raw_prefix_len
-                && raw_suffix_len < temp_buffer_len
-                && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
-           raw_suffix_len++;
-         if (raw_suffix_len > raw_prefix_len)
-           break;
-         if (raw_suffix_len == temp_buffer_len)
+         else if (prefix_len < 16
+                  /* Prefix chars are any of the basic character set,
+                     [lex.charset] except for '
+                     ()\\\t\v\f\n'. Optimized for a contiguous
+                     alphabet.  */
+                  /* Unlike a switch, this collapses down to one or
+                     two shift and bitmask operations on an ASCII
+                     system, with an outlier or two.   */
+                  && (('Z' - 'A' == 25
+                       ? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+                       : ISIDST (c))
+                      || (c >= '0' && c <= '9')
+                      || c == '_' || c == '{' || c == '}'
+                      || c == '[' || c == ']' || c == '#'
+                      || c == '<' || c == '>' || c == '%'
+                      || c == ':' || c == ';' || c == '.' || c == '?'
+                      || c == '*' || c == '+' || c == '-' || c == '/'
+                      || c == '^' || c == '&' || c == '|' || c == '~'
+                      || c == '!' || c == '=' || c == ','
+                      || c == '"' || c == '\''))
+           prefix[prefix_len++] = c;
+         else
+           {
+             /* Something is wrong.  */
+             int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
+             if (prefix_len == 16)
+               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+                                    col, "raw string delimiter longer "
+                                    "than 16 characters");
+             else if (c == '\n')
+               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+                                    col, "invalid new-line in raw "
+                                    "string delimiter");
+             else
+               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+                                    col, "invalid character '%c' in "
+                                    "raw string delimiter", c);
+             type = CPP_OTHER;
+             phase = PHASE_NONE;
+             /* Continue until we get a close quote, that's probably
+                the best failure mode.  */
+             prefix_len = 0;
+           }
+         if (c != '\n')
             continue;
-         phase = RAW_STR;
-         /* Nothing should be appended to temp_buffer during
-            RAW_STR phase.  */
-         temp_buffer_len = 17;
         }
-      if (c == ')')
+
+      if (phase != PHASE_NONE)
         {
-         phase = RAW_STR_SUFFIX;
-         raw_suffix_len = 0;
-         temp_buffer_len = 0;
+         if (prefix[phase] != c)
+           phase = PHASE_NONE;
+         else if (unsigned (phase + 1) == prefix_len)
+           break;
+         else
+           {
+             phase = Phase (phase + 1);
+             continue;
+           }
         }
-      else if (c == '\n')
+
+      if (!prefix_len && c == '"')
+       /* Failure mode lexing.  */
+       goto out;
+      else if (prefix_len && c == ')')
+       phase = PHASE_SUFFIX;
+      else if (!read_note && c == '\n')
         {
+         pos--;
+         pfile->buffer->cur = pos;
           if (pfile->state.in_directive
               || (pfile->state.parsing_args
                   && pfile->buffer->next_line >= pfile->buffer->rlimit))
             {
-             cur--;
-             type = CPP_OTHER;
               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
                                    "unterminated raw string");
-             break;
+             type = CPP_OTHER;
+             goto out;
             }
  
-         BUF_APPEND (base, cur - base);
+         accum.append (pfile, base, pos - base + 1);
+         _cpp_process_line_notes (pfile, false);
  
-         if (pfile->buffer->cur < pfile->buffer->rlimit)
+         if (pfile->buffer->next_line < pfile->buffer->rlimit)
             CPP_INCREMENT_LINE (pfile, 0);
           pfile->buffer->need_line = true;
  
-         pfile->buffer->cur = cur-1;
-         _cpp_process_line_notes (pfile, false);
           if (!_cpp_get_fresh_line (pfile))
             {
-             source_location src_loc = token->src_loc;
+             /* We ran out of file and failed to get a line.  */
+             location_t src_loc = token->src_loc;
               token->type = CPP_EOF;
               /* Tell the compiler the line number of the EOF token.  */
               token->src_loc = pfile->line_table->highest_line;
               token->flags = BOL;
-             if (first_buff != NULL)
-               _cpp_release_buff (pfile, first_buff);
+             if (accum.first)
+               _cpp_release_buff (pfile, accum.first);
               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
                                    "unterminated raw string");
+             /* Now pop the buffer that _cpp_get_fresh_line did not.  */
+             _cpp_pop_buffer (pfile);
               return;
             }
  
-         cur = base = pfile->buffer->cur;
+         pos = base = pfile->buffer->cur;
           note = &pfile->buffer->notes[pfile->buffer->cur_note];
         }
      }
@@ -1655,9 +1961,8 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
      {
        /* If a string format macro, say from inttypes.h, is placed touching
          a string literal it could be parsed as a C++11 user-defined string
-        literal thus breaking the program.
-        Try to identify macros with is_macro. A warning is issued. */
-      if (is_macro (pfile, cur))
+        literal thus breaking the program.  */
+      if (is_macro_not_literal_suffix (pfile, pos))
         {
           /* Raise a warning, but do not consume subsequent tokens.  */
           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
@@ -1667,37 +1972,37 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
                                    "a space between literal and string macro");
         }
        /* Grab user defined literal suffix.  */
-      else if (ISIDST (*cur))
+      else if (ISIDST (*pos))
         {
           type = cpp_userdef_string_add_type (type);
-         ++cur;
+         ++pos;
  
-         while (ISIDNUM (*cur))
-           ++cur;
+         while (ISIDNUM (*pos))
+           ++pos;
         }
      }
  
-  pfile->buffer->cur = cur;
-  if (first_buff == NULL)
-    create_literal (pfile, token, base, cur - base, type);
+ out:
+  pfile->buffer->cur = pos;
+  if (!accum.accum)
+    create_literal (pfile, token, base, pos - base, type);
    else
      {
-      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+      size_t extra_len = pos - base;
+      uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);
  
        token->type = type;
-      token->val.str.len = total_len + (cur - base);
+      token->val.str.len = accum.accum + extra_len;
        token->val.str.text = dest;
-      last_buff = first_buff;
-      while (last_buff != NULL)
+      for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
         {
-         memcpy (dest, last_buff->base,
-                 BUFF_FRONT (last_buff) - last_buff->base);
-         dest += BUFF_FRONT (last_buff) - last_buff->base;
-         last_buff = last_buff->next;
+         size_t len = BUFF_FRONT (buf) - buf->base;
+         memcpy (dest, buf->base, len);
+         dest += len;
         }
-      _cpp_release_buff (pfile, first_buff);
-      memcpy (dest, base, cur - base);
-      dest[cur - base] = '\0';
+      _cpp_release_buff (pfile, accum.first);
+      memcpy (dest, base, extra_len);
+      dest[extra_len] = '\0';
      }
  }
  
@@ -1730,7 +2035,7 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
      }
    if (terminator == 'R')
      {
-      lex_raw_string (pfile, token, base, cur);
+      lex_raw_string (pfile, token, base);
        return;
      }
    if (terminator == '"')
@@ -1741,7 +2046,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
    else if (terminator == '\'')
      type = (*base == 'L' ? CPP_WCHAR :
             *base == 'U' ? CPP_CHAR32 :
-           *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
+           *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
+                        : CPP_CHAR);
    else
      terminator = '>', type = CPP_HEADER_NAME;
  
@@ -1784,9 +2090,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
      {
        /* If a string format macro, say from inttypes.h, is placed touching
          a string literal it could be parsed as a C++11 user-defined string
-        literal thus breaking the program.
-        Try to identify macros with is_macro. A warning is issued. */
-      if (is_macro (pfile, cur))
+        literal thus breaking the program.  */
+      if (is_macro_not_literal_suffix (pfile, cur))
         {
           /* Raise a warning, but do not consume subsequent tokens.  */
           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
@@ -1806,6 +2111,12 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
             ++cur;
         }
      }
+  else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
+          && is_macro (pfile, cur)
+          && !pfile->state.skipping)
+    cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
+                          token->src_loc, 0, "C++11 requires a space "
+                          "between string literal and macro");
  
    pfile->buffer->cur = cur;
    create_literal (pfile, token, base, cur - base, type);
@@ -1906,6 +2217,261 @@ save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
    store_comment (pfile, token);
  }
  
+/* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
+   comment.  */
+
+static bool
+fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
+{
+  const unsigned char *from = comment_start + 1;
+
+  switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
+    {
+      /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
+        don't recognize any comments.  The latter only checks attributes,
+        the former doesn't warn.  */
+    case 0:
+    default:
+      return false;
+      /* -Wimplicit-fallthrough=1 considers any comment, no matter what
+        content it has.  */
+    case 1:
+      return true;
+    case 2:
+      /* -Wimplicit-fallthrough=2 looks for (case insensitive)
+        .*falls?[ \t-]*thr(u|ough).* regex.  */
+      for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
+          from++)
+       {
+         /* Is there anything like strpbrk with upper boundary, or
+            memchr looking for 2 characters rather than just one?  */
+         if (from[0] != 'f' && from[0] != 'F')
+           continue;
+         if (from[1] != 'a' && from[1] != 'A')
+           continue;
+         if (from[2] != 'l' && from[2] != 'L')
+           continue;
+         if (from[3] != 'l' && from[3] != 'L')
+           continue;
+         from += sizeof "fall" - 1;
+         if (from[0] == 's' || from[0] == 'S')
+           from++;
+         while (*from == ' ' || *from == '\t' || *from == '-')
+           from++;
+         if (from[0] != 't' && from[0] != 'T')
+           continue;
+         if (from[1] != 'h' && from[1] != 'H')
+           continue;
+         if (from[2] != 'r' && from[2] != 'R')
+           continue;
+         if (from[3] == 'u' || from[3] == 'U')
+           return true;
+         if (from[3] != 'o' && from[3] != 'O')
+           continue;
+         if (from[4] != 'u' && from[4] != 'U')
+           continue;
+         if (from[5] != 'g' && from[5] != 'G')
+           continue;
+         if (from[6] != 'h' && from[6] != 'H')
+           continue;
+         return true;
+       }
+      return false;
+    case 3:
+    case 4:
+      break;
+    }
+
+  /* Whole comment contents:
+     -fallthrough
+     @fallthrough@
+   */
+  if (*from == '-' || *from == '@')
+    {
+      size_t len = sizeof "fallthrough" - 1;
+      if ((size_t) (pfile->buffer->cur - from - 1) < len)
+       return false;
+      if (memcmp (from + 1, "fallthrough", len))
+       return false;
+      if (*from == '@')
+       {
+         if (from[len + 1] != '@')
+           return false;
+         len++;
+       }
+      from += 1 + len;
+    }
+  /* Whole comment contents (regex):
+     lint -fallthrough[ \t]*
+   */
+  else if (*from == 'l')
+    {
+      size_t len = sizeof "int -fallthrough" - 1;
+      if ((size_t) (pfile->buffer->cur - from - 1) < len)
+       return false;
+      if (memcmp (from + 1, "int -fallthrough", len))
+       return false;
+      from += 1 + len;
+      while (*from == ' ' || *from == '\t')
+       from++;
+    }
+  /* Whole comment contents (regex):
+     [ \t]*FALLTHR(U|OUGH)[ \t]*
+   */
+  else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
+    {
+      while (*from == ' ' || *from == '\t')
+       from++;
+      if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
+       return false;
+      if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
+       return false;
+      from += sizeof "FALLTHR" - 1;
+      if (*from == 'U')
+       from++;
+      else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
+       return false;
+      else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
+       return false;
+      else
+       from += sizeof "OUGH" - 1;
+      while (*from == ' ' || *from == '\t')
+       from++;
+    }
+  /* Whole comment contents (regex):
+     [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
+     [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
+     [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
+   */
+  else
+    {
+      while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
+       from++;
+      unsigned char f = *from;
+      bool all_upper = false;
+      if (f == 'E' || f == 'e')
+       {
+         if ((size_t) (pfile->buffer->cur - from)
+             < sizeof "else fallthru" - 1)
+           return false;
+         if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
+           all_upper = true;
+         else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
+           return false;
+         from += sizeof "else" - 1;
+         if (*from == ',')
+           from++;
+         if (*from != ' ')
+           return false;
+         from++;
+         if (all_upper && *from == 'f')
+           return false;
+         if (f == 'e' && *from == 'F')
+           return false;
+         f = *from;
+       }
+      else if (f == 'I' || f == 'i')
+       {
+         if ((size_t) (pfile->buffer->cur - from)
+             < sizeof "intentional fallthru" - 1)
+           return false;
+         if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
+                                 sizeof "NTENTIONAL" - 1) == 0)
+           all_upper = true;
+         else if (memcmp (from + 1, "ntentional",
+                          sizeof "ntentional" - 1))
+           return false;
+         from += sizeof "intentional" - 1;
+         if (*from == ' ')
+           {
+             from++;
+             if (all_upper && *from == 'f')
+               return false;
+           }
+         else if (all_upper)
+           {
+             if (memcmp (from, "LY F", sizeof "LY F" - 1))
+               return false;
+             from += sizeof "LY " - 1;
+           }
+         else
+           {
+             if (memcmp (from, "ly ", sizeof "ly " - 1))
+               return false;
+             from += sizeof "ly " - 1;
+           }
+         if (f == 'i' && *from == 'F')
+           return false;
+         f = *from;
+       }
+      if (f != 'F' && f != 'f')
+       return false;
+      if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
+       return false;
+      if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
+       all_upper = true;
+      else if (all_upper)
+       return false;
+      else if (memcmp (from + 1, "all", sizeof "all" - 1))
+       return false;
+      from += sizeof "fall" - 1;
+      if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
+       from += 2;
+      else if (*from == ' ' || *from == '-')
+       from++;
+      else if (*from != (all_upper ? 'T' : 't'))
+       return false;
+      if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
+       return false;
+      if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
+       return false;
+      if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
+       {
+         if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
+           return false;
+         if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
+                     sizeof "hrough" - 1))
+           return false;
+         from += sizeof "through" - 1;
+       }
+      else
+       from += sizeof "thru" - 1;
+      while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
+       from++;
+      if (*from == '-')
+       {
+         from++;
+         if (*comment_start == '*')
+           {
+             do
+               {
+                 while (*from && *from != '*'
+                        && *from != '\n' && *from != '\r')
+                   from++;
+                 if (*from != '*' || from[1] == '/')
+                   break;
+                 from++;
+               }
+             while (1);
+           }
+         else
+           while (*from && *from != '\n' && *from != '\r')
+             from++;
+       }
+    }
+  /* C block comment.  */
+  if (*comment_start == '*')
+    {
+      if (*from != '*' || from[1] != '/')
+       return false;
+    }
+  /* C++ line comment.  */
+  else if (*from != '\n')
+    return false;
+
+  return true;
+}
+
  /* Allocate COUNT tokens for RUN.  */
  void
  _cpp_init_tokenrun (tokenrun *run, unsigned int count)
@@ -1981,16 +2547,35 @@ cpp_peek_token (cpp_reader *pfile, int index)
    count = index;
    pfile->keep_tokens++;
  
+  /* For peeked tokens temporarily disable line_change reporting,
+     until the tokens are parsed for real.  */
+  void (*line_change) (cpp_reader *, const cpp_token *, int)
+    = pfile->cb.line_change;
+  pfile->cb.line_change = NULL;
+
    do
      {
        peektok = _cpp_lex_token (pfile);
        if (peektok->type == CPP_EOF)
-       return peektok;
+       {
+         index--;
+         break;
+       }
+      else if (peektok->type == CPP_PRAGMA)
+       {
+         /* Don't peek past a pragma.  */
+         if (peektok == &pfile->directive_result)
+           /* Save the pragma in the buffer.  */
+           *pfile->cur_token++ = *peektok;
+         index--;
+         break;
+       }
      }
    while (index--);
  
-  _cpp_backup_tokens_direct (pfile, count + 1);
+  _cpp_backup_tokens_direct (pfile, count - index);
    pfile->keep_tokens--;
+  pfile->cb.line_change = line_change;
  
    return peektok;
  }
@@ -2018,23 +2603,168 @@ _cpp_temp_token (cpp_reader *pfile)
              memmove (next->base + 1, next->base,
                       (la - sz) * sizeof (cpp_token));
  
-          next->base[0] = pfile->cur_run->limit[-1];
-        }
+          next->base[0] = pfile->cur_run->limit[-1];
+        }
+
+      if (sz > 1)
+        memmove (pfile->cur_token + 1, pfile->cur_token,
+                 MIN (la, sz - 1) * sizeof (cpp_token));
+    }
+
+  if (!sz && pfile->cur_token == pfile->cur_run->limit)
+    {
+      pfile->cur_run = next_tokenrun (pfile->cur_run);
+      pfile->cur_token = pfile->cur_run->base;
+    }
+
+  result = pfile->cur_token++;
+  result->src_loc = old->src_loc;
+  return result;
+}
+
+/* We're at the beginning of a logical line (so not in
+  directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
+  if we should enter deferred_pragma mode to tokenize the rest of the
+  line as a module control-line.  */
+
+static void
+cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
+{
+  unsigned backup = 0; /* Tokens we peeked.  */
+  cpp_hashnode *node = result->val.node.node;
+  cpp_token *peek = result;
+  cpp_token *keyword = peek;
+  cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
+  int header_count = 0;
+
+  /* Make sure the incoming state is as we expect it.  This way we
+     can restore it using constants.  */
+  gcc_checking_assert (!pfile->state.in_deferred_pragma
+                      && !pfile->state.skipping
+                      && !pfile->state.parsing_args
+                      && !pfile->state.angled_headers
+                      && (pfile->state.save_comments
+                          == !CPP_OPTION (pfile, discard_comments)));
+
+  /* Enter directives mode sufficiently for peeking.  We don't have
+     to actually set in_directive.  */
+  pfile->state.in_deferred_pragma = true;
+
+  /* These two fields are needed to process tokenization in deferred
+     pragma mode.  They are not used outside deferred pragma mode or
+     directives mode.  */
+  pfile->state.pragma_allow_expansion = true;
+  pfile->directive_line = result->src_loc;
+
+  /* Saving comments is incompatible with directives mode.   */
+  pfile->state.save_comments = 0;
+
+  if (node == n_modules[spec_nodes::M_EXPORT][0])
+    {
+      peek = _cpp_lex_direct (pfile);
+      keyword = peek;
+      backup++;
+      if (keyword->type != CPP_NAME)
+       goto not_module;
+      node = keyword->val.node.node;
+      if (!(node->flags & NODE_MODULE))
+       goto not_module;
+    }
+
+  if (node == n_modules[spec_nodes::M__IMPORT][0])
+    /* __import  */
+    header_count = backup + 2 + 16;
+  else if (node == n_modules[spec_nodes::M_IMPORT][0])
+    /* import  */
+    header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
+  else if (node == n_modules[spec_nodes::M_MODULE][0])
+    ; /* module  */
+  else
+    goto not_module;
+
+  /* We've seen [export] {module|import|__import}.  Check the next token.  */
+  if (header_count)
+    /* After '{,__}import' a header name may appear.  */
+    pfile->state.angled_headers = true;
+  peek = _cpp_lex_direct (pfile);
+  backup++;
+
+  /* ... import followed by identifier, ':', '<' or
+     header-name preprocessing tokens, or module
+     followed by cpp-identifier, ':' or ';' preprocessing
+     tokens.  C++ keywords are not yet relevant.  */
+  if (peek->type == CPP_NAME
+      || peek->type == CPP_COLON
+      ||  (header_count
+          ? (peek->type == CPP_LESS
+             || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
+             || peek->type == CPP_HEADER_NAME)
+          : peek->type == CPP_SEMICOLON))
+    {
+      pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
+      if (!pfile->state.pragma_allow_expansion)
+       pfile->state.prevent_expansion++;
+
+      if (!header_count && linemap_included_from
+         (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
+       cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
+                            "module control-line cannot be in included file");
+
+      /* The first one or two tokens cannot be macro names.  */
+      for (int ix = backup; ix--;)
+       {
+         cpp_token *tok = ix ? keyword : result;
+         cpp_hashnode *node = tok->val.node.node;
+
+         /* Don't attempt to expand the token.  */
+         tok->flags |= NO_EXPAND;
+         if (_cpp_defined_macro_p (node)
+             && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
+             && !cpp_fun_like_macro_p (node))
+           cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0, 
+                                "module control-line \"%s\" cannot be"
+                                " an object-like macro",
+                                NODE_NAME (node));
+       }
  
-      if (sz > 1)
-        memmove (pfile->cur_token + 1, pfile->cur_token,
-                 MIN (la, sz - 1) * sizeof (cpp_token));
-    }
+      /* Map to underbar variants.  */
+      keyword->val.node.node = n_modules[header_count
+                                        ? spec_nodes::M_IMPORT
+                                        : spec_nodes::M_MODULE][1];
+      if (backup != 1)
+       result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
  
-  if (!sz && pfile->cur_token == pfile->cur_run->limit)
+      /* Maybe tell the tokenizer we expect a header-name down the
+        road.  */
+      pfile->state.directive_file_token = header_count;
+    }
+  else
      {
-      pfile->cur_run = next_tokenrun (pfile->cur_run);
-      pfile->cur_token = pfile->cur_run->base;
+    not_module:
+      /* Drop out of directive mode.  */
+      /* We asserted save_comments had this value upon entry.  */
+      pfile->state.save_comments
+       = !CPP_OPTION (pfile, discard_comments);
+      pfile->state.in_deferred_pragma = false;
+      /* Do not let this remain on.  */
+      pfile->state.angled_headers = false;
      }
  
-  result = pfile->cur_token++;
-  result->src_loc = old->src_loc;
-  return result;
+  /* In either case we want to backup the peeked tokens.  */
+  if (backup)
+    {
+      /* If we saw EOL, we should drop it, because this isn't a module
+        control-line after all.  */
+      bool eol = peek->type == CPP_PRAGMA_EOL;
+      if (!eol || backup > 1)
+       {
+         /* Put the peeked tokens back.  */
+         _cpp_backup_tokens_direct (pfile, backup);
+         /* But if the last one was an EOL, forget it.  */
+         if (eol)
+           pfile->lookaheads--;
+       }
+    }
  }
  
  /* Lex a token into RESULT (external interface).  Takes care of issues
@@ -2085,6 +2815,21 @@ _cpp_lex_token (cpp_reader *pfile)
             }
           else if (pfile->state.in_deferred_pragma)
             result = &pfile->directive_result;
+         else if (result->type == CPP_NAME
+                  && (result->val.node.node->flags & NODE_MODULE)
+                  && !pfile->state.skipping
+                  /* Unlike regular directives, we do not deal with
+                     tokenizing module directives as macro arguments.
+                     That's not permitted.  */
+                  && !pfile->state.parsing_args)
+           {
+             /* P1857.  Before macro expansion, at the start of a logical
+                line ... */
+             /* We don't have to consider lookaheads at this point.  */
+             gcc_checking_assert (!pfile->lookaheads);
+
+             cpp_maybe_module_directive (pfile, result);
+           }
  
           if (pfile->cb.line_change && !pfile->state.skipping)
             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
@@ -2110,8 +2855,6 @@ _cpp_lex_token (cpp_reader *pfile)
  bool
  _cpp_get_fresh_line (cpp_reader *pfile)
  {
-  int return_at_eof;
-
    /* We can't get a new line until we leave the current directive.  */
    if (pfile->state.in_directive)
      return false;
@@ -2142,10 +2885,17 @@ _cpp_get_fresh_line (cpp_reader *pfile)
           buffer->next_line = buffer->rlimit;
         }
  
-      return_at_eof = buffer->return_at_eof;
-      _cpp_pop_buffer (pfile);
-      if (pfile->buffer == NULL || return_at_eof)
-       return false;
+      if (buffer->prev && !buffer->return_at_eof)
+       _cpp_pop_buffer (pfile);
+      else
+       {
+         /* End of translation.  Do not pop the buffer yet. Increment
+            line number so that the EOF token is on a line of its own
+            (_cpp_lex_direct doesn't increment in that case, because
+            it's hard for it to distinguish this special case). */
+         CPP_INCREMENT_LINE (pfile, 0);
+         return false;
+       }
      }
  }
  
@@ -2175,6 +2925,7 @@ _cpp_lex_direct (cpp_reader *pfile)
    cppchar_t c;
    cpp_buffer *buffer;
    const unsigned char *comment_start;
+  bool fallthrough_comment = false;
    cpp_token *result = pfile->cur_token++;
  
   fresh_line:
@@ -2182,25 +2933,25 @@ _cpp_lex_direct (cpp_reader *pfile)
    buffer = pfile->buffer;
    if (buffer->need_line)
      {
-      if (pfile->state.in_deferred_pragma)
-       {
-         result->type = CPP_PRAGMA_EOL;
-         pfile->state.in_deferred_pragma = false;
-         if (!pfile->state.pragma_allow_expansion)
-           pfile->state.prevent_expansion--;
-         return result;
-       }
+      gcc_assert (!pfile->state.in_deferred_pragma);
        if (!_cpp_get_fresh_line (pfile))
         {
           result->type = CPP_EOF;
-         if (!pfile->state.in_directive)
+         /* Not a real EOF in a directive or arg parsing -- we refuse
+            to advance to the next file now, and will once we're out
+            of those modes.  */
+         if (!pfile->state.in_directive && !pfile->state.parsing_args)
             {
               /* Tell the compiler the line number of the EOF token.  */
               result->src_loc = pfile->line_table->highest_line;
               result->flags = BOL;
+             /* Now pop the buffer that _cpp_get_fresh_line did not.  */
+             _cpp_pop_buffer (pfile);
             }
           return result;
         }
+      if (buffer != pfile->buffer)
+       fallthrough_comment = false;
        if (!pfile->keep_tokens)
         {
           pfile->cur_run = &pfile->base_run;
@@ -2224,8 +2975,8 @@ _cpp_lex_direct (cpp_reader *pfile)
      }
    c = *buffer->cur++;
  
-  if (pfile->forced_token_location_p)
-    result->src_loc = *pfile->forced_token_location_p;
+  if (pfile->forced_token_location)
+    result->src_loc = pfile->forced_token_location;
    else
      result->src_loc = linemap_position_for_column (pfile->line_table,
                                           CPP_BUF_COLUMN (buffer, buffer->cur));
@@ -2238,9 +2989,28 @@ _cpp_lex_direct (cpp_reader *pfile)
        goto skipped_white;
  
      case '\n':
-      if (buffer->cur < buffer->rlimit)
+      /* Increment the line, unless this is the last line ...  */
+      if (buffer->cur < buffer->rlimit
+         /* ... or this is a #include, (where _cpp_stack_file needs to
+            unwind by one line) ...  */
+         || (pfile->state.in_directive > 1
+             /* ... except traditional-cpp increments this elsewhere.  */
+             && !CPP_OPTION (pfile, traditional)))
         CPP_INCREMENT_LINE (pfile, 0);
        buffer->need_line = true;
+      if (pfile->state.in_deferred_pragma)
+       {
+         /* Produce the PRAGMA_EOL on this line.  File reading
+            ensures there is always a \n at end of the buffer, thus
+            in a deferred pragma we always see CPP_PRAGMA_EOL before
+            any CPP_EOF.  */
+         result->type = CPP_PRAGMA_EOL;
+         result->flags &= ~PREV_WHITE;
+         pfile->state.in_deferred_pragma = false;
+         if (!pfile->state.pragma_allow_expansion)
+           pfile->state.prevent_expansion--;
+         return result;
+       }
        goto fresh_line;
  
      case '0': case '1': case '2': case '3': case '4':
@@ -2270,7 +3040,8 @@ _cpp_lex_direct (cpp_reader *pfile)
                   && CPP_OPTION (pfile, rliterals))
               || (*buffer->cur == '8'
                   && c == 'u'
-                 && (buffer->cur[1] == '"'
+                 && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
+                               && CPP_OPTION (pfile, utf8_char_literals)))
                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
                           && CPP_OPTION (pfile, rliterals)))))
             {
@@ -2295,7 +3066,8 @@ _cpp_lex_direct (cpp_reader *pfile)
        {
         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
-                                               &nst);
+                                               &nst,
+                                               &result->val.node.spelling);
         warn_about_normalization (pfile, result, &nst);
        }
  
@@ -2305,6 +3077,10 @@ _cpp_lex_direct (cpp_reader *pfile)
           result->flags |= NAMED_OP;
           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
         }
+
+      /* Signal FALLTHROUGH comment followed by another token.  */
+      if (fallthrough_comment)
+       result->flags |= PREV_FALLTHROUGH;
        break;
  
      case '\'':
@@ -2322,31 +3098,60 @@ _cpp_lex_direct (cpp_reader *pfile)
           if (_cpp_skip_block_comment (pfile))
             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
         }
-      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
-                           || cpp_in_system_header (pfile)))
+      else if (c == '/' && ! CPP_OPTION (pfile, traditional))
         {
+         /* Don't warn for system headers.  */
+         if (_cpp_in_system_header (pfile))
+           ;
           /* Warn about comments if pedantically GNUC89, and not
              in system headers.  */
-         if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
-             && ! buffer->warned_cplusplus_comments)
+         else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
+                  && CPP_PEDANTIC (pfile)
+                  && ! buffer->warned_cplusplus_comments)
             {
-             cpp_error (pfile, CPP_DL_PEDWARN,
-                        "C++ style comments are not allowed in ISO C90");
-             cpp_error (pfile, CPP_DL_PEDWARN,
-                        "(this will be reported only once per input file)");
+             if (cpp_error (pfile, CPP_DL_PEDWARN,
+                            "C++ style comments are not allowed in ISO C90"))
+               cpp_error (pfile, CPP_DL_NOTE,
+                          "(this will be reported only once per input file)");
               buffer->warned_cplusplus_comments = 1;
             }
           /* Or if specifically desired via -Wc90-c99-compat.  */
-         else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat)
+         else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
+                  && ! CPP_OPTION (pfile, cplusplus)
                    && ! buffer->warned_cplusplus_comments)
             {
-             cpp_error (pfile, CPP_DL_WARNING,
-                        "C++ style comments are are incompatible with C90");
-             cpp_error (pfile, CPP_DL_WARNING,
-                        "(this will be reported only once per input file)");
+             if (cpp_error (pfile, CPP_DL_WARNING,
+                            "C++ style comments are incompatible with C90"))
+               cpp_error (pfile, CPP_DL_NOTE,
+                          "(this will be reported only once per input file)");
               buffer->warned_cplusplus_comments = 1;
             }
-
+         /* In C89/C94, C++ style comments are forbidden.  */
+         else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
+                   || CPP_OPTION (pfile, lang) == CLK_STDC94))
+           {
+             /* But don't be confused about valid code such as
+                - // immediately followed by *,
+                - // in a preprocessing directive,
+                - // in an #if 0 block.  */
+             if (buffer->cur[1] == '*'
+                 || pfile->state.in_directive
+                 || pfile->state.skipping)
+               {
+                 result->type = CPP_DIV;
+                 break;
+               }
+             else if (! buffer->warned_cplusplus_comments)
+               {
+                 if (cpp_error (pfile, CPP_DL_ERROR,
+                                "C++ style comments are not allowed in "
+                                "ISO C90"))
+                   cpp_error (pfile, CPP_DL_NOTE,
+                              "(this will be reported only once per input "
+                              "file)");
+                 buffer->warned_cplusplus_comments = 1;
+               }
+           }
           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
         }
@@ -2362,12 +3167,25 @@ _cpp_lex_direct (cpp_reader *pfile)
           break;
         }
  
+      if (fallthrough_comment_p (pfile, comment_start))
+       fallthrough_comment = true;
+
+      if (pfile->cb.comment)
+       {
+         size_t len = pfile->buffer->cur - comment_start;
+         pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
+                            len + 1);
+       }
+
        if (!pfile->state.save_comments)
         {
           result->flags |= PREV_WHITE;
           goto update_tokens_line;
         }
  
+      if (fallthrough_comment)
+       result->flags |= PREV_FALLTHROUGH;
+
        /* Save the comment as a token in its own right.  */
        save_comment (pfile, result, comment_start, c);
        break;
@@ -2382,7 +3200,13 @@ _cpp_lex_direct (cpp_reader *pfile)
  
        result->type = CPP_LESS;
        if (*buffer->cur == '=')
-       buffer->cur++, result->type = CPP_LESS_EQ;
+       {
+         buffer->cur++, result->type = CPP_LESS_EQ;
+         if (*buffer->cur == '>'
+             && CPP_OPTION (pfile, cplusplus)
+             && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
+           buffer->cur++, result->type = CPP_SPACESHIP;
+       }
        else if (*buffer->cur == '<')
         {
           buffer->cur++;
@@ -2506,7 +3330,7 @@ _cpp_lex_direct (cpp_reader *pfile)
  
      case ':':
        result->type = CPP_COLON;
-      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
+      if (*buffer->cur == ':' && CPP_OPTION (pfile, scope))
         buffer->cur++, result->type = CPP_SCOPE;
        else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
         {
@@ -2536,25 +3360,57 @@ _cpp_lex_direct (cpp_reader *pfile)
        /* @ is a punctuator in Objective-C.  */
      case '@': result->type = CPP_ATSIGN; break;
  
-    case '$':
-    case '\\':
+    default:
        {
         const uchar *base = --buffer->cur;
-       struct normalize_state nst = INITIAL_NORMALIZE_STATE;
  
+       /* Check for an extended identifier ($ or UCN or UTF-8).  */
+       struct normalize_state nst = INITIAL_NORMALIZE_STATE;
         if (forms_identifier_p (pfile, true, &nst))
           {
             result->type = CPP_NAME;
-           result->val.node.node = lex_identifier (pfile, base, true, &nst);
+           result->val.node.node = lex_identifier (pfile, base, true, &nst,
+                                                   &result->val.node.spelling);
             warn_about_normalization (pfile, result, &nst);
             break;
           }
+
+       /* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
+          single token.  */
         buffer->cur++;
+       if (c >= utf8_signifier)
+         {
+           const uchar *pstr = base;
+           cppchar_t s;
+           if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
+             buffer->cur = pstr;
+         }
+       create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
+       break;
        }
  
-    default:
-      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
-      break;
+    }
+
+  /* Potentially convert the location of the token to a range.  */
+  if (result->src_loc >= RESERVED_LOCATION_COUNT
+      && result->type != CPP_EOF)
+    {
+      /* Ensure that any line notes are processed, so that we have the
+        correct physical line/column for the end-point of the token even
+        when a logical line is split via one or more backslashes.  */
+      if (buffer->cur >= buffer->notes[buffer->cur_note].pos
+         && !pfile->overlaid_buffer)
+       _cpp_process_line_notes (pfile, false);
+
+      source_range tok_range;
+      tok_range.m_start = result->src_loc;
+      tok_range.m_finish
+       = linemap_position_for_column (pfile->line_table,
+                                      CPP_BUF_COLUMN (buffer, buffer->cur));
+
+      result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
+                                              result->src_loc,
+                                              tok_range, NULL);
      }
  
    return result;
@@ -2619,11 +3475,35 @@ cpp_digraph2name (enum cpp_ttype type)
    return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
  }
  
+/* Write the spelling of identifier IDENT to BUFFER, expressing every
+   byte sequence that starts outside the ASCII range as a UCN.
+   BUFFER must already have enough space to hold the result.
+   Returns a pointer to the character after the last character
+   written.  */
+unsigned char *
+_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
+{
+  const unsigned char *spelling = NODE_NAME (ident);
+  size_t pos = 0;
+
+  while (pos < NODE_LEN (ident))
+    {
+      if (spelling[pos] & ~0x7F)
+        {
+          /* The value returned by utf8_to_ucn is used as the number
+             of input bytes to step over; BUFFER always advances by a
+             fixed ten bytes per UCN.  */
+          pos += utf8_to_ucn (buffer, spelling + pos);
+          buffer += 10;
+        }
+      else
+        *buffer++ = spelling[pos++];
+    }
+
+  return buffer;
+}
+
  /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
     already contain enough space to hold the token's spelling.
     Returns a pointer to the character after the last character written.
     FORSTRING is true if this is to be the spelling after translation
-   phase 1 (this is different for UCNs).
+   phase 1 (with the original spelling of extended identifiers), false
+   if extended identifiers should always be written using UCNs (there is
+   no option for always writing them in the internal UTF-8 form).
     FIXME: Would be nice if we didn't need the PFILE argument.  */
  unsigned char *
  cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
@@ -2652,24 +3532,12 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
      case SPELL_IDENT:
        if (forstring)
         {
-         memcpy (buffer, NODE_NAME (token->val.node.node),
-                 NODE_LEN (token->val.node.node));
-         buffer += NODE_LEN (token->val.node.node);
+         memcpy (buffer, NODE_NAME (token->val.node.spelling),
+                 NODE_LEN (token->val.node.spelling));
+         buffer += NODE_LEN (token->val.node.spelling);
         }
        else
-       {
-         size_t i;
-         const unsigned char * name = NODE_NAME (token->val.node.node);
-         
-         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
-           if (name[i] & ~0x7F)
-             {
-               i += utf8_to_ucn (buffer, name + i) - 1;
-               buffer += 10;
-             }
-           else
-             *buffer++ = NODE_NAME (token->val.node.node)[i];
-       }
+       buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
        break;
  
      case SPELL_LITERAL:
@@ -2760,7 +3628,11 @@ cpp_output_token (const cpp_token *token, FILE *fp)
        break;
  
      case SPELL_LITERAL:
+      if (token->type == CPP_HEADER_NAME)
+       fputc ('"', fp);
        fwrite (token->val.str.text, 1, token->val.str.len, fp);
+      if (token->type == CPP_HEADER_NAME)
+       fputc ('"', fp);
        break;
  
      case SPELL_NONE:
@@ -2783,9 +3655,11 @@ _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
        case SPELL_NONE:
         return (a->type != CPP_MACRO_ARG
-               || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
+               || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
+                   && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
        case SPELL_IDENT:
-       return a->val.node.node == b->val.node.node;
+       return (a->val.node.node == b->val.node.node
+               && a->val.node.spelling == b->val.node.spelling);
        case SPELL_LITERAL:
         return (a->val.str.len == b->val.str.len
                 && !memcmp (a->val.str.text, b->val.str.text,
@@ -2847,6 +3721,7 @@ cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
                                 || (CPP_OPTION (pfile, objc)
                                     && token1->val.str.text[0] == '@'
                                     && (b == CPP_NAME || b == CPP_STRING)));
+    case CPP_LESS_EQ:  return c == '>';
      case CPP_STRING:
      case CPP_WSTRING:
      case CPP_UTF8STRING:
@@ -2954,7 +3829,7 @@ new_buff (size_t len)
      len = MIN_BUFF_SIZE;
    len = CPP_ALIGN (len);
  
-#ifdef ENABLE_VALGRIND_CHECKING
+#ifdef ENABLE_VALGRIND_ANNOTATIONS
    /* Valgrind warns about uses of interior pointers, so put _cpp_buff
       struct first.  */
    size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
@@ -3051,7 +3926,7 @@ _cpp_free_buff (_cpp_buff *buff)
    for (; buff; buff = next)
      {
        next = buff->next;
-#ifdef ENABLE_VALGRIND_CHECKING
+#ifdef ENABLE_VALGRIND_ANNOTATIONS
        free (buff);
  #else
        free (buff->base);
@@ -3106,6 +3981,25 @@ _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
    return result;
  }
  
+/* Commit SIZE bytes sitting at the front of PFILE's a_buff.  When the
+   hash table supplies an alloc_subobject hook, the bytes are instead
+   copied into storage obtained from that hook and a_buff is left
+   untouched.  Returns the address at which the data now lives.  */
+
+void *
+_cpp_commit_buff (cpp_reader *pfile, size_t size)
+{
+  void *front = BUFF_FRONT (pfile->a_buff);
+
+  if (!pfile->hash_table->alloc_subobject)
+    {
+      /* No hook: claim the bytes in place by bumping the front.  */
+      BUFF_FRONT (pfile->a_buff) += size;
+      return front;
+    }
+
+  void *copy = pfile->hash_table->alloc_subobject (size);
+  memcpy (copy, front, size);
+  return copy;
+}
+
  /* Say which field of TOK is in use.  */
  
  enum cpp_token_fld_kind
@@ -3118,7 +4012,11 @@ cpp_token_val_index (const cpp_token *tok)
      case SPELL_LITERAL:
        return CPP_TOKEN_FLD_STR;
      case SPELL_OPERATOR:
-      if (tok->type == CPP_PASTE)
+      /* Operators which were originally spelled as ident keep around
+         the node for the exact spelling.  */
+      if (tok->flags & NAMED_OP)
+       return CPP_TOKEN_FLD_NODE;
+      else if (tok->type == CPP_PASTE)
         return CPP_TOKEN_FLD_TOKEN_NO;
        else
         return CPP_TOKEN_FLD_NONE;
@@ -3129,20 +4027,20 @@ cpp_token_val_index (const cpp_token *tok)
         return CPP_TOKEN_FLD_SOURCE;
        else if (tok->type == CPP_PRAGMA)
         return CPP_TOKEN_FLD_PRAGMA;
-      /* else fall through */
+      /* fall through */
      default:
        return CPP_TOKEN_FLD_NONE;
      }
  }
  
-/* All tokens lexed in R after calling this function will be forced to have
-   their source_location the same as the location referenced by P, until
+/* All tokens lexed in R after calling this function will be forced to
+   have their location_t to be P, until
     cpp_stop_forcing_token_locations is called for R.  */
  
  void
-cpp_force_token_locations (cpp_reader *r, source_location *p)
+cpp_force_token_locations (cpp_reader *r, location_t loc)
  {
-  r->forced_token_location_p = p;
+  r->forced_token_location = loc;
  }
  
  /* Go back to assigning locations naturally for lexed tokens.  */
@@ -3150,5 +4048,717 @@ cpp_force_token_locations (cpp_reader *r, source_location *p)
  void
  cpp_stop_forcing_token_locations (cpp_reader *r)
  {
-  r->forced_token_location_p = NULL;
+  r->forced_token_location = 0;
+}
+
+/* PEEK points at a backslash.  When that backslash begins an escaped
+   line ending ("\n", "\r" or "\r\n"), step over it, and over any
+   further escaped line endings that follow directly.  A step that
+   would land on or beyond LIMIT is not taken.  Returns the resulting
+   position, which is PEEK itself when nothing was skipped.  */
+
+static const unsigned char *
+do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
+{
+  for (;;)
+    {
+      const unsigned char *after;
+
+      if (__builtin_expect (peek[1] == '\n', true))
+        after = peek + 2;
+      else if (__builtin_expect (peek[1] == '\r', false))
+        /* A lone CR, or CR LF taken together as one line ending.  */
+        after = peek + (peek[2] == '\n' ? 3 : 2);
+      else
+        return peek;
+
+      if (!__builtin_expect (after < limit, true))
+        return peek;
+
+      peek = after;
+      /* The user might be perverse and stack escaped newlines.  */
+      if (*peek != '\\')
+        return peek;
+    }
+}
+
+/* Return PEEK, moved past any escaped line ending(s) that begin
+   there.  A position not holding a backslash is returned as is.  */
+
+static const unsigned char *
+do_peek_next (const unsigned char *peek, const unsigned char *limit)
+{
+  return (__builtin_expect (*peek == '\\', false)
+          ? do_peek_backslash (peek, limit)
+          : peek);
+}
+
+/* Step back one character from PEEK without going before BOUND.
+   Returns NULL when PEEK is already at BOUND.  Otherwise returns a
+   pointer to the previous character, except that when that character
+   is a line ending ("\n", "\r" or "\r\n") immediately preceded by a
+   backslash, the escaped newline is skipped and the search resumes
+   backwards from the backslash.  */
+
+static const unsigned char *
+do_peek_prev (const unsigned char *peek, const unsigned char *bound)
+{
+  if (peek == bound)
+    return NULL;
+
+  unsigned char c = *--peek;
+  /* Carriage return is '\r'; comparing against the letter 'r' would
+     miss lone-CR line endings and misread a backslash followed by
+     the letter r as an escaped newline.  */
+  if (__builtin_expect (c == '\n', false)
+      || __builtin_expect (c == '\r', false))
+    {
+      if (peek == bound)
+        return peek;
+
+      /* Offset from PEEK of the character before the line ending.  */
+      int ix = -1;
+      if (c == '\n' && peek[ix] == '\r')
+        {
+          /* CR LF counts as a single line ending.  */
+          if (peek + ix == bound)
+            return peek;
+          ix--;
+        }
+
+      if (peek[ix] == '\\')
+        /* Escaped newline: continue from the backslash.  */
+        return do_peek_prev (peek + ix, bound);
+
+      return peek;
+    }
+  else
+    return peek;
+}
+
+/* The first character of MATCH is taken as already seen at PEEK[-1].
+   Compare the rest of MATCH against the text at PEEK, allowing
+   escaped newlines between characters.  On a mismatch, or when the
+   match is directly followed by another identifier character, return
+   NULL.  Otherwise return the position after any trailing spaces,
+   tabs and escaped newlines.  */
+
+static const unsigned char *
+do_peek_ident (const char *match, const unsigned char *peek,
+               const unsigned char *limit)
+{
+  while (*++match)
+    {
+      if (*peek != *match)
+        {
+          /* Retry once past an escaped newline before giving up.  */
+          peek = do_peek_next (peek, limit);
+          if (*peek != *match)
+            return NULL;
+        }
+      peek++;
+    }
+
+  /* Must now not be looking at an identifier char.  */
+  peek = do_peek_next (peek, limit);
+  if (ISIDNUM (*peek))
+    return NULL;
+
+  /* Skip control-line whitespace, resuming after each escaped
+     newline that could actually be stepped over.  */
+  for (;;)
+    {
+      while (*peek == ' ' || *peek == '\t')
+        peek++;
+
+      if (!__builtin_expect (*peek == '\\', false))
+        break;
+
+      peek = do_peek_backslash (peek, limit);
+      if (*peek == '\\')
+        break;
+    }
+
+  return peek;
+}
+
+/* Are we looking at a module control line starting at PEEK - 1?
+   C is presumably the character at PEEK[-1] (the call site is not
+   shown here -- confirm); LIMIT bounds the skipping of escaped
+   newlines.  Recognizes "export" followed by "import" or "module",
+   or a lone "import", "__import" or "module", and then checks that
+   the first character after the keyword and its trailing whitespace
+   could begin a token permitted there.  Returns true if so.  */
+
+static bool
+do_peek_module (cpp_reader *pfile, unsigned char c,
+               const unsigned char *peek, const unsigned char *limit)
+{
+  /* Set when the keyword found is an import form; it selects which
+     following characters are accepted further down.  */
+  bool import = false;
+
+  /* Dispatch on the first character.  Each arm pre-checks the second
+     character (or a backslash, which may begin an escaped newline)
+     before asking do_peek_ident for the full match.  */
+  if (__builtin_expect (c == 'e', false))
+    {
+      if (!((peek[0] == 'x' || peek[0] == '\\')
+           && (peek = do_peek_ident ("export", peek, limit))))
+       return false;
+
+      /* export, peek for import or module.  No need to peek __import
+        here.  */
+      if (peek[0] == 'i')
+       {
+         if (!((peek[1] == 'm' || peek[1] == '\\')
+               && (peek = do_peek_ident ("import", peek + 1, limit))))
+           return false;
+         import = true;
+       }
+      else if (peek[0] == 'm')
+       {
+         if (!((peek[1] == 'o' || peek[1] == '\\')
+               && (peek = do_peek_ident ("module", peek + 1, limit))))
+           return false;
+       }
+      else
+       return false;
+    }
+  else if (__builtin_expect (c == 'i', false))
+    {
+      if (!((peek[0] == 'm' || peek[0] == '\\')
+           && (peek = do_peek_ident ("import", peek, limit))))
+       return false;
+      import = true;
+    }
+  else if (__builtin_expect (c == '_', false))
+    {
+      /* Needed for translated includes.   */
+      if (!((peek[0] == '_' || peek[0] == '\\')
+           && (peek = do_peek_ident ("__import", peek, limit))))
+       return false;
+      import = true;
+    }
+  else if (__builtin_expect (c == 'm', false))
+    {
+      if (!((peek[0] == 'o' || peek[0] == '\\')
+           && (peek = do_peek_ident ("module", peek, limit))))
+       return false;
+    }
+  else
+    return false;
+
+  /* Peek the next character to see if it's good enough.  We'll be at
+     the first non-whitespace char, including skipping an escaped
+     newline.  */
+  /* ... import followed by identifier, ':', '<' or header-name
+     preprocessing tokens, or module followed by identifier, ':' or
+     ';' preprocessing tokens.  */
+  unsigned char p = *peek++;
+      
+  /* A character literal is ... single quotes, ... optionally preceded
+     by u8, u, U, or L */
+  /* A string-literal is a ... double quotes, optionally prefixed by
+     R, u8, u8R, u, uR, U, UR, L, or LR */
+  /* The branches below reject the case where P starts a prefixed
+     character or string literal rather than an identifier.  Note the
+     labels: the 'u' branch jumps into the body of the 'U'/'L' branch,
+     and both may jump into the body of the 'R' branch.  */
+  if (p == 'u')
+    {
+      peek = do_peek_next (peek, limit);
+      if (*peek == '8')
+       {
+         peek++;
+         goto peek_u8;
+       }
+      goto peek_u;
+    }
+  else if (p == 'U' || p == 'L')
+    {
+    peek_u8:
+      peek = do_peek_next (peek, limit);
+    peek_u:
+      if (*peek == '\"' || *peek == '\'')
+       return false;
+
+      if (*peek == 'R')
+       goto peek_R;
+      /* Identifier. Ok.  */
+    }
+  else if (p == 'R')
+    {
+    peek_R:
+      /* NOTE(review): when entered via the goto above, PEEK still
+        points at the 'R' itself, whereas on the direct P == 'R' path
+        it points one past it.  On the goto path the '"' test below
+        therefore appears unable to match -- confirm whether PEEK
+        should be advanced before jumping here.  */
+      if (CPP_OPTION (pfile, rliterals))
+       {
+         peek = do_peek_next (peek, limit);
+         if (*peek == '\"')
+           return false;
+       }
+      /* Identifier. Ok.  */
+    }
+  else if ('Z' - 'A' == 25
+          ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
+          : ISIDST (p))
+    {
+      /* Identifier.  Ok. */
+    }
+  else if (p == '<')
+    {
+      /* Maybe angle header, ok for import.  Reject
+        '<=', '<<' digraph:'<:'.  */
+      if (!import)
+       return false;
+      peek = do_peek_next (peek, limit);
+      if (*peek == '=' || *peek == '<'
+         || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
+       return false;
+    }
+  else if (p == ';')
+    {
+      /* SEMICOLON, ok for module.  */
+      if (import)
+       return false;
+    }
+  else if (p == '"')
+    {
+      /* STRING, ok for import.  */
+      if (!import)
+       return false;
+    }
+  else if (p == ':')
+    {
+      /* Maybe COLON, ok.  Reject '::', digraph:':>'.  */
+      peek = do_peek_next (peek, limit);
+      if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
+       return false;
+    }
+  else
+    /* FIXME: Detect a unicode character, excluding those not
+       permitted as the initial character. [lex.name]/1.  I presume
+       we need to check the \[uU] spellings, and directly using
+       Unicode in say UTF8 form?  Or perhaps we do the phase-1
+       conversion of UTF8 to universal-character-names?  */
+    return false;
+
+  return true;
+}
+
+/* Directives-only scanning.  Somewhat more relaxed than correct
+   parsing -- some ill-formed programs will not be rejected.  */
+
+void
+cpp_directive_only_process (cpp_reader *pfile,
+                           void *data,
+                           void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
+{
+  bool module_p = CPP_OPTION (pfile, module_directives);
+
+  do
+    {
+    restart:
+      /* Buffer initialization, but no line cleaning. */
+      cpp_buffer *buffer = pfile->buffer;
+      buffer->cur_note = buffer->notes_used = 0;
+      buffer->cur = buffer->line_base = buffer->next_line;
+      buffer->need_line = false;
+      /* Files always end in a newline.  We rely on this for
+        character peeking safety.  */
+      gcc_assert (buffer->rlimit[-1] == '\n');
+
+      const unsigned char *base = buffer->cur;
+      unsigned line_count = 0;
+      const unsigned char *line_start = base;
+
+      bool bol = true;
+      bool raw = false;
+
+      const unsigned char *lwm = base;
+      for (const unsigned char *pos = base, *limit = buffer->rlimit;
+          pos < limit;)
+       {
+         unsigned char c = *pos++;
+         /* This matches the switch in _cpp_lex_direct.  */
+         switch (c)
+           {
+           case ' ': case '\t': case '\f': case '\v':
+             /* Whitespace, do nothing.  */
+             break;
+
+           case '\r': /* MAC line ending, or Windows \r\n  */
+             if (*pos == '\n')
+               pos++;
+             /* FALLTHROUGH */
+
+           case '\n':
+             bol = true;
+
+           next_line:
+             CPP_INCREMENT_LINE (pfile, 0);
+             line_count++;
+             line_start = pos;
+             break;
+
+           case '\\':
+             /* <backslash><newline> is removed, and doesn't undo any
+                preceding escape or whatnot.  */
+             if (*pos == '\n')
+               {
+                 pos++;
+                 goto next_line;
+               }
+             else if (*pos == '\r')
+               {
+                 if (pos[1] == '\n')
+                   pos++;
+                 pos++;
+                 goto next_line;
+               }
+             goto dflt;
+             
+           case '#':
+             if (bol)
+               {
+                 /* Line directive.  */
+                 if (pos - 1 > base && !pfile->state.skipping)
+                   cb (pfile, CPP_DO_print, data,
+                       line_count, base, pos - 1 - base);
+
+                 /* Prep things for directive handling. */
+                 buffer->next_line = pos;
+                 buffer->need_line = true;
+                 bool ok = _cpp_get_fresh_line (pfile);
+                 gcc_checking_assert (ok);
+
+                 /* Ensure proper column numbering for generated
+                    error messages. */
+                 buffer->line_base -= pos - line_start;
+
+                 _cpp_handle_directive (pfile, line_start + 1 != pos);
+
+                 /* Sanitize the line settings.  Duplicate #include's can
+                    mess things up. */
+                 // FIXME: Necessary?
+                 pfile->line_table->highest_location
+                   = pfile->line_table->highest_line;
+
+                 if (!pfile->state.skipping
+                     && pfile->buffer->next_line < pfile->buffer->rlimit)
+                   cb (pfile, CPP_DO_location, data,
+                       pfile->line_table->highest_line);
+
+                 goto restart;
+               }
+             goto dflt;
+
+           case '/':
+             {
+               const unsigned char *peek = do_peek_next (pos, limit);
+               if (!(*peek == '/' || *peek == '*'))
+                 goto dflt;
+
+               /* Line or block comment  */
+               bool is_block = *peek == '*';
+               bool star = false;
+               bool esc = false;
+               location_t sloc
+                 = linemap_position_for_column (pfile->line_table,
+                                                pos - line_start);
+
+               while (pos < limit)
+                 {
+                   char c = *pos++;
+                   switch (c)
+                     {
+                     case '\\':
+                       esc = true;
+                       break;
+
+                     case '\r':
+                       if (*pos == '\n')
+                         pos++;
+                       /* FALLTHROUGH  */
+
+                     case '\n':
+                       {
+                         CPP_INCREMENT_LINE (pfile, 0);
+                         line_count++;
+                         line_start = pos;
+                         if (!esc && !is_block)
+                           {
+                             bol = true;
+                             goto done_comment;
+                           }
+                       }
+                       if (!esc)
+                         star = false;
+                       esc = false;
+                       break;
+
+                     case '*':
+                       if (pos > peek && !esc)
+                         star = is_block;
+                       esc = false;
+                       break;
+
+                     case '/':
+                       if (star)
+                         goto done_comment;
+                       /* FALLTHROUGH  */
+
+                     default:
+                       star = false;
+                       esc = false;
+                       break;
+                     }
+                 }
+               cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
+                                    "unterminated comment");
+             done_comment:
+               lwm = pos;
+               break;
+             }
+
+           case '\'':
+             if (!CPP_OPTION (pfile, digit_separators))
+               goto delimited_string;
+
+             /* Possibly a number punctuator.  */
+             if (!ISIDNUM (*do_peek_next (pos, limit)))
+               goto delimited_string;
+
+             goto quote_peek;
+
+           case '\"':
+             if (!CPP_OPTION (pfile, rliterals))
+               goto delimited_string;
+
+           quote_peek:
+             {
+               /* For ' see if it's a number punctuator
+                  \.?<digit>(<digit>|<identifier-nondigit>
+                  |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
+               /* For " see if it's a raw string
+                  {U,L,u,u8}R.  This includes CPP_NUMBER detection,
+                  because that could be 0e+R.  */
+               const unsigned char *peek = pos - 1;
+               bool quote_first = c == '"';
+               bool quote_eight = false;
+               bool maybe_number_start = false;
+               bool want_number = false;
+
+               while ((peek = do_peek_prev (peek, lwm)))
+                 {
+                   unsigned char p = *peek;
+                   if (quote_first)
+                     {
+                       if (!raw)
+                         {
+                           if (p != 'R')
+                             break;
+                           raw = true;
+                           continue;
+                         }
+
+                       quote_first = false;
+                       if (p == 'L' || p == 'U' || p == 'u')
+                         ;
+                       else if (p == '8')
+                         quote_eight = true;
+                       else
+                         goto second_raw;
+                     }
+                   else if (quote_eight)
+                     {
+                       if (p != 'u')
+                         {
+                           raw = false;
+                           break;
+                         }
+                       quote_eight = false;
+                     }
+                   else if (c == '"')
+                     {
+                     second_raw:;
+                       if (!want_number && ISIDNUM (p))
+                         {
+                           raw = false;
+                           break;
+                         }
+                     }
+
+                   if (ISDIGIT (p))
+                     maybe_number_start = true;
+                   else if (p == '.')
+                     want_number = true;
+                   else if (ISIDNUM (p))
+                     maybe_number_start = false;
+                   else if (p == '+' || p == '-')
+                     {
+                       if (const unsigned char *peek_prev
+                           = do_peek_prev (peek, lwm))
+                         {
+                           p = *peek_prev;
+                           if (p == 'e' || p == 'E'
+                               || p == 'p' || p == 'P')
+                             {
+                               want_number = true;
+                               maybe_number_start = false;
+                             }
+                           else
+                             break;
+                         }
+                       else
+                         break;
+                     }
+                   else if (p == '\'' || p == '\"')
+                     {
+                       /* If this is lwm, this must be the end of a
+                          previous string.  So this is a trailing
+                          literal type, (a) if those are allowed,
+                          and (b) maybe_number_start and want_number
+                          are both false.  Otherwise this must be a
+                          CPP_NUMBER because we've met another ', and
+                          we'd have checked that in its own right.  */
+                       if (peek == lwm && CPP_OPTION (pfile, uliterals))
+                         {
+                           if  (!maybe_number_start && !want_number)
+                             /* Must be a literal type.  */
+                             raw = false;
+                         }
+                       else if (p == '\''
+                                && CPP_OPTION (pfile, digit_separators))
+                         maybe_number_start = true;
+                       break;
+                     }
+                   else if (c == '\'')
+                     break;
+                   else if (!quote_first && !quote_eight)
+                     break;
+                 }
+
+               if (maybe_number_start)
+                 {
+                   if (c == '\'')
+                     /* A CPP NUMBER.  */
+                     goto dflt;
+                   raw = false;
+                 }
+
+               goto delimited_string;
+             }
+
+           delimited_string:
+             {
+               /* (Possibly raw) string or char literal.  */
+               unsigned char end = c;
+               int delim_len = -1;
+               const unsigned char *delim = NULL;
+               location_t sloc = linemap_position_for_column (pfile->line_table,
+                                                              pos - line_start);
+               int esc = 0;
+
+               if (raw)
+                 {
+                   /* There can be no line breaks in the delimiter.  */
+                   delim = pos;
+                   for (delim_len = 0; (c = *pos++) != '('; delim_len++)
+                     {
+                       if (delim_len == 16)
+                         {
+                           cpp_error_with_line (pfile, CPP_DL_ERROR,
+                                                sloc, 0,
+                                                "raw string delimiter"
+                                                " longer than %d"
+                                                " characters",
+                                                delim_len);
+                           raw = false;
+                           pos = delim;
+                           break;
+                         }
+                       if (strchr (") \\\t\v\f\n", c))
+                         {
+                           cpp_error_with_line (pfile, CPP_DL_ERROR,
+                                                sloc, 0,
+                                                "invalid character '%c'"
+                                                " in raw string"
+                                                " delimiter", c);
+                           raw = false;
+                           pos = delim;
+                           break;
+                         }
+                       if (pos >= limit)
+                         goto bad_string;
+                     }
+                 }
+
+               while (pos < limit)
+                 {
+                   char c = *pos++;
+                   switch (c)
+                     {
+                     case '\\':
+                       if (!raw)
+                         esc++;
+                       break;
+
+                     case '\r':
+                       if (*pos == '\n')
+                         pos++;
+                       /* FALLTHROUGH  */
+
+                     case '\n':
+                       {
+                         CPP_INCREMENT_LINE (pfile, 0);
+                         line_count++;
+                         line_start = pos;
+                       }
+                       if (esc)
+                         esc--;
+                       break;
+
+                     case ')':
+                       if (raw
+                           && pos + delim_len + 1 < limit
+                           && pos[delim_len] == end
+                           && !memcmp (delim, pos, delim_len))
+                         {
+                           pos += delim_len + 1;
+                           raw = false;
+                           goto done_string;
+                         }
+                       break;
+
+                     default:
+                       if (!raw && !(esc & 1) && c == end)
+                         goto done_string;
+                       esc = 0;
+                       break;
+                     }
+                 }
+             bad_string:
+               cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
+                                    "unterminated literal");
+               
+             done_string:
+               raw = false;
+               lwm = pos - 1;
+             }
+             goto dflt;
+
+           case '_':
+           case 'e':
+           case 'i':
+           case 'm':
+             if (bol && module_p && !pfile->state.skipping
+                 && do_peek_module (pfile, c, pos, limit))
+               {
+                 /* We've seen the start of a module control line.
+                    Start up the tokenizer.  */
+                 pos--; /* Backup over the first character.  */
+
+                 /* Backup over whitespace to start of line.  */
+                 while (pos > line_start
+                        && (pos[-1] == ' ' || pos[-1] == '\t'))
+                   pos--;
+
+                 if (pos > base)
+                   cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
+
+                 /* Prep things for directive handling. */
+                 buffer->next_line = pos;
+                 buffer->need_line = true;
+
+                 /* Now get tokens until the PRAGMA_EOL.  */
+                 do
+                   {
+                     location_t spelling;
+                     const cpp_token *tok
+                       = cpp_get_token_with_location (pfile, &spelling);
+
+                     gcc_assert (pfile->state.in_deferred_pragma
+                                 || tok->type == CPP_PRAGMA_EOL);
+                     cb (pfile, CPP_DO_token, data, tok, spelling);
+                   }
+                 while (pfile->state.in_deferred_pragma);
+
+                 if (pfile->buffer->next_line < pfile->buffer->rlimit)
+                   cb (pfile, CPP_DO_location, data,
+                       pfile->line_table->highest_line);
+
+                 pfile->mi_valid = false;
+                 goto restart;
+               }
+             goto dflt;
+
+           default:
+           dflt:
+             bol = false;
+             pfile->mi_valid = false;
+             break;
+           }
+       }
+
+      if (buffer->rlimit > base && !pfile->state.skipping)
+       cb (pfile, CPP_DO_print, data, line_count, base, buffer->rlimit - base);
+
+      _cpp_pop_buffer (pfile);
+    }
+  while (pfile->buffer);
  }