X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=libcpp%2Flex.c;h=07d5a4ff4668853a230c00f53dd48b0691276b09;hb=6a6926635c36e0ef2598b5399afdbfc2dbd4bf1f;hp=6fca257710570780db810ceb56dd2e41e7048c9a;hpb=35c4515b8b8e306684a3837d40ffa2c9fcdd9899;p=gcc.git

diff --git a/libcpp/lex.c b/libcpp/lex.c
index 6fca2577105..07d5a4ff466 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1,5 +1,5 @@
 /* CPP Library - lexical analysis.
-   Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   Copyright (C) 2000-2020 Free Software Foundation, Inc.
    Contributed by Per Bothner, 1994-95.
    Based on CCCP program by Paul Rubin, June 1986
    Adapted to ANSI C, Richard Stallman, Jan 1987
@@ -531,11 +531,11 @@ init_vectorized_lexer (void)
   search_line_fast = impl;
 }
 
-#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
+#elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
 
 /* A vection of the fast scanner using AltiVec vectorized byte compares
    and VSX unaligned loads (when VSX is available).  This is otherwise
-   the same as the pre-GCC 5 version.  */
+   the same as the AltiVec version.  */
 
 ATTRIBUTE_NO_SANITIZE_UNDEFINED
 static const uchar *
@@ -568,7 +568,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
     {
       vc m_nl, m_cr, m_bs, m_qm;
 
-      data = *((const vc *)s);
+      data = __builtin_vec_vsx_ld (0, s);
       s += 16;
 
       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
@@ -1062,7 +1062,7 @@ _cpp_clean_line (cpp_reader *pfile)
       d = (uchar *) s;
 
       /* Handle DOS line endings.  */
-      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
+      if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
 	s++;
     }
 
@@ -1232,7 +1232,7 @@ static int
 skip_line_comment (cpp_reader *pfile)
 {
   cpp_buffer *buffer = pfile->buffer;
-  source_location orig_line = pfile->line_table->highest_line;
+  location_t orig_line = pfile->line_table->highest_line;
 
   while (*buffer->cur != '\n')
     buffer->cur++;
@@ -1313,7 +1313,9 @@ warn_about_normalization (cpp_reader *pfile,
     }
 }
 
-/* Returns TRUE if the sequence starting at buffer->cur is invalid in
+static const cppchar_t utf8_signifier = 0xC0;
+
+/* Returns TRUE if the sequence starting at buffer->cur is valid in
    an identifier.  FIRST is TRUE if this starts an identifier.  */
 static bool
 forms_identifier_p (cpp_reader *pfile, int first,
@@ -1336,17 +1338,25 @@ forms_identifier_p (cpp_reader *pfile, int first,
       return true;
     }
 
-  /* Is this a syntactically valid UCN?  */
-  if (CPP_OPTION (pfile, extended_identifiers)
-      && *buffer->cur == '\\'
-      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+  /* Is this a syntactically valid UCN or a valid UTF-8 char?  */
+  if (CPP_OPTION (pfile, extended_identifiers))
     {
       cppchar_t s;
-      buffer->cur += 2;
-      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
-			  state, &s, NULL, NULL))
-	return true;
-      buffer->cur -= 2;
+      if (*buffer->cur >= utf8_signifier)
+	{
+	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+			       state, &s))
+	    return true;
+	}
+      else if (*buffer->cur == '\\'
+	       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+	{
+	  buffer->cur += 2;
+	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+			      state, &s, NULL, NULL))
+	    return true;
+	  buffer->cur -= 2;
+	}
     }
 
   return false;
@@ -1360,9 +1370,9 @@ maybe_va_opt_error (cpp_reader *pfile)
     {
       /* __VA_OPT__ should not be accepted at all, but allow it in
 	 system headers.  */
-      if (!cpp_in_system_header (pfile))
+      if (!_cpp_in_system_header (pfile))
 	cpp_error (pfile, CPP_DL_PEDWARN,
-		   "__VA_OPT__ is not available until C++2a");
+		   "__VA_OPT__ is not available until C++20");
     }
   else if (!pfile->state.va_args_ok)
     {
@@ -1370,7 +1380,7 @@ maybe_va_opt_error (cpp_reader *pfile)
 	 variadic macro.  */
       cpp_error (pfile, CPP_DL_PEDWARN,
 		 "__VA_OPT__ can only appear in the expansion"
-		 " of a C++2a variadic macro");
+		 " of a C++20 variadic macro");
     }
 }
 
@@ -1464,7 +1474,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
   pfile->buffer->cur = cur;
   if (starts_ucn || forms_identifier_p (pfile, false, nst))
     {
-      /* Slower version for identifiers containing UCNs (or $).  */
+      /* Slower version for identifiers containing UCNs
+	 or extended chars (including $).  */
       do {
 	while (ISIDNUM (*pfile->buffer->cur))
 	  {
@@ -1566,44 +1577,90 @@ static void
 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 		unsigned int len, enum cpp_ttype type)
 {
-  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
-
-  memcpy (dest, base, len);
-  dest[len] = '\0';
   token->type = type;
   token->val.str.len = len;
-  token->val.str.text = dest;
+  token->val.str.text = cpp_alloc_token_string (pfile, base, len);
+}
+
+const uchar *
+cpp_alloc_token_string (cpp_reader *pfile,
+			const unsigned char *ptr, unsigned len)
+{
+  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
+
+  dest[len] = 0;
+  memcpy (dest, ptr, len);
+  return dest;
 }
 
+/* A pair of raw buffer pointers.  The currently open one is [1], the
+   first one is [0].  Used for string literal lexing.  */
+struct lit_accum {
+  _cpp_buff *first;
+  _cpp_buff *last;
+  const uchar *rpos;
+  size_t accum;
+
+  lit_accum ()
+    : first (NULL), last (NULL), rpos (0), accum (0)
+  {
+  }
+
+  void append (cpp_reader *, const uchar *, size_t);
+
+  void read_begin (cpp_reader *);
+  bool reading_p () const
+  {
+    return rpos != NULL;
+  }
+  char read_char ()
+  {
+    char c = *rpos++;
+    if (rpos == BUFF_FRONT (last))
+      rpos = NULL;
+    return c;
+  }
+};
+
 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
 
-static void
-bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
-		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+void
+lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
 {
-  _cpp_buff *first_buff = *first_buff_p;
-  _cpp_buff *last_buff = *last_buff_p;
-
-  if (first_buff == NULL)
-    first_buff = last_buff = _cpp_get_buff (pfile, len);
-  else if (len > BUFF_ROOM (last_buff))
+  if (!last)
+    /* Starting.  */
+    first = last = _cpp_get_buff (pfile, len);
+  else if (len > BUFF_ROOM (last))
     {
-      size_t room = BUFF_ROOM (last_buff);
-      memcpy (BUFF_FRONT (last_buff), base, room);
-      BUFF_FRONT (last_buff) += room;
+      /* There is insufficient room in the buffer.  Copy what we can,
+	 and then either extend or create a new one.  */
+      size_t room = BUFF_ROOM (last);
+      memcpy (BUFF_FRONT (last), base, room);
+      BUFF_FRONT (last) += room;
       base += room;
       len -= room;
-      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
-    }
+      accum += room;
 
-  memcpy (BUFF_FRONT (last_buff), base, len);
-  BUFF_FRONT (last_buff) += len;
+      gcc_checking_assert (!rpos);
 
-  *first_buff_p = first_buff;
-  *last_buff_p = last_buff;
+      last = _cpp_append_extend_buff (pfile, last, len);
+    }
+
+  memcpy (BUFF_FRONT (last), base, len);
+  BUFF_FRONT (last) += len;
+  accum += len;
 }
 
+void
+lit_accum::read_begin (cpp_reader *pfile)
+{
+  /* We never accumulate more than 4 chars to read.  */
+  if (BUFF_ROOM (last) < 4)
+
+    last = _cpp_append_extend_buff (pfile, last, 4);
+  rpos = BUFF_FRONT (last);
+}
 
 /* Returns true if a macro has been defined.
    This might not work if compile with -save-temps,
@@ -1627,271 +1684,275 @@ is_macro(cpp_reader *pfile, const uchar *base)
   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 					base, cur - base, hash, HT_NO_INSERT));
 
-  return !result ? false : (result->type == NT_MACRO);
+  return result && cpp_macro_p (result);
 }
 
+/* Returns true if a literal suffix does not have the expected form
+   and is defined as a macro.  */
 
-/* Lexes a raw string.  The stored string contains the spelling, including
-   double quotes, delimiter string, '(' and ')', any leading
-   'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
-   literal, or CPP_OTHER if it was not properly terminated.
+static bool
+is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
+{
+  /* User-defined literals outside of namespace std must start with a single
+     underscore, so assume anything of that form really is a UDL suffix.
+     We don't need to worry about UDLs defined inside namespace std because
+     their names are reserved, so cannot be used as macro names in valid
+     programs.  */
+  if (base[0] == '_' && base[1] != '_')
+    return false;
+  return is_macro (pfile, base);
+}
+
+/* Lexes a raw string.  The stored string contains the spelling,
+   including double quotes, delimiter string, '(' and ')', any leading
+   'L', 'u', 'U' or 'u8' and 'R' modifier.  The created token contains
+   the type of the literal, or CPP_OTHER if it was not properly
+   terminated.
+
+   BASE is the start of the token.  Updates pfile->buffer->cur to just
+   after the lexed string.
 
    The spelling is NUL-terminated, but it is not guaranteed that this
    is the first NUL since embedded NULs are preserved.  */
 
 static void
-lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
-		const uchar *cur)
-{
-  uchar raw_prefix[17];
-  uchar temp_buffer[18];
-  const uchar *orig_base;
-  unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
-  enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
-  raw_str_phase phase = RAW_STR_PREFIX;
-  enum cpp_ttype type;
-  size_t total_len = 0;
-  /* Index into temp_buffer during phases other than RAW_STR,
-     during RAW_STR phase 17 to tell BUF_APPEND that nothing should
-     be appended to temp_buffer.  */
-  size_t temp_buffer_len = 0;
-  _cpp_buff *first_buff = NULL, *last_buff = NULL;
-  size_t raw_prefix_start;
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
+{
+  const uchar *pos = base;
+
+  /* 'tis a pity this information isn't passed down from the lexer's
+     initial categorization of the token.  */
+  enum cpp_ttype type = CPP_STRING;
+
+  if (*pos == 'L')
+    {
+      type = CPP_WSTRING;
+      pos++;
+    }
+  else if (*pos == 'U')
+    {
+      type = CPP_STRING32;
+      pos++;
+    }
+  else if (*pos == 'u')
+    {
+      if (pos[1] == '8')
+	{
+	  type = CPP_UTF8STRING;
+	  pos++;
+	}
+      else
+	type = CPP_STRING16;
+      pos++;
+    }
+
+  gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
+  pos += 2;
+
   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
 
-  type = (*base == 'L' ? CPP_WSTRING :
-	  *base == 'U' ? CPP_STRING32 :
-	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
-	  : CPP_STRING);
-
-#define BUF_APPEND(STR,LEN)					\
-      do {							\
-	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
-			&first_buff, &last_buff);		\
-	total_len += (LEN);					\
-	if (__builtin_expect (temp_buffer_len < 17, 0)		\
-	    && (const uchar *)(STR) != base			\
-	    && (LEN) <= 2)					\
-	  {							\
-	    memcpy (temp_buffer + temp_buffer_len,		\
-		    (const uchar *)(STR), (LEN));		\
-	    temp_buffer_len += (LEN);				\
-	  }							\
-      } while (0)
-
-  orig_base = base;
-  ++cur;
-  raw_prefix_start = cur - base;
+  /* Skip notes before the ".  */
+  while (note->pos < pos)
+    ++note;
+
+  lit_accum accum;
+  
+  uchar prefix[17];
+  unsigned prefix_len = 0;
+  enum Phase
+  {
+   PHASE_PREFIX = -2,
+   PHASE_NONE = -1,
+   PHASE_SUFFIX = 0
+  } phase = PHASE_PREFIX;
+
   for (;;)
     {
-      cppchar_t c;
+      gcc_checking_assert (note->pos >= pos);
 
-      /* If we previously performed any trigraph or line splicing
-	 transformations, undo them in between the opening and closing
-	 double quote.  */
-      while (note->pos < cur)
-	++note;
-      for (; note->pos == cur; ++note)
-	{
-	  switch (note->type)
-	    {
-	    case '\\':
-	    case ' ':
-	      /* Restore backslash followed by newline.  */
-	      BUF_APPEND (base, cur - base);
-	      base = cur;
-	      BUF_APPEND ("\\", 1);
-	    after_backslash:
-	      if (note->type == ' ')
-		{
-		  /* GNU backslash whitespace newline extension.  FIXME
-		     could be any sequence of non-vertical space.  When we
-		     can properly restore any such sequence, we should mark
-		     this note as handled so _cpp_process_line_notes
-		     doesn't warn.  */
-		  BUF_APPEND (" ", 1);
-		}
-
-	      BUF_APPEND ("\n", 1);
-	      break;
+      /* Undo any escaped newlines and trigraphs.  */
+      if (!accum.reading_p () && note->pos == pos)
+	switch (note->type)
+	  {
+	  case '\\':
+	  case ' ':
+	    /* Restore backslash followed by newline.  */
+	    accum.append (pfile, base, pos - base);
+	    base = pos;
+	    accum.read_begin (pfile);
+	    accum.append (pfile, UC"\\", 1);
+
+	  after_backslash:
+	    if (note->type == ' ')
+	      /* GNU backslash whitespace newline extension.  FIXME
+		 could be any sequence of non-vertical space.  When we
+		 can properly restore any such sequence, we should
+		 mark this note as handled so _cpp_process_line_notes
+		 doesn't warn.  */
+	      accum.append (pfile, UC" ", 1);
+
+	    accum.append (pfile, UC"\n", 1);
+	    note++;
+	    break;
 
-	    case 0:
-	      /* Already handled.  */
-	      break;
+	  case '\n':
+	    /* This can happen for ??/<NEWLINE> when trigraphs are not
+	       being interpretted.  */
+	    gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
+	    note->type = 0;
+	    note++;
+	    break;
 
-	    default:
-	      if (_cpp_trigraph_map[note->type])
-		{
-		  /* Don't warn about this trigraph in
-		     _cpp_process_line_notes, since trigraphs show up as
-		     trigraphs in raw strings.  */
-		  uchar type = note->type;
-		  note->type = 0;
-
-		  if (!CPP_OPTION (pfile, trigraphs))
-		    /* If we didn't convert the trigraph in the first
-		       place, don't do anything now either.  */
-		    break;
+	  default:
+	    gcc_checking_assert (_cpp_trigraph_map[note->type]);
+
+	    /* Don't warn about this trigraph in
+	       _cpp_process_line_notes, since trigraphs show up as
+	       trigraphs in raw strings.  */
+	    uchar type = note->type;
+	    note->type = 0;
+
+	    if (CPP_OPTION (pfile, trigraphs))
+	      {
+		accum.append (pfile, base, pos - base);
+		base = pos;
+		accum.read_begin (pfile);
+		accum.append (pfile, UC"??", 2);
+		accum.append (pfile, &type, 1);
+
+		/* ??/ followed by newline gets two line notes, one for
+		   the trigraph and one for the backslash/newline.  */
+		if (type == '/' && note[1].pos == pos)
+		  {
+		    note++;
+		    gcc_assert (note->type == '\\' || note->type == ' ');
+		    goto after_backslash;
+		  }
+		/* Skip the replacement character.  */
+		base = ++pos;
+	      }
+
+	    note++;
+	    break;
+	  }
 
-		  BUF_APPEND (base, cur - base);
-		  base = cur;
-		  BUF_APPEND ("??", 2);
+      /* Now get a char to process.  Either from an expanded note, or
+	 from the line buffer.  */
+      bool read_note = accum.reading_p ();
+      char c = read_note ? accum.read_char () : *pos++;
 
-		  /* ??/ followed by newline gets two line notes, one for
-		     the trigraph and one for the backslash/newline.  */
-		  if (type == '/' && note[1].pos == cur)
-		    {
-		      if (note[1].type != '\\'
-			  && note[1].type != ' ')
-			abort ();
-		      BUF_APPEND ("/", 1);
-		      ++note;
-		      goto after_backslash;
-		    }
-		  else
-		    {
-		      /* Skip the replacement character.  */
-		      base = ++cur;
-		      BUF_APPEND (&type, 1);
-		      c = type;
-		      goto check_c;
-		    }
-		}
+      if (phase == PHASE_PREFIX)
+	{
+	  if (c == '(')
+	    {
+	      /* Done.  */
+	      phase = PHASE_NONE;
+	      prefix[prefix_len++] = '"';
+	    }
+	  else if (prefix_len < 16
+		   /* Prefix chars are any of the basic character set,
+		      [lex.charset] except for '
+		      ()\\\t\v\f\n'. Optimized for a contiguous
+		      alphabet.  */
+		   /* Unlike a switch, this collapses down to one or
+		      two shift and bitmask operations on an ASCII
+		      system, with an outlier or two.   */
+		   && (('Z' - 'A' == 25
+			? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+			: ISIDST (c))
+		       || (c >= '0' && c <= '9')
+		       || c == '_' || c == '{' || c == '}'
+		       || c == '[' || c == ']' || c == '#'
+		       || c == '<' || c == '>' || c == '%'
+		       || c == ':' || c == ';' || c == '.' || c == '?'
+		       || c == '*' || c == '+' || c == '-' || c == '/'
+		       || c == '^' || c == '&' || c == '|' || c == '~'
+		       || c == '!' || c == '=' || c == ','
+		       || c == '"' || c == '\''))
+	    prefix[prefix_len++] = c;
+	  else
+	    {
+	      /* Something is wrong.  */
+	      int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
+	      if (prefix_len == 16)
+		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+				     col, "raw string delimiter longer "
+				     "than 16 characters");
+	      else if (c == '\n')
+		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+				     col, "invalid new-line in raw "
+				     "string delimiter");
 	      else
-		abort ();
-	      break;
+		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+				     col, "invalid character '%c' in "
+				     "raw string delimiter", c);
+	      type = CPP_OTHER;
+	      phase = PHASE_NONE;
+	      /* Continue until we get a close quote, that's probably
+		 the best failure mode.  */
+	      prefix_len = 0;
 	    }
+	  if (c != '\n')
+	    continue;
 	}
-      c = *cur++;
-      if (__builtin_expect (temp_buffer_len < 17, 0))
-	temp_buffer[temp_buffer_len++] = c;
 
-     check_c:
-      if (phase == RAW_STR_PREFIX)
+      if (phase != PHASE_NONE)
 	{
-	  while (raw_prefix_len < temp_buffer_len)
+	  if (prefix[phase] != c)
+	    phase = PHASE_NONE;
+	  else if (unsigned (phase + 1) == prefix_len)
+	    break;
+	  else
 	    {
-	      raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
-	      switch (raw_prefix[raw_prefix_len])
-		{
-		case ' ': case '(': case ')': case '\\': case '\t':
-		case '\v': case '\f': case '\n': default:
-		  break;
-		/* Basic source charset except the above chars.  */
-		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-		case 'y': case 'z':
-		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-		case 'Y': case 'Z':
-		case '0': case '1': case '2': case '3': case '4': case '5':
-		case '6': case '7': case '8': case '9':
-		case '_': case '{': case '}': case '#': case '[': case ']':
-		case '<': case '>': case '%': case ':': case ';': case '.':
-		case '?': case '*': case '+': case '-': case '/': case '^':
-		case '&': case '|': case '~': case '!': case '=': case ',':
-		case '"': case '\'':
-		  if (raw_prefix_len < 16)
-		    {
-		      raw_prefix_len++;
-		      continue;
-		    }
-		  break;
-		}
-
-	      if (raw_prefix[raw_prefix_len] != '(')
-		{
-		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
-		  if (raw_prefix_len == 16)
-		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
-					 col, "raw string delimiter longer "
-					      "than 16 characters");
-		  else if (raw_prefix[raw_prefix_len] == '\n')
-		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
-					 col, "invalid new-line in raw "
-					      "string delimiter");
-		  else
-		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
-					 col, "invalid character '%c' in "
-					      "raw string delimiter",
-					 (int) raw_prefix[raw_prefix_len]);
-		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
-		  create_literal (pfile, token, orig_base,
-				  raw_prefix_start - 1, CPP_OTHER);
-		  if (first_buff)
-		    _cpp_release_buff (pfile, first_buff);
-		  return;
-		}
-	      raw_prefix[raw_prefix_len] = '"';
-	      phase = RAW_STR;
-	      /* Nothing should be appended to temp_buffer during
-		 RAW_STR phase.  */
-	      temp_buffer_len = 17;
-	      break;
+	      phase = Phase (phase + 1);
+	      continue;
 	    }
-	  continue;
 	}
-      else if (phase == RAW_STR_SUFFIX)
-	{
-	  while (raw_suffix_len <= raw_prefix_len
-		 && raw_suffix_len < temp_buffer_len
-		 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
-	    raw_suffix_len++;
-	  if (raw_suffix_len > raw_prefix_len)
-	    break;
-	  if (raw_suffix_len == temp_buffer_len)
-	    continue;
-	  phase = RAW_STR;
-	  /* Nothing should be appended to temp_buffer during
-	     RAW_STR phase.  */
-	  temp_buffer_len = 17;
-	}
-      if (c == ')')
-	{
-	  phase = RAW_STR_SUFFIX;
-	  raw_suffix_len = 0;
-	  temp_buffer_len = 0;
-	}
-      else if (c == '\n')
+
+      if (!prefix_len && c == '"')
+	/* Failure mode lexing.  */
+	goto out;
+      else if (prefix_len && c == ')')
+	phase = PHASE_SUFFIX;
+      else if (!read_note && c == '\n')
 	{
+	  pos--;
+	  pfile->buffer->cur = pos;
 	  if (pfile->state.in_directive
 	      || (pfile->state.parsing_args
 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
 	    {
-	      cur--;
-	      type = CPP_OTHER;
 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
 				   "unterminated raw string");
-	      break;
+	      type = CPP_OTHER;
+	      goto out;
 	    }
 
-	  BUF_APPEND (base, cur - base);
+	  accum.append (pfile, base, pos - base + 1);
+	  _cpp_process_line_notes (pfile, false);
 
-	  if (pfile->buffer->cur < pfile->buffer->rlimit)
+	  if (pfile->buffer->next_line < pfile->buffer->rlimit)
 	    CPP_INCREMENT_LINE (pfile, 0);
 	  pfile->buffer->need_line = true;
 
-	  pfile->buffer->cur = cur-1;
-	  _cpp_process_line_notes (pfile, false);
 	  if (!_cpp_get_fresh_line (pfile))
 	    {
-	      source_location src_loc = token->src_loc;
+	      /* We ran out of file and failed to get a line.  */
+	      location_t src_loc = token->src_loc;
 	      token->type = CPP_EOF;
 	      /* Tell the compiler the line number of the EOF token.  */
 	      token->src_loc = pfile->line_table->highest_line;
 	      token->flags = BOL;
-	      if (first_buff != NULL)
-		_cpp_release_buff (pfile, first_buff);
+	      if (accum.first)
+		_cpp_release_buff (pfile, accum.first);
 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
 				   "unterminated raw string");
+	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
+	      _cpp_pop_buffer (pfile);
 	      return;
 	    }
 
-	  cur = base = pfile->buffer->cur;
+	  pos = base = pfile->buffer->cur;
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
 	}
     }
@@ -1900,10 +1961,8 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
     {
       /* If a string format macro, say from inttypes.h, is placed touching
 	 a string literal it could be parsed as a C++11 user-defined string
-	 literal thus breaking the program.
-	 Try to identify macros with is_macro. A warning is issued.
-	 The macro name should not start with '_' for this warning. */
-      if ((*cur != '_') && is_macro (pfile, cur))
+	 literal thus breaking the program.  */
+      if (is_macro_not_literal_suffix (pfile, pos))
 	{
 	  /* Raise a warning, but do not consume subsequent tokens.  */
 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
@@ -1913,37 +1972,37 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 				   "a space between literal and string macro");
 	}
       /* Grab user defined literal suffix.  */
-      else if (ISIDST (*cur))
+      else if (ISIDST (*pos))
 	{
 	  type = cpp_userdef_string_add_type (type);
-	  ++cur;
+	  ++pos;
 
-	  while (ISIDNUM (*cur))
-	    ++cur;
+	  while (ISIDNUM (*pos))
+	    ++pos;
 	}
     }
 
-  pfile->buffer->cur = cur;
-  if (first_buff == NULL)
-    create_literal (pfile, token, base, cur - base, type);
+ out:
+  pfile->buffer->cur = pos;
+  if (!accum.accum)
+    create_literal (pfile, token, base, pos - base, type);
   else
     {
-      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+      size_t extra_len = pos - base;
+      uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);
 
       token->type = type;
-      token->val.str.len = total_len + (cur - base);
+      token->val.str.len = accum.accum + extra_len;
       token->val.str.text = dest;
-      last_buff = first_buff;
-      while (last_buff != NULL)
+      for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
 	{
-	  memcpy (dest, last_buff->base,
-		  BUFF_FRONT (last_buff) - last_buff->base);
-	  dest += BUFF_FRONT (last_buff) - last_buff->base;
-	  last_buff = last_buff->next;
+	  size_t len = BUFF_FRONT (buf) - buf->base;
+	  memcpy (dest, buf->base, len);
+	  dest += len;
 	}
-      _cpp_release_buff (pfile, first_buff);
-      memcpy (dest, base, cur - base);
-      dest[cur - base] = '\0';
+      _cpp_release_buff (pfile, accum.first);
+      memcpy (dest, base, extra_len);
+      dest[extra_len] = '\0';
     }
 }
 
@@ -1976,7 +2035,7 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
     }
   if (terminator == 'R')
     {
-      lex_raw_string (pfile, token, base, cur);
+      lex_raw_string (pfile, token, base);
       return;
     }
   if (terminator == '"')
@@ -2031,10 +2090,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
     {
       /* If a string format macro, say from inttypes.h, is placed touching
 	 a string literal it could be parsed as a C++11 user-defined string
-	 literal thus breaking the program.
-	 Try to identify macros with is_macro. A warning is issued.
-	 The macro name should not start with '_' for this warning. */
-      if ((*cur != '_') && is_macro (pfile, cur))
+	 literal thus breaking the program.  */
+      if (is_macro_not_literal_suffix (pfile, cur))
 	{
 	  /* Raise a warning, but do not consume subsequent tokens.  */
 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
@@ -2504,6 +2561,15 @@ cpp_peek_token (cpp_reader *pfile, int index)
 	  index--;
 	  break;
 	}
+      else if (peektok->type == CPP_PRAGMA)
+	{
+	  /* Don't peek past a pragma.  */
+	  if (peektok == &pfile->directive_result)
+	    /* Save the pragma in the buffer.  */
+	    *pfile->cur_token++ = *peektok;
+	  index--;
+	  break;
+	}
     }
   while (index--);
 
@@ -2556,6 +2622,151 @@ _cpp_temp_token (cpp_reader *pfile)
   return result;
 }
 
+/* We're at the beginning of a logical line (so not in
+  directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
+  if we should enter deferred_pragma mode to tokenize the rest of the
+  line as a module control-line.  */
+
+static void
+cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
+{
+  unsigned backup = 0; /* Tokens we peeked.  */
+  cpp_hashnode *node = result->val.node.node;
+  cpp_token *peek = result;
+  cpp_token *keyword = peek;
+  cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
+  int header_count = 0;
+
+  /* Make sure the incoming state is as we expect it.  This way we
+     can restore it using constants.  */
+  gcc_checking_assert (!pfile->state.in_deferred_pragma
+		       && !pfile->state.skipping
+		       && !pfile->state.parsing_args
+		       && !pfile->state.angled_headers
+		       && (pfile->state.save_comments
+			   == !CPP_OPTION (pfile, discard_comments)));
+
+  /* Enter directives mode sufficiently for peeking.  We don't have
+     to actually set in_directive.  */
+  pfile->state.in_deferred_pragma = true;
+
+  /* These two fields are needed to process tokenization in deferred
+     pragma mode.  They are not used outside deferred pragma mode or
+     directives mode.  */
+  pfile->state.pragma_allow_expansion = true;
+  pfile->directive_line = result->src_loc;
+
+  /* Saving comments is incompatible with directives mode.   */
+  pfile->state.save_comments = 0;
+
+  if (node == n_modules[spec_nodes::M_EXPORT][0])
+    {
+      peek = _cpp_lex_direct (pfile);
+      keyword = peek;
+      backup++;
+      if (keyword->type != CPP_NAME)
+	goto not_module;
+      node = keyword->val.node.node;
+      if (!(node->flags & NODE_MODULE))
+	goto not_module;
+    }
+
+  if (node == n_modules[spec_nodes::M__IMPORT][0])
+    /* __import  */
+    header_count = backup + 2 + 16;
+  else if (node == n_modules[spec_nodes::M_IMPORT][0])
+    /* import  */
+    header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
+  else if (node == n_modules[spec_nodes::M_MODULE][0])
+    ; /* module  */
+  else
+    goto not_module;
+
+  /* We've seen [export] {module|import|__import}.  Check the next token.  */
+  if (header_count)
+    /* After '{,__}import' a header name may appear.  */
+    pfile->state.angled_headers = true;
+  peek = _cpp_lex_direct (pfile);
+  backup++;
+
+  /* ... import followed by identifier, ':', '<' or
+     header-name preprocessing tokens, or module
+     followed by cpp-identifier, ':' or ';' preprocessing
+     tokens.  C++ keywords are not yet relevant.  */
+  if (peek->type == CPP_NAME
+      || peek->type == CPP_COLON
+      ||  (header_count
+	   ? (peek->type == CPP_LESS
+	      || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
+	      || peek->type == CPP_HEADER_NAME)
+	   : peek->type == CPP_SEMICOLON))
+    {
+      pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
+      if (!pfile->state.pragma_allow_expansion)
+	pfile->state.prevent_expansion++;
+
+      if (!header_count && linemap_included_from
+	  (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
+	cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
+			     "module control-line cannot be in included file");
+
+      /* The first one or two tokens cannot be macro names.  */
+      for (int ix = backup; ix--;)
+	{
+	  cpp_token *tok = ix ? keyword : result;
+	  cpp_hashnode *node = tok->val.node.node;
+
+	  /* Don't attempt to expand the token.  */
+	  tok->flags |= NO_EXPAND;
+	  if (_cpp_defined_macro_p (node)
+	      && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
+	      && !cpp_fun_like_macro_p (node))
+	    cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0, 
+				 "module control-line \"%s\" cannot be"
+				 " an object-like macro",
+				 NODE_NAME (node));
+	}
+
+      /* Map to underbar variants.  */
+      keyword->val.node.node = n_modules[header_count
+					 ? spec_nodes::M_IMPORT
+					 : spec_nodes::M_MODULE][1];
+      if (backup != 1)
+	result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
+
+      /* Maybe tell the tokenizer we expect a header-name down the
+	 road.  */
+      pfile->state.directive_file_token = header_count;
+    }
+  else
+    {
+    not_module:
+      /* Drop out of directive mode.  */
+      /* We aaserted save_comments had this value upon entry.  */
+      pfile->state.save_comments
+	= !CPP_OPTION (pfile, discard_comments);
+      pfile->state.in_deferred_pragma = false;
+      /* Do not let this remain on.  */
+      pfile->state.angled_headers = false;
+    }
+
+  /* In either case we want to backup the peeked tokens.  */
+  if (backup)
+    {
+      /* If we saw EOL, we should drop it, because this isn't a module
+	 control-line after all.  */
+      bool eol = peek->type == CPP_PRAGMA_EOL;
+      if (!eol || backup > 1)
+	{
+	  /* Put put the peeked tokens back  */
+	  _cpp_backup_tokens_direct (pfile, backup);
+	  /* But if the last one was an EOL, forget it.  */
+	  if (eol)
+	    pfile->lookaheads--;
+	}
+    }
+}
+
 /* Lex a token into RESULT (external interface).  Takes care of issues
    like directive handling, token lookahead, multiple include
    optimization and skipping.  */
@@ -2604,6 +2815,21 @@ _cpp_lex_token (cpp_reader *pfile)
 	    }
 	  else if (pfile->state.in_deferred_pragma)
 	    result = &pfile->directive_result;
+	  else if (result->type == CPP_NAME
+		   && (result->val.node.node->flags & NODE_MODULE)
+		   && !pfile->state.skipping
+		   /* Unlike regular directives, we do not deal with
+		      tokenizing module directives as macro arguments.
+		      That's not permitted.  */
+		   && !pfile->state.parsing_args)
+	    {
+	      /* P1857.  Before macro expansion, At start of logical
+		 line ... */
+	      /* We don't have to consider lookaheads at this point.  */
+	      gcc_checking_assert (!pfile->lookaheads);
+
+	      cpp_maybe_module_directive (pfile, result);
+	    }
 
 	  if (pfile->cb.line_change && !pfile->state.skipping)
 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
@@ -2629,8 +2855,6 @@ _cpp_lex_token (cpp_reader *pfile)
 bool
 _cpp_get_fresh_line (cpp_reader *pfile)
 {
-  int return_at_eof;
-
   /* We can't get a new line until we leave the current directive.  */
   if (pfile->state.in_directive)
     return false;
@@ -2661,10 +2885,17 @@ _cpp_get_fresh_line (cpp_reader *pfile)
 	  buffer->next_line = buffer->rlimit;
 	}
 
-      return_at_eof = buffer->return_at_eof;
-      _cpp_pop_buffer (pfile);
-      if (pfile->buffer == NULL || return_at_eof)
-	return false;
+      if (buffer->prev && !buffer->return_at_eof)
+	_cpp_pop_buffer (pfile);
+      else
+	{
+	  /* End of translation.  Do not pop the buffer yet. Increment
+	     line number so that the EOF token is on a line of its own
+	     (_cpp_lex_direct doesn't increment in that case, because
+	     it's hard for it to distinguish this special case). */
+	  CPP_INCREMENT_LINE (pfile, 0);
+	  return false;
+	}
     }
 }
 
@@ -2702,22 +2933,20 @@ _cpp_lex_direct (cpp_reader *pfile)
   buffer = pfile->buffer;
   if (buffer->need_line)
     {
-      if (pfile->state.in_deferred_pragma)
-	{
-	  result->type = CPP_PRAGMA_EOL;
-	  pfile->state.in_deferred_pragma = false;
-	  if (!pfile->state.pragma_allow_expansion)
-	    pfile->state.prevent_expansion--;
-	  return result;
-	}
+      gcc_assert (!pfile->state.in_deferred_pragma);
       if (!_cpp_get_fresh_line (pfile))
 	{
 	  result->type = CPP_EOF;
-	  if (!pfile->state.in_directive)
+	  /* Not a real EOF in a directive or arg parsing -- we refuse
+  	     to advance to the next file now, and will once we're out
+  	     of those modes.  */
+	  if (!pfile->state.in_directive && !pfile->state.parsing_args)
 	    {
 	      /* Tell the compiler the line number of the EOF token.  */
 	      result->src_loc = pfile->line_table->highest_line;
 	      result->flags = BOL;
+	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
+	      _cpp_pop_buffer (pfile);
 	    }
 	  return result;
 	}
@@ -2746,8 +2975,8 @@ _cpp_lex_direct (cpp_reader *pfile)
     }
   c = *buffer->cur++;
 
-  if (pfile->forced_token_location_p)
-    result->src_loc = *pfile->forced_token_location_p;
+  if (pfile->forced_token_location)
+    result->src_loc = pfile->forced_token_location;
   else
     result->src_loc = linemap_position_for_column (pfile->line_table,
 					  CPP_BUF_COLUMN (buffer, buffer->cur));
@@ -2760,9 +2989,28 @@ _cpp_lex_direct (cpp_reader *pfile)
       goto skipped_white;
 
     case '\n':
-      if (buffer->cur < buffer->rlimit)
+      /* Increment the line, unless this is the last line ...  */
+      if (buffer->cur < buffer->rlimit
+	  /* ... or this is a #include, (where _cpp_stack_file needs to
+	     unwind by one line) ...  */
+	  || (pfile->state.in_directive > 1
+	      /* ... except traditional-cpp increments this elsewhere.  */
+	      && !CPP_OPTION (pfile, traditional)))
 	CPP_INCREMENT_LINE (pfile, 0);
       buffer->need_line = true;
+      if (pfile->state.in_deferred_pragma)
+	{
+	  /* Produce the PRAGMA_EOL on this line.  File reading
+	     ensures there is always a \n at end of the buffer, thus
+	     in a deferred pragma we always see CPP_PRAGMA_EOL before
+	     any CPP_EOF.  */
+	  result->type = CPP_PRAGMA_EOL;
+	  result->flags &= ~PREV_WHITE;
+	  pfile->state.in_deferred_pragma = false;
+	  if (!pfile->state.pragma_allow_expansion)
+	    pfile->state.prevent_expansion--;
+	  return result;
+	}
       goto fresh_line;
 
     case '0': case '1': case '2': case '3': case '4':
@@ -2853,7 +3101,7 @@ _cpp_lex_direct (cpp_reader *pfile)
       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
 	{
 	  /* Don't warn for system headers.  */
-	  if (cpp_in_system_header (pfile))
+	  if (_cpp_in_system_header (pfile))
 	    ;
 	  /* Warn about comments if pedantically GNUC89, and not
 	     in system headers.  */
@@ -2861,10 +3109,10 @@ _cpp_lex_direct (cpp_reader *pfile)
 		   && CPP_PEDANTIC (pfile)
 		   && ! buffer->warned_cplusplus_comments)
 	    {
-	      cpp_error (pfile, CPP_DL_PEDWARN,
-			 "C++ style comments are not allowed in ISO C90");
-	      cpp_error (pfile, CPP_DL_PEDWARN,
-			 "(this will be reported only once per input file)");
+	      if (cpp_error (pfile, CPP_DL_PEDWARN,
+			     "C++ style comments are not allowed in ISO C90"))
+		cpp_error (pfile, CPP_DL_NOTE,
+			   "(this will be reported only once per input file)");
 	      buffer->warned_cplusplus_comments = 1;
 	    }
 	  /* Or if specifically desired via -Wc90-c99-compat.  */
@@ -2872,10 +3120,10 @@ _cpp_lex_direct (cpp_reader *pfile)
 		   && ! CPP_OPTION (pfile, cplusplus)
 		   && ! buffer->warned_cplusplus_comments)
 	    {
-	      cpp_error (pfile, CPP_DL_WARNING,
-			 "C++ style comments are incompatible with C90");
-	      cpp_error (pfile, CPP_DL_WARNING,
-			 "(this will be reported only once per input file)");
+	      if (cpp_error (pfile, CPP_DL_WARNING,
+			     "C++ style comments are incompatible with C90"))
+		cpp_error (pfile, CPP_DL_NOTE,
+			   "(this will be reported only once per input file)");
 	      buffer->warned_cplusplus_comments = 1;
 	    }
 	  /* In C89/C94, C++ style comments are forbidden.  */
@@ -2895,11 +3143,12 @@ _cpp_lex_direct (cpp_reader *pfile)
 		}
 	      else if (! buffer->warned_cplusplus_comments)
 		{
-		  cpp_error (pfile, CPP_DL_ERROR,
-			     "C++ style comments are not allowed in ISO C90");
-		  cpp_error (pfile, CPP_DL_ERROR,
-			     "(this will be reported only once per input "
-			     "file)");
+		  if (cpp_error (pfile, CPP_DL_ERROR,
+				 "C++ style comments are not allowed in "
+				 "ISO C90"))
+		    cpp_error (pfile, CPP_DL_NOTE,
+			       "(this will be reported only once per input "
+			       "file)");
 		  buffer->warned_cplusplus_comments = 1;
 		}
 	    }
@@ -2951,7 +3200,13 @@ _cpp_lex_direct (cpp_reader *pfile)
 
       result->type = CPP_LESS;
       if (*buffer->cur == '=')
-	buffer->cur++, result->type = CPP_LESS_EQ;
+	{
+	  buffer->cur++, result->type = CPP_LESS_EQ;
+	  if (*buffer->cur == '>'
+	      && CPP_OPTION (pfile, cplusplus)
+	      && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
+	    buffer->cur++, result->type = CPP_SPACESHIP;
+	}
       else if (*buffer->cur == '<')
 	{
 	  buffer->cur++;
@@ -3075,7 +3330,7 @@ _cpp_lex_direct (cpp_reader *pfile)
 
     case ':':
       result->type = CPP_COLON;
-      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
+      if (*buffer->cur == ':' && CPP_OPTION (pfile, scope))
 	buffer->cur++, result->type = CPP_SCOPE;
       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
 	{
@@ -3105,12 +3360,12 @@ _cpp_lex_direct (cpp_reader *pfile)
       /* @ is a punctuator in Objective-C.  */
     case '@': result->type = CPP_ATSIGN; break;
 
-    case '$':
-    case '\\':
+    default:
       {
 	const uchar *base = --buffer->cur;
-	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 
+	/* Check for an extended identifier ($ or UCN or UTF-8).  */
+	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 	if (forms_identifier_p (pfile, true, &nst))
 	  {
 	    result->type = CPP_NAME;
@@ -3119,13 +3374,21 @@ _cpp_lex_direct (cpp_reader *pfile)
 	    warn_about_normalization (pfile, result, &nst);
 	    break;
 	  }
+
+	/* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
+	   single token.  */
 	buffer->cur++;
+	if (c >= utf8_signifier)
+	  {
+	    const uchar *pstr = base;
+	    cppchar_t s;
+	    if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
+	      buffer->cur = pstr;
+	  }
+	create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
+	break;
       }
-      /* FALLTHRU */
 
-    default:
-      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
-      break;
     }
 
   /* Potentially convert the location of the token to a range.  */
@@ -3365,7 +3628,11 @@ cpp_output_token (const cpp_token *token, FILE *fp)
       break;
 
     case SPELL_LITERAL:
+      if (token->type == CPP_HEADER_NAME)
+	fputc ('"', fp);
       fwrite (token->val.str.text, 1, token->val.str.len, fp);
+      if (token->type == CPP_HEADER_NAME)
+	fputc ('"', fp);
       break;
 
     case SPELL_NONE:
@@ -3454,6 +3721,7 @@ cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
 				|| (CPP_OPTION (pfile, objc)
 				    && token1->val.str.text[0] == '@'
 				    && (b == CPP_NAME || b == CPP_STRING)));
+    case CPP_LESS_EQ:	return c == '>';
     case CPP_STRING:
     case CPP_WSTRING:
     case CPP_UTF8STRING:
@@ -3713,6 +3981,25 @@ _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
   return result;
 }
 
+/* Commit or allocate storage from a buffer.  */
+
+void *
+_cpp_commit_buff (cpp_reader *pfile, size_t size)
+{
+  void *ptr = BUFF_FRONT (pfile->a_buff);
+
+  if (pfile->hash_table->alloc_subobject)
+    {
+      void *copy = pfile->hash_table->alloc_subobject (size);
+      memcpy (copy, ptr, size);
+      ptr = copy;
+    }
+  else
+    BUFF_FRONT (pfile->a_buff) += size;
+
+  return ptr;
+}
+
 /* Say which field of TOK is in use.  */
 
 enum cpp_token_fld_kind
@@ -3725,7 +4012,11 @@ cpp_token_val_index (const cpp_token *tok)
     case SPELL_LITERAL:
       return CPP_TOKEN_FLD_STR;
     case SPELL_OPERATOR:
-      if (tok->type == CPP_PASTE)
+      /* Operands which were originally spelled as ident keep around
+         the node for the exact spelling.  */
+      if (tok->flags & NAMED_OP)
+	return CPP_TOKEN_FLD_NODE;
+      else if (tok->type == CPP_PASTE)
 	return CPP_TOKEN_FLD_TOKEN_NO;
       else
 	return CPP_TOKEN_FLD_NONE;
@@ -3742,14 +4033,14 @@ cpp_token_val_index (const cpp_token *tok)
     }
 }
 
-/* All tokens lexed in R after calling this function will be forced to have
-   their source_location the same as the location referenced by P, until
+/* All tokens lexed in R after calling this function will be forced to
+   have their location_t to be P, until
    cpp_stop_forcing_token_locations is called for R.  */
 
 void
-cpp_force_token_locations (cpp_reader *r, source_location *p)
+cpp_force_token_locations (cpp_reader *r, location_t loc)
 {
-  r->forced_token_location_p = p;
+  r->forced_token_location = loc;
 }
 
 /* Go back to assigning locations naturally for lexed tokens.  */
@@ -3757,5 +4048,717 @@ cpp_force_token_locations (cpp_reader *r, source_location *p)
 void
 cpp_stop_forcing_token_locations (cpp_reader *r)
 {
-  r->forced_token_location_p = NULL;
+  r->forced_token_location = 0;
+}
+
+/* We're looking at \, if it's escaping EOL, look past it.  If at
+   LIMIT, don't advance.  */
+
+static const unsigned char *
+do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
+{
+  const unsigned char *probe = peek;
+
+  if (__builtin_expect (peek[1] == '\n', true))
+    {
+    eol:
+      probe += 2;
+      if (__builtin_expect (probe < limit, true))
+	{
+	  peek = probe;
+	  if (*peek == '\\')
+	    /* The user might be perverse.  */
+	    return do_peek_backslash (peek, limit);
+	}
+    }
+  else if (__builtin_expect (peek[1] == '\r', false))
+    {
+      if (probe[2] == '\n')
+	probe++;
+      goto eol;
+    }
+
+  return peek;
+}
+
+static const unsigned char *
+do_peek_next (const unsigned char *peek, const unsigned char *limit)
+{
+  if (__builtin_expect (*peek == '\\', false))
+    peek = do_peek_backslash (peek, limit);
+  return peek;
+}
+
+static const unsigned char *
+do_peek_prev (const unsigned char *peek, const unsigned char *bound)
+{
+  if (peek == bound)
+    return NULL;
+
+  unsigned char c = *--peek;
+  if (__builtin_expect (c == '\n', false)
+      || __builtin_expect (c == 'r', false))
+    {
+      if (peek == bound)
+	return peek;
+      int ix = -1;
+      if (c == '\n' && peek[ix] == '\r')
+	{
+	  if (peek + ix == bound)
+	    return peek;
+	  ix--;
+	}
+
+      if (peek[ix] == '\\')
+	return do_peek_prev (peek + ix, bound);
+
+      return peek;
+    }
+  else
+    return peek;
+}
+
+/* If PEEK[-1] is identifier MATCH, scan past it and trailing white
+   space.  Otherwise return NULL.  */
+
+static const unsigned char *
+do_peek_ident (const char *match, const unsigned char *peek,
+	       const unsigned char *limit)
+{
+  for (; *++match; peek++)
+    if (*peek != *match)
+      {
+	peek = do_peek_next (peek, limit);
+	if (*peek != *match)
+	  return NULL;
+      }
+
+  /* Must now not be looking at an identifier char.  */
+  peek = do_peek_next (peek, limit);
+  if (ISIDNUM (*peek))
+    return NULL;
+
+  /* Skip control-line whitespace.  */
+ ws:
+  while (*peek == ' ' || *peek == '\t')
+    peek++;
+  if (__builtin_expect (*peek == '\\', false))
+    {
+      peek = do_peek_backslash (peek, limit);
+      if (*peek != '\\')
+	goto ws;
+    }
+
+  return peek;
+}
+
+/* Are we looking at a module control line starting as PEEK - 1?  */
+
+static bool
+do_peek_module (cpp_reader *pfile, unsigned char c,
+		const unsigned char *peek, const unsigned char *limit)
+{
+  bool import = false;
+
+  if (__builtin_expect (c == 'e', false))
+    {
+      if (!((peek[0] == 'x' || peek[0] == '\\')
+	    && (peek = do_peek_ident ("export", peek, limit))))
+	return false;
+
+      /* export, peek for import or module.  No need to peek __import
+	 here.  */
+      if (peek[0] == 'i')
+	{
+	  if (!((peek[1] == 'm' || peek[1] == '\\')
+		&& (peek = do_peek_ident ("import", peek + 1, limit))))
+	    return false;
+	  import = true;
+	}
+      else if (peek[0] == 'm')
+	{
+	  if (!((peek[1] == 'o' || peek[1] == '\\')
+		&& (peek = do_peek_ident ("module", peek + 1, limit))))
+	    return false;
+	}
+      else
+	return false;
+    }
+  else if (__builtin_expect (c == 'i', false))
+    {
+      if (!((peek[0] == 'm' || peek[0] == '\\')
+	    && (peek = do_peek_ident ("import", peek, limit))))
+	return false;
+      import = true;
+    }
+  else if (__builtin_expect (c == '_', false))
+    {
+      /* Needed for translated includes.   */
+      if (!((peek[0] == '_' || peek[0] == '\\')
+	    && (peek = do_peek_ident ("__import", peek, limit))))
+	return false;
+      import = true;
+    }
+  else if (__builtin_expect (c == 'm', false))
+    {
+      if (!((peek[0] == 'o' || peek[0] == '\\')
+	    && (peek = do_peek_ident ("module", peek, limit))))
+	return false;
+    }
+  else
+    return false;
+
+  /* Peek the next character to see if it's good enough.  We'll be at
+     the first non-whitespace char, including skipping an escaped
+     newline.  */
+  /* ... import followed by identifier, ':', '<' or header-name
+     preprocessing tokens, or module followed by identifier, ':' or
+     ';' preprocessing tokens.  */
+  unsigned char p = *peek++;
+      
+  /* A character literal is ... single quotes, ... optionally preceded
+     by u8, u, U, or L */
+  /* A string-literal is a ... double quotes, optionally prefixed by
+     R, u8, u8R, u, uR, U, UR, L, or LR */
+  if (p == 'u')
+    {
+      peek = do_peek_next (peek, limit);
+      if (*peek == '8')
+	{
+	  peek++;
+	  goto peek_u8;
+	}
+      goto peek_u;
+    }
+  else if (p == 'U' || p == 'L')
+    {
+    peek_u8:
+      peek = do_peek_next (peek, limit);
+    peek_u:
+      if (*peek == '\"' || *peek == '\'')
+	return false;
+
+      if (*peek == 'R')
+	goto peek_R;
+      /* Identifier. Ok.  */
+    }
+  else if (p == 'R')
+    {
+    peek_R:
+      if (CPP_OPTION (pfile, rliterals))
+	{
+	  peek = do_peek_next (peek, limit);
+	  if (*peek == '\"')
+	    return false;
+	}
+      /* Identifier. Ok.  */
+    }
+  else if ('Z' - 'A' == 25
+	   ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
+	   : ISIDST (p))
+    {
+      /* Identifier.  Ok. */
+    }
+  else if (p == '<')
+    {
+      /* Maybe angle header, ok for import.  Reject
+	 '<=', '<<' digraph:'<:'.  */
+      if (!import)
+	return false;
+      peek = do_peek_next (peek, limit);
+      if (*peek == '=' || *peek == '<'
+	  || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
+	return false;
+    }
+  else if (p == ';')
+    {
+      /* SEMICOLON, ok for module.  */
+      if (import)
+	return false;
+    }
+  else if (p == '"')
+    {
+      /* STRING, ok for import.  */
+      if (!import)
+	return false;
+    }
+  else if (p == ':')
+    {
+      /* Maybe COLON, ok.  Reject '::', digraph:':>'.  */
+      peek = do_peek_next (peek, limit);
+      if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
+	return false;
+    }
+  else
+    /* FIXME: Detect a unicode character, excluding those not
+       permitted as the initial character. [lex.name]/1.  I presume
+       we need to check the \[uU] spellings, and directly using
+       Unicode in say UTF8 form?  Or perhaps we do the phase-1
+       conversion of UTF8 to universal-character-names?  */
+    return false;
+
+  return true;
+}
+
+/* Directives-only scanning.  Somewhat more relaxed than correct
+   parsing -- some ill-formed programs will not be rejected.  */
+
+void
+cpp_directive_only_process (cpp_reader *pfile,
+			    void *data,
+			    void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
+{
+  bool module_p = CPP_OPTION (pfile, module_directives);
+
+  do
+    {
+    restart:
+      /* Buffer initialization, but no line cleaning. */
+      cpp_buffer *buffer = pfile->buffer;
+      buffer->cur_note = buffer->notes_used = 0;
+      buffer->cur = buffer->line_base = buffer->next_line;
+      buffer->need_line = false;
+      /* Files always end in a newline.  We rely on this for
+	 character peeking safety.  */
+      gcc_assert (buffer->rlimit[-1] == '\n');
+
+      const unsigned char *base = buffer->cur;
+      unsigned line_count = 0;
+      const unsigned char *line_start = base;
+
+      bool bol = true;
+      bool raw = false;
+
+      const unsigned char *lwm = base;
+      for (const unsigned char *pos = base, *limit = buffer->rlimit;
+	   pos < limit;)
+	{
+	  unsigned char c = *pos++;
+	  /* This matches the switch in _cpp_lex_direct.  */
+	  switch (c)
+	    {
+	    case ' ': case '\t': case '\f': case '\v':
+	      /* Whitespace, do nothing.  */
+	      break;
+
+	    case '\r': /* MAC line ending, or Windows \r\n  */
+	      if (*pos == '\n')
+		pos++;
+	      /* FALLTHROUGH */
+
+	    case '\n':
+	      bol = true;
+
+	    next_line:
+	      CPP_INCREMENT_LINE (pfile, 0);
+	      line_count++;
+	      line_start = pos;
+	      break;
+
+	    case '\\':
+	      /* <backslash><newline> is removed, and doesn't undo any
+		 preceeding escape or whatnot.  */
+	      if (*pos == '\n')
+		{
+		  pos++;
+		  goto next_line;
+		}
+	      else if (*pos == '\r')
+		{
+		  if (pos[1] == '\n')
+		    pos++;
+		  pos++;
+		  goto next_line;
+		}
+	      goto dflt;
+	      
+	    case '#':
+	      if (bol)
+		{
+		  /* Line directive.  */
+		  if (pos - 1 > base && !pfile->state.skipping)
+		    cb (pfile, CPP_DO_print, data,
+			line_count, base, pos - 1 - base);
+
+		  /* Prep things for directive handling. */
+		  buffer->next_line = pos;
+		  buffer->need_line = true;
+		  bool ok = _cpp_get_fresh_line (pfile);
+		  gcc_checking_assert (ok);
+
+		  /* Ensure proper column numbering for generated
+		     error messages. */
+		  buffer->line_base -= pos - line_start;
+
+		  _cpp_handle_directive (pfile, line_start + 1 != pos);
+
+		  /* Sanitize the line settings.  Duplicate #include's can
+		     mess things up. */
+		  // FIXME: Necessary?
+		  pfile->line_table->highest_location
+		    = pfile->line_table->highest_line;
+
+		  if (!pfile->state.skipping
+		      && pfile->buffer->next_line < pfile->buffer->rlimit)
+		    cb (pfile, CPP_DO_location, data,
+			pfile->line_table->highest_line);
+
+		  goto restart;
+		}
+	      goto dflt;
+
+	    case '/':
+	      {
+		const unsigned char *peek = do_peek_next (pos, limit);
+		if (!(*peek == '/' || *peek == '*'))
+		  goto dflt;
+
+		/* Line or block comment  */
+		bool is_block = *peek == '*';
+		bool star = false;
+		bool esc = false;
+		location_t sloc
+		  = linemap_position_for_column (pfile->line_table,
+						 pos - line_start);
+
+		while (pos < limit)
+		  {
+		    char c = *pos++;
+		    switch (c)
+		      {
+		      case '\\':
+			esc = true;
+			break;
+
+		      case '\r':
+			if (*pos == '\n')
+			  pos++;
+			/* FALLTHROUGH  */
+
+		      case '\n':
+			{
+			  CPP_INCREMENT_LINE (pfile, 0);
+			  line_count++;
+			  line_start = pos;
+			  if (!esc && !is_block)
+			    {
+			      bol = true;
+			      goto done_comment;
+			    }
+			}
+			if (!esc)
+			  star = false;
+			esc = false;
+			break;
+
+		      case '*':
+			if (pos > peek && !esc)
+			  star = is_block;
+			esc = false;
+			break;
+
+		      case '/':
+			if (star)
+			  goto done_comment;
+			/* FALLTHROUGH  */
+
+		      default:
+			star = false;
+			esc = false;
+			break;
+		      }
+		  }
+		cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
+				     "unterminated comment");
+	      done_comment:
+		lwm = pos;
+		break;
+	      }
+
+	    case '\'':
+	      if (!CPP_OPTION (pfile, digit_separators))
+		goto delimited_string;
+
+	      /* Possibly a number punctuator.  */
+	      if (!ISIDNUM (*do_peek_next (pos, limit)))
+		goto delimited_string;
+
+	      goto quote_peek;
+
+	    case '\"':
+	      if (!CPP_OPTION (pfile, rliterals))
+		goto delimited_string;
+
+	    quote_peek:
+	      {
+		/* For ' see if it's a number punctuator
+		   \.?<digit>(<digit>|<identifier-nondigit>
+		   |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
+		/* For " see if it's a raw string
+		   {U,L,u,u8}R.  This includes CPP_NUMBER detection,
+		   because that could be 0e+R.  */
+		const unsigned char *peek = pos - 1;
+		bool quote_first = c == '"';
+		bool quote_eight = false;
+		bool maybe_number_start = false;
+		bool want_number = false;
+
+		while ((peek = do_peek_prev (peek, lwm)))
+		  {
+		    unsigned char p = *peek;
+		    if (quote_first)
+		      {
+			if (!raw)
+			  {
+			    if (p != 'R')
+			      break;
+			    raw = true;
+			    continue;
+			  }
+
+			quote_first = false;
+			if (p == 'L' || p == 'U' || p == 'u')
+			  ;
+			else if (p == '8')
+			  quote_eight = true;
+			else
+			  goto second_raw;
+		      }
+		    else if (quote_eight)
+		      {
+			if (p != 'u')
+			  {
+			    raw = false;
+			    break;
+			  }
+			quote_eight = false;
+		      }
+		    else if (c == '"')
+		      {
+		      second_raw:;
+			if (!want_number && ISIDNUM (p))
+			  {
+			    raw = false;
+			    break;
+			  }
+		      }
+
+		    if (ISDIGIT (p))
+		      maybe_number_start = true;
+		    else if (p == '.')
+		      want_number = true;
+		    else if (ISIDNUM (p))
+		      maybe_number_start = false;
+		    else if (p == '+' || p == '-')
+		      {
+			if (const unsigned char *peek_prev
+			    = do_peek_prev (peek, lwm))
+			  {
+			    p = *peek_prev;
+			    if (p == 'e' || p == 'E'
+				|| p == 'p' || p == 'P')
+			      {
+				want_number = true;
+				maybe_number_start = false;
+			      }
+			    else
+			      break;
+			  }
+			else
+			  break;
+		      }
+		    else if (p == '\'' || p == '\"')
+		      {
+			/* If this is lwm, this must be the end of a
+			   previous string.  So this is a trailing
+			   literal type, (a) if those are allowed,
+			     and (b) maybe_start is false.  Otherwise
+			     this must be a CPP_NUMBER because we've
+			     met another ', and we'd have checked that
+			     in its own right.  */
+			if (peek == lwm && CPP_OPTION (pfile, uliterals))
+			  {
+			    if  (!maybe_number_start && !want_number)
+			      /* Must be a literal type.  */
+			      raw = false;
+			  }
+			else if (p == '\''
+				 && CPP_OPTION (pfile, digit_separators))
+			  maybe_number_start = true;
+			break;
+		      }
+		    else if (c == '\'')
+		      break;
+		    else if (!quote_first && !quote_eight)
+		      break;
+		  }
+
+		if (maybe_number_start)
+		  {
+		    if (c == '\'')
+		      /* A CPP NUMBER.  */
+		      goto dflt;
+		    raw = false;
+		  }
+
+		goto delimited_string;
+	      }
+
+	    delimited_string:
+	      {
+		/* (Possibly raw) string or char literal.  */
+		unsigned char end = c;
+		int delim_len = -1;
+		const unsigned char *delim = NULL;
+		location_t sloc = linemap_position_for_column (pfile->line_table,
+							       pos - line_start);
+		int esc = 0;
+
+		if (raw)
+		  {
+		    /* There can be no line breaks in the delimiter.  */
+		    delim = pos;
+		    for (delim_len = 0; (c = *pos++) != '('; delim_len++)
+		      {
+			if (delim_len == 16)
+			  {
+			    cpp_error_with_line (pfile, CPP_DL_ERROR,
+						 sloc, 0,
+						 "raw string delimiter"
+						 " longer than %d"
+						 " characters",
+						 delim_len);
+			    raw = false;
+			    pos = delim;
+			    break;
+			  }
+			if (strchr (") \\\t\v\f\n", c))
+			  {
+			    cpp_error_with_line (pfile, CPP_DL_ERROR,
+						 sloc, 0,
+						 "invalid character '%c'"
+						 " in raw string"
+						 " delimiter", c);
+			    raw = false;
+			    pos = delim;
+			    break;
+			  }
+			if (pos >= limit)
+			  goto bad_string;
+		      }
+		  }
+
+		while (pos < limit)
+		  {
+		    char c = *pos++;
+		    switch (c)
+		      {
+		      case '\\':
+			if (!raw)
+			  esc++;
+			break;
+
+		      case '\r':
+			if (*pos == '\n')
+			  pos++;
+			/* FALLTHROUGH  */
+
+		      case '\n':
+			{
+			  CPP_INCREMENT_LINE (pfile, 0);
+			  line_count++;
+			  line_start = pos;
+			}
+			if (esc)
+			  esc--;
+			break;
+
+		      case ')':
+			if (raw
+			    && pos + delim_len + 1 < limit
+			    && pos[delim_len] == end
+			    && !memcmp (delim, pos, delim_len))
+			  {
+			    pos += delim_len + 1;
+			    raw = false;
+			    goto done_string;
+			  }
+			break;
+
+		      default:
+			if (!raw && !(esc & 1) && c == end)
+			  goto done_string;
+			esc = 0;
+			break;
+		      }
+		  }
+	      bad_string:
+		cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
+				     "unterminated literal");
+		
+	      done_string:
+		raw = false;
+		lwm = pos - 1;
+	      }
+	      goto dflt;
+
+	    case '_':
+	    case 'e':
+	    case 'i':
+	    case 'm':
+	      if (bol && module_p && !pfile->state.skipping
+		  && do_peek_module (pfile, c, pos, limit))
+		{
+		  /* We've seen the start of a module control line.
+		     Start up the tokenizer.  */
+		  pos--; /* Backup over the first character.  */
+
+		  /* Backup over whitespace to start of line.  */
+		  while (pos > line_start
+			 && (pos[-1] == ' ' || pos[-1] == '\t'))
+		    pos--;
+
+		  if (pos > base)
+		    cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
+
+		  /* Prep things for directive handling. */
+		  buffer->next_line = pos;
+		  buffer->need_line = true;
+
+		  /* Now get tokens until the PRAGMA_EOL.  */
+		  do
+		    {
+		      location_t spelling;
+		      const cpp_token *tok
+			= cpp_get_token_with_location (pfile, &spelling);
+
+		      gcc_assert (pfile->state.in_deferred_pragma
+				  || tok->type == CPP_PRAGMA_EOL);
+		      cb (pfile, CPP_DO_token, data, tok, spelling);
+		    }
+		  while (pfile->state.in_deferred_pragma);
+
+		  if (pfile->buffer->next_line < pfile->buffer->rlimit)
+		    cb (pfile, CPP_DO_location, data,
+			pfile->line_table->highest_line);
+
+		  pfile->mi_valid = false;
+		  goto restart;
+		}
+	      goto dflt;
+
+	    default:
+	    dflt:
+	      bol = false;
+	      pfile->mi_valid = false;
+	      break;
+	    }
+	}
+
+      if (buffer->rlimit > base && !pfile->state.skipping)
+	cb (pfile, CPP_DO_print, data, line_count, base, buffer->rlimit - base);
+
+      _cpp_pop_buffer (pfile);
+    }
+  while (pfile->buffer);
 }