gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 /* MULTIBYTE_CHARS support only works for native compilers.
  29    ??? Ideally what we want is to model widechar support after
  30    the current floating point support.  */
  31 #ifdef CROSS_COMPILE
  32 #undef MULTIBYTE_CHARS
  33 #endif
  34
  35 #ifdef MULTIBYTE_CHARS
  36 #include "mbchar.h"
  37 #include <locale.h>
  38 #endif
  39
  40 /* The ways in which a token's spelling can be recorded; each entry of
  41    token_spellings[] below carries one of these categories.
        Tokens with SPELL_STRING store their spelling in the token list,
        and its length in the token->val.name.len.
        NOTE(review): parse_string in this file stores the length in
        token->val.str.len, so the field name above may be stale --
        confirm against cpplib.h.  */
  42 enum spell_type
  43 {
  44   SPELL_OPERATOR = 0,	/* Category given to every OP () table entry.  */
  45   SPELL_CHAR,
  46   SPELL_IDENT,
  47   SPELL_NUMBER,
  48   SPELL_STRING,
  49   SPELL_NONE
  50 };
  51
  52 struct token_spelling
     /* One row of token_spellings[]: the spelling category of a token
        type together with a constant NAME string.  For OP () entries the
        name is the operator text; for TK () entries it is built from the
        enumerator with STRINGX.  */
  53 {
  54   enum spell_type category;
  55   const unsigned char *name;
  56 };
  57
  58 static const unsigned char *const digraph_spellings[] =
     /* The six digraph spellings, the ISO C alternatives for
        #, ##, [, ], { and } in that order.  The code that indexes this
        table is not adjacent to it.  */
  59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  60
     /* Build token_spellings[] from TTYPE_TABLE.  OP and TK exist only
        while the table is expanded: an OP entry becomes
        { SPELL_OPERATOR, <operator text S> } and a TK entry becomes
        { <category S>, STRINGX (E) } (presumably the enumerator's name as
        a string -- STRINGX is defined elsewhere).  */
  61 #define OP(e, s) { SPELL_OPERATOR, U s           },
  62 #define TK(e, s) { s,              U STRINGX (e) },
  63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  64 #undef OP
  65 #undef TK
  66
     /* Look up the spelling category and the name of TOKEN from its type.  */
  67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     /* Rewind the read pointer to the position saved in backup_to.  The
        expansion refers to a variable named `buffer', so it can only be
        used where such a variable is in scope.  */
  69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  70
  71 static void handle_newline PARAMS ((cpp_reader *));
     /* Forward declarations of the file-local helpers.  The first three
        deal with newlines, escaped newlines and trigraphs.  PARAMS is
        supplied by the project headers (presumably so the prototypes can
        be hidden from pre-ANSI compilers, in keeping with the old-style
        definitions used throughout this file).  */
  72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  74
     /* Comment and whitespace skipping, followed by the scanners for
        identifiers, numbers, strings and comments.  */
  75 static int skip_block_comment PARAMS ((cpp_reader *));
  76 static int skip_line_comment PARAMS ((cpp_reader *));
  77 static void adjust_column PARAMS ((cpp_reader *));
  78 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  80 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  81                                                     const U_CHAR *));
  82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  85 static void unterminated PARAMS ((cpp_reader *, int));
  86 static bool trigraph_p PARAMS ((cpp_reader *));
  87 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  90                                    const unsigned char *, unsigned int *));
  91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  92
     /* Remaining miscellaneous helpers.  */
  93 static unsigned int hex_digit_value PARAMS ((unsigned int));
  94 static _cpp_buff *new_buff PARAMS ((size_t));
  95
  96 /* Utility routine:
  97
  98    Compares the token TOKEN to the NUL-terminated string STRING.
  99    TOKEN must be a CPP_NAME; a token of any other type compares
        unequal.  Returns 1 for equal, 0 for unequal.  */
 100
 101 int
 102 cpp_ideq (token, string)
 103      const cpp_token *token;
 104      const char *string;
 105 {
 106   if (token->type != CPP_NAME)
 107     return 0;
 108
       /* The comparison result is negated so that a match (a zero result
          from ustrcmp, as with strcmp) yields 1.  */
 109   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 110 }
 111
 112 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 113    Returns with buffer->cur pointing to the character immediately
 114    following the newline (combination).  Also starts a new line for
        position tracking: line_base is moved to the new buffer->cur,
        col_adjust is cleared and pfile->line is incremented.  */
 115 static void
 116 handle_newline (pfile)
 117      cpp_reader *pfile;
 118 {
 119   cpp_buffer *buffer = pfile->buffer;
 120
 121   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 122      only accept CR-LF; maybe we should fall back to that behaviour?  */
       /* Since cur[-1] is already one of '\r' and '\n', the sums are equal
          only when cur[0] is the other one; the pair is then consumed as
          a single newline.  */
 123   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 124     buffer->cur++;
 125
 126   buffer->line_base = buffer->cur;
 127   buffer->col_adjust = 0;
 128   pfile->line++;
 129 }
 130
 131 /* Subroutine of skip_escaped_newlines; called when a 3-character
 132    sequence beginning with "??" is encountered.  buffer->cur points to
 133    the second '?'.
 134
 135    Warns if necessary, and returns true if the sequence forms a
 136    trigraph and the trigraph should be honoured.  buffer->cur is not
        moved; the caller does the actual translation.  */
 137 static bool
 138 trigraph_p (pfile)
 139      cpp_reader *pfile;
 140 {
 141   cpp_buffer *buffer = pfile->buffer;
       /* The character after the second '?', i.e. the third character of
          the candidate sequence.  */
 142   cppchar_t from_char = buffer->cur[1];
 143   bool accept;
 144
       /* A zero map entry means this is not a trigraph at all.  */
 145   if (!_cpp_trigraph_map[from_char])
 146     return false;
 147
       /* It is a trigraph; whether it is honoured depends solely on the
          trigraphs option.  */
 148   accept = CPP_OPTION (pfile, trigraphs);
 149
 150   /* Don't warn about trigraphs in comments.  */
 151   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 152     {
 153       if (accept)
 154         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
 155                                "trigraph ??%c converted to %c",
 156                                (int) from_char,
 157                                (int) _cpp_trigraph_map[from_char]);
 158       else if (buffer->cur != buffer->last_Wtrigraphs)
 159         {
               /* Remember where we warned, so that an ignored trigraph
                  which is scanned again at the same position does not
                  produce a second warning.  */
 160           buffer->last_Wtrigraphs = buffer->cur;
 161           cpp_warning_with_line (pfile, pfile->line,
 162                                  CPP_BUF_COL (buffer) - 1,
 163                                  "trigraph ??%c ignored", (int) from_char);
 164         }
 165     }
 166
 167   return accept;
 168 }
 169
 170 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 171    lie in buffer->cur[-1].  Returns the next byte, which will be in
 172    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 173    2 of the ISO C standard.
 
        The value returned is the character after trigraph translation, so
        for an honoured trigraph it is the replacement character rather
        than the byte actually stored at buffer->cur[-1].  If a
        backslash-newline is the very last thing in the buffer, a pedwarn
        is issued and EOF is returned.  Nothing is done for buffers
        flagged from_stage3: the character at buffer->cur[-1] is returned
        unchanged.  After each escaped newline buffer->backup_to is set to
        the start of the following line.  */
 174 static cppchar_t
 175 skip_escaped_newlines (pfile)
 176      cpp_reader *pfile;
 177 {
 178   cpp_buffer *buffer = pfile->buffer;
 179   cppchar_t next = buffer->cur[-1];
 180
 181   /* Only do this if we apply stages 1 and 2.  */
 182   if (!buffer->from_stage3)
 183     {
 184       const unsigned char *saved_cur;
 185       cppchar_t next1;
 186
 187       do
 188         {
 189           if (next == '?')
 190             {
                   /* A '?' that does not begin an honoured trigraph is
                      returned as an ordinary '?'.  */
 191               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 192                 break;
 193
 194               /* Translate the trigraph.  */
 195               next = _cpp_trigraph_map[buffer->cur[1]];
 196               buffer->cur += 2;
                   /* Only a trigraph that translates to a backslash can
                      go on to form an escaped newline.  */
 197               if (next != '\\')
 198                 break;
 199             }
 200
               /* A backslash at the very end of the buffer has nothing
                  after it to escape.  */
 201           if (buffer->cur == buffer->rlimit)
 202             break;
 203
 204           /* We have a backslash, and room for at least one more
 205              character.  Skip horizontal whitespace.  */
 206           saved_cur = buffer->cur;
 207           do
 208             next1 = *buffer->cur++;
 209           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 210
               /* Not followed by a newline: this is a plain backslash, so
                  undo the whitespace skipping.  */
 211           if (!is_vspace (next1))
 212             {
 213               buffer->cur = saved_cur;
 214               break;
 215             }
 216
               /* cur has moved by more than one character only if
                  whitespace was skipped before the newline.  */
 217           if (saved_cur != buffer->cur - 1
 218               && !pfile->state.lexing_comment)
 219             cpp_warning (pfile, "backslash and newline separated by space");
 220
 221           handle_newline (pfile);
 222           buffer->backup_to = buffer->cur;
 223           if (buffer->cur == buffer->rlimit)
 224             {
 225               cpp_pedwarn (pfile, "backslash-newline at end of file");
 226               next = EOF;
 227             }
 228           else
 229             next = *buffer->cur++;
 230         }
           /* The character following an escaped newline may itself begin
              another escaped newline or trigraph.  */
 231       while (next == '\\' || next == '?');
 232     }
 233
 234   return next;
 235 }
 236
 237 /* Obtain the next character, after trigraph conversion and skipping
 238    an arbitrarily long string of escaped newlines.  The common case of
 239    no trigraphs or escaped newlines falls through quickly.  On return,
 240    buffer->backup_to points to where to return to if the character is
 241    not to be processed.  */
 242 static cppchar_t
 243 get_effective_char (pfile)
 244      cpp_reader *pfile;
 245 {
 246   cppchar_t next;
 247   cpp_buffer *buffer = pfile->buffer;
 248
       /* Record the rewind point before consuming anything;
          skip_escaped_newlines moves it forward past any escaped
          newlines it skips.  */
 249   buffer->backup_to = buffer->cur;
 250   next = *buffer->cur++;
       /* Tell the compiler that the slow path is the unlikely one.  */
 251   if (__builtin_expect (next == '?' || next == '\\', 0))
 252     next = skip_escaped_newlines (pfile);
 253
 254    return next;
 255 }
 256
 257 /* Skip a C-style block comment.  We find the end of the comment by
 258    seeing if an asterisk is before every '/' we encounter.  Returns
 259    non-zero if comment terminated by EOF, zero otherwise.
        state.lexing_comment is set for the duration of the scan, and
        newlines and tabs inside the comment are passed to handle_newline
        and adjust_column so that line and column tracking stay right.  */
 260 static int
 261 skip_block_comment (pfile)
 262      cpp_reader *pfile;
 263 {
 264   cpp_buffer *buffer = pfile->buffer;
       /* Both start as EOF so that running off the end of the buffer
          without seeing the closing delimiter makes the final test below
          report an unterminated comment.  */
 265   cppchar_t c = EOF, prevc = EOF;
 266
 267   pfile->state.lexing_comment = 1;
 268   while (buffer->cur != buffer->rlimit)
 269     {
 270       prevc = c, c = *buffer->cur++;
 271
 272       /* FIXME: For speed, create a new character class of characters
 273          of interest inside block comments.  */
 274       if (c == '?' || c == '\\')
 275         c = skip_escaped_newlines (pfile);
 276
 277       /* People like decorating comments with '*', so check for '/'
 278          instead for efficiency.  */
 279       if (c == '/')
 280         {
               /* A '/' preceded by '*' closes the comment.  */
 281           if (prevc == '*')
 282             break;
 283
 284           /* Warn about potential nested comments, but not if the '/'
 285              comes immediately before the true comment delimiter.
 286              Don't bother to get it right across escaped newlines.  */
 287           if (CPP_OPTION (pfile, warn_comments)
 288               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 289             cpp_warning_with_line (pfile,
 290                                    pfile->line, CPP_BUF_COL (buffer),
 291                                    "\"/*\" within comment");
 292         }
 293       else if (is_vspace (c))
 294         handle_newline (pfile);
 295       else if (c == '\t')
 296         adjust_column (pfile);
 297     }
 298
 299   pfile->state.lexing_comment = 0;
       /* Zero only when the last two characters seen were the closing
          delimiter.  */
 300   return c != '/' || prevc != '*';
 301 }
 302
 303 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 304    terminating newline.  Handles escaped newlines.  Returns non-zero
 305    if a multiline comment, i.e. if pfile->line changed while skipping
        (which escaped newlines inside the comment cause).  If the buffer
        ends before a newline is found, buffer->cur is left at the end of
        the buffer.  */
 306 static int
 307 skip_line_comment (pfile)
 308      cpp_reader *pfile;
 309 {
 310   cpp_buffer *buffer = pfile->buffer;
 311   unsigned int orig_line = pfile->line;
 312   cppchar_t c;
 313
 314   pfile->state.lexing_comment = 1;
 315   do
 316     {
 317       if (buffer->cur == buffer->rlimit)
 318         goto at_eof;
 319
 320       c = *buffer->cur++;
 321       if (c == '?' || c == '\\')
 322         c = skip_escaped_newlines (pfile);
 323     }
 324   while (!is_vspace (c));
 325
 326   /* Step back over the newline, except at EOF.  */
 327   buffer->cur--;
 328  at_eof:
 329
 330   pfile->state.lexing_comment = 0;
 331   return orig_line != pfile->line;
 332 }
 333
 334 /* pfile->buffer->cur is one beyond the \t character.  Update
 335    col_adjust so we track the column correctly.  The width of a tab
        stop is taken from the tabstop option.  */
 336 static void
 337 adjust_column (pfile)
 338      cpp_reader *pfile;
 339 {
 340   cpp_buffer *buffer = pfile->buffer;
 341   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 342
 343   /* Round it up to multiple of the tabstop, but subtract 1 since the
 344      tab itself occupies a character position.  */
 345   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 346                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 347 }
 348
 349 /* Skips whitespace, stopping at the next non-whitespace character.
 350    Adjusts the buffer's col_adjust to account for tabs.  Without this,
 351    tokens might be assigned an incorrect column.
 
        C is the whitespace character that has just been read.  Returns 1
        with buffer->cur pointing at the first character that is not
        horizontal whitespace, or 0 if a NUL at buffer->rlimit (the end of
        the buffer) was reached; in that case buffer->cur is left one past
        that NUL.  Embedded NULs are warned about once per call, and form
        feeds / vertical tabs inside a directive draw a pedantic
        warning.  */
 352 static int
 353 skip_whitespace (pfile, c)
 354      cpp_reader *pfile;
 355      cppchar_t c;
 356 {
 357   cpp_buffer *buffer = pfile->buffer;
 358   unsigned int warned = 0;
 359
 360   do
 361     {
 362       /* Horizontal space always OK.  */
 363       if (c == ' ')
 364         ;
 365       else if (c == '\t')
 366         adjust_column (pfile);
 367       /* Just \f \v or \0 left.  */
 368       else if (c == '\0')
 369         {
               /* A NUL sitting at rlimit marks the end of the buffer
                  rather than a stray NUL in the input.  */
 370           if (buffer->cur - 1 == buffer->rlimit)
 371             return 0;
 372           if (!warned)
 373             {
 374               cpp_warning (pfile, "null character(s) ignored");
 375               warned = 1;
 376             }
 377         }
 378       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 379         cpp_pedwarn_with_line (pfile, pfile->line,
 380                                CPP_BUF_COL (buffer),
 381                                "%s in preprocessing directive",
 382                                c == '\f' ? "form feed" : "vertical tab");
 383
 384       c = *buffer->cur++;
 385     }
 386   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 387   while (is_nvspace (c));
 388
       /* Un-read the character that ended the run of whitespace.  */
 389   buffer->cur--;
 390   return 1;
 391 }
 392
 393 /* See if the characters of a number token are valid in a name (no
 394    '.', '+' or '-').  Returns 1 if every one of the STRING->len
        characters of STRING->text satisfies is_idchar (so an empty string
        yields 1), 0 otherwise.  PFILE is not used.  */
 395 static int
 396 name_p (pfile, string)
 397      cpp_reader *pfile;
 398      const cpp_string *string;
 399 {
 400   unsigned int i;
 401
 402   for (i = 0; i < string->len; i++)
 403     if (!is_idchar (string->text[i]))
 404       return 0;
 405
 406   return 1;
 407 }
 408
 409 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 410    a critical inner loop.  The common case is an identifier which has
 411    not been split by backslash-newline, does not contain a dollar
 412    sign, and has already been scanned (roughly 10:1 ratio of
 413    seen:unseen identifiers in normal code; the distribution is
 414    Poisson-like).  Second most common case is a new identifier, not
 415    split and no dollar sign.  The other possibilities are rare and
 416    have been relegated to parse_identifier_slow.
 
        On entry the first character of the identifier has already been
        consumed, so the identifier starts at buffer->cur - 1.  Returns the
        hash node for the identifier and leaves buffer->cur just past
        it.  */
 417
 418 static cpp_hashnode *
 419 parse_identifier (pfile)
 420      cpp_reader *pfile;
 421 {
 422   cpp_hashnode *result;
 423   const U_CHAR *cur;
 424
 425   /* Fast-path loop.  Skim over a normal identifier.
 426      N.B. ISIDNUM does not include $.  */
 427   cur = pfile->buffer->cur;
 428   while (ISIDNUM (*cur))
 429     cur++;
 430
 431   /* Check for slow-path cases.  */
 432   if (*cur == '?' || *cur == '\\' || *cur == '$')
 433     result = parse_identifier_slow (pfile, cur);
 434   else
 435     {
           /* The identifier lies contiguously in the buffer, so it can be
              looked up in place without copying.  */
 436       const U_CHAR *base = pfile->buffer->cur - 1;
 437       result = (cpp_hashnode *)
 438         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 439       pfile->buffer->cur = cur;
 440     }
 441
 442   /* Rarely, identifiers require diagnostics when lexed.
 443      XXX Has to be forced out of the fast path.  */
 444   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 445                         && !pfile->state.skipping, 0))
 446     {
 447       /* It is allowed to poison the same identifier twice.  */
 448       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 449         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 450                    NODE_NAME (result));
 451
 452       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 453          replacement list of a variadic macro.  */
 454       if (result == pfile->spec_nodes.n__VA_ARGS__
 455           && !pfile->state.va_args_ok)
 456         cpp_pedwarn (pfile,
 457         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 458     }
 459
 460   return result;
 461 }
 462
 463 /* Slow path.  This handles identifiers which have been split, and
 464    identifiers which contain dollar signs.  The part of the identifier
 465    from PFILE->buffer->cur-1 to CUR has already been scanned.
 
        The spelling is assembled on the hash table's obstack, because a
        split identifier is not contiguous in the input buffer.  Returns
        the hash node for the assembled name and leaves buffer->cur at
        the first character that is not part of the identifier.  */
 466 static cpp_hashnode *
 467 parse_identifier_slow (pfile, cur)
 468      cpp_reader *pfile;
 469      const U_CHAR *cur;
 470 {
 471   cpp_buffer *buffer = pfile->buffer;
 472   const U_CHAR *base = buffer->cur - 1;
 473   struct obstack *stack = &pfile->hash_table->stack;
 474   unsigned int c, saw_dollar = 0, len;
 475
 476   /* Copy the part of the token which is known to be okay.  */
 477   obstack_grow (stack, base, cur - base);
 478
 479   /* Now process the part which isn't.  We are looking at one of
 480      '$', '\\', or '?' on entry to this loop.  */
 481   c = *cur++;
 482   buffer->cur = cur;
 483   do
 484     {
 485       while (is_idchar (c))
 486         {
 487           obstack_1grow (stack, c);
 488
 489           if (c == '$')
 490             saw_dollar++;
 491
 492           c = *buffer->cur++;
 493         }
 494
 495       /* Potential escaped newline?  */
           /* Record where the non-identifier character sits, so that
              BACKUP below can un-read it.  */
 496       buffer->backup_to = buffer->cur - 1;
 497       if (c != '?' && c != '\\')
 498         break;
 499       c = skip_escaped_newlines (pfile);
 500     }
 501   while (is_idchar (c));
 502
 503   /* Step back over the unwanted char.  */
 504   BACKUP ();
 505
 506   /* $ is not an identifier character in the standard, but is commonly
 507      accepted as an extension.  Don't warn about it in skipped
 508      conditional blocks.  */
 509   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 510     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 511
 512   /* Identifiers are null-terminated.  */
       /* LEN is taken before the terminator is appended, so it does not
          count the NUL.  */
 513   len = obstack_object_size (stack);
 514   obstack_1grow (stack, '\0');
 515
       /* HT_ALLOCED presumably tells the hash table that the string
          already lives on its own obstack -- confirm against the
          hash table's header.  */
 516   return (cpp_hashnode *)
 517     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 518 }
 519
 520 /* Parse a number, skipping embedded backslash-newlines.
 
        C is the first character of the number, already read from the
        buffer; if LEADING_PERIOD is non-zero a '.' is stored in front of
        it.  The spelling is accumulated in pfile->u_buff, which is
        extended as needed, and is NUL-terminated.  On return NUMBER->text
        and NUMBER->len describe the spelling (the length excludes the
        NUL), the front of u_buff has been moved past it, and buffer->cur
        points at the first character that is not part of the number.  */
 521 static void
 522 parse_number (pfile, number, c, leading_period)
 523      cpp_reader *pfile;
 524      cpp_string *number;
 525      cppchar_t c;
 526      int leading_period;
 527 {
 528   cpp_buffer *buffer = pfile->buffer;
 529   unsigned char *dest, *limit;
 530
 531   dest = BUFF_FRONT (pfile->u_buff);
 532   limit = BUFF_LIMIT (pfile->u_buff);
 533
 534   /* Place a leading period.  */
 535   if (leading_period)
 536     {
 537       if (dest == limit)
 538         {
 539           _cpp_extend_buff (pfile, &pfile->u_buff, 1);
 540           dest = BUFF_FRONT (pfile->u_buff);
 541           limit = BUFF_LIMIT (pfile->u_buff);
 542         }
 543       *dest++ = '.';
 544     }
 545
 546   do
 547     {
 548       do
 549         {
 550           /* Need room for terminating null.  */
 551           if ((size_t) (limit - dest) < 2)
 552             {
                   /* Extending may replace the buffer, so DEST is
                      recomputed from its offset.  */
 553               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 554               _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 555               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 556               limit = BUFF_LIMIT (pfile->u_buff);
 557             }
 558           *dest++ = c;
 559
 560           c = *buffer->cur++;
 561         }
           /* VALID_SIGN is given the candidate sign and the character
              stored just before it.  */
 562       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 563
 564       /* Potential escaped newline?  */
 565       buffer->backup_to = buffer->cur - 1;
 566       if (c != '?' && c != '\\')
 567         break;
 568       c = skip_escaped_newlines (pfile);
 569     }
 570   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 571
 572   /* Step back over the unwanted char.  */
 573   BACKUP ();
 574
 575   /* Null-terminate the number.  */
 576   *dest = '\0';
 577
 578   number->text = BUFF_FRONT (pfile->u_buff);
 579   number->len = dest - number->text;
       /* Claim the bytes just written, terminator included.  */
 580   BUFF_FRONT (pfile->u_buff) = dest + 1;
 581 }
 582
 583 /* Subroutine of parse_string.  Emits error for unterminated strings.
        TERM is the terminator character that was expected.  For a string
        literal whose recorded multi-line start (pfile->mls_line) is on a
        different line from the current one, a second error pointing at
        that start is issued and the record is cleared.  */
 584 static void
 585 unterminated (pfile, term)
 586      cpp_reader *pfile;
 587      int term;
 588 {
 589   cpp_error (pfile, "missing terminating %c character", term);
 590
 591   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 592     {
 593       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 594                            "possible start of unterminated string literal");
 595       pfile->mls_line = 0;
 596     }
 597 }
 598
 599 /* Subroutine of parse_string.  Called when the terminator character
        has been read; DEST points just past the characters stored so far
        in pfile->u_buff.  Returns non-zero if the terminator really ends
        the literal, zero if it is escaped by a preceding backslash.  */
 600 static int
 601 unescaped_terminator_p (pfile, dest)
 602      cpp_reader *pfile;
 603      const unsigned char *dest;
 604 {
 605   const unsigned char *start, *temp;
 606
 607   /* In #include-style directives, terminators are not escapeable.  */
 608   if (pfile->state.angled_headers)
 609     return 1;
 610
 611   start = BUFF_FRONT (pfile->u_buff);
 612
 613   /* An odd number of consecutive backslashes represents an escaped
 614      terminator.  */
       /* Walk TEMP back over the run of backslashes that ends at DEST,
          without going before the start of the literal.  */
 615   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 616     ;
 617
       /* DEST - TEMP is the length of that run; an even length means the
          backslashes pair up and the terminator is not escaped.  */
 618   return ((dest - temp) & 1) == 0;
 619 }
 620
 621 /* Parses a string, character constant, or angle-bracketed header file
 622    name.  Handles embedded trigraphs and escaped newlines.  The stored
 623    string is guaranteed NUL-terminated, but it is not guaranteed that
 624    this is the first NUL since embedded NULs are preserved.
 625    Multi-line strings are allowed, but they are deprecated.
 626
 627    When this function returns, buffer->cur points to the next
 628    character to be processed.
 
        TERMINATOR is the character that closes the literal.  The contents,
        without the terminator, are accumulated in pfile->u_buff and
        recorded in TOKEN->val.str; the stored length excludes the final
        NUL.  */
 629 static void
 630 parse_string (pfile, token, terminator)
 631      cpp_reader *pfile;
 632      cpp_token *token;
 633      cppchar_t terminator;
 634 {
 635   cpp_buffer *buffer = pfile->buffer;
 636   unsigned char *dest, *limit;
 637   cppchar_t c;
 638   bool warned_nulls = false, warned_multi = false;
 639
 640   dest = BUFF_FRONT (pfile->u_buff);
 641   limit = BUFF_LIMIT (pfile->u_buff);
 642
 643   for (;;)
 644     {
 645       /* We need room for another char, possibly the terminating NUL.  */
 646       if ((size_t) (limit - dest) < 1)
 647         {
               /* Extending may replace the buffer, so DEST is recomputed
                  from its offset.  */
 648           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 649           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 650           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 651           limit = BUFF_LIMIT (pfile->u_buff);
 652         }
 653
 654       /* Handle trigraphs, escaped newlines etc.  */
 655       c = *buffer->cur++;
 656       if (c == '?' || c == '\\')
 657         c = skip_escaped_newlines (pfile);
 658
 659       if (c == terminator)
 660         {
               /* An escaped terminator is stored like any other
                  character and scanning continues.  */
 661           if (unescaped_terminator_p (pfile, dest))
 662             break;
 663         }
 664       else if (is_vspace (c))
 665         {
 666           /* In assembly language, silently terminate string and
 667              character literals at end of line.  This is a kludge
 668              around not knowing where comments are.  */
 669           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 670             {
                   /* Leave the newline unread.  */
 671               buffer->cur--;
 672               break;
 673             }
 674
 675           /* Character constants and header names may not extend over
 676              multiple lines.  In Standard C, neither may strings.
 677              Unfortunately, we accept multiline strings as an
 678              extension, except in #include family directives.  */
 679           if (terminator != '"' || pfile->state.angled_headers)
 680             {
 681               unterminated (pfile, terminator);
 682               buffer->cur--;
 683               break;
 684             }
 685
 686           if (!warned_multi)
 687             {
 688               warned_multi = true;
 689               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 690             }
 691
               /* Remember where the first multi-line string started, for
                  the extra diagnostic issued by unterminated.  */
 692           if (pfile->mls_line == 0)
 693             {
 694               pfile->mls_line = token->line;
 695               pfile->mls_col = token->col;
 696             }
 697
 698           handle_newline (pfile);
               /* Whatever newline sequence was read is stored as a single
                  '\n'.  */
 699           c = '\n';
 700         }
 701       else if (c == '\0')
 702         {
               /* A NUL at rlimit is the end of the buffer, not part of
                  the literal.  */
 703           if (buffer->cur - 1 == buffer->rlimit)
 704             {
 705               unterminated (pfile, terminator);
 706               buffer->cur--;
 707               break;
 708             }
 709           if (!warned_nulls)
 710             {
 711               warned_nulls = true;
 712               cpp_warning (pfile, "null character(s) preserved in literal");
 713             }
 714         }
 715
 716       *dest++ = c;
 717     }
 718
       /* Every exit from the loop happens after the room check at its
          top and before a store, so there is space for the NUL.  */
 719   *dest = '\0';
 720
 721   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 722   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 723   BUFF_FRONT (pfile->u_buff) = dest + 1;
 724 }
 725
 726 /* The stored comment includes the comment start and any terminator.
        Turns TOKEN into a CPP_COMMENT whose text is a copy of the comment.
        FROM points just after the initial '/' and the comment is taken to
        extend up to pfile->buffer->cur.  The copy is not
        NUL-terminated.  */
 727 static void
 728 save_comment (pfile, token, from)
 729      cpp_reader *pfile;
 730      cpp_token *token;
 731      const unsigned char *from;
 732 {
 733   unsigned char *buffer;
 734   unsigned int len;
 735
 736   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 737
 738   /* C++ comments probably (not definitely) have moved past a new
 739      line, which we don't want to save in the comment.  */
 740   if (is_vspace (pfile->buffer->cur[-1]))
 741     len--;
 742   buffer = _cpp_unaligned_alloc (pfile, len);
 743
 744   token->type = CPP_COMMENT;
 745   token->val.str.len = len;
 746   token->val.str.text = buffer;
 747
       /* Re-create the leading '/', then copy the remaining LEN - 1
          characters starting at FROM.  */
 748   buffer[0] = '/';
 749   memcpy (buffer + 1, from, len - 1);
 750 }
 751
 752 /* Allocate COUNT tokens for RUN.  Sets RUN->base to the new array,
        RUN->limit to one past its last element, and clears RUN->next.
        RUN->prev is left untouched.  */
 753 void
 754 _cpp_init_tokenrun (run, count)
 755      tokenrun *run;
 756      unsigned int count;
 757 {
 758   run->base = xnewvec (cpp_token, count);
 759   run->limit = run->base + count;
 760   run->next = NULL;
 761 }
 762
 763 /* Returns the next tokenrun, or creates one if there is none.  A
        newly created run holds 250 tokens and is linked after RUN in both
        directions.  */
 764 static tokenrun *
 765 next_tokenrun (run)
 766      tokenrun *run;
 767 {
 768   if (run->next == NULL)
 769     {
 770       run->next = xnew (tokenrun);
 771       run->next->prev = run;
 772       _cpp_init_tokenrun (run->next, 250);
 773     }
 774
 775   return run->next;
 776 }
 777
 778 /* Allocate a single token that is invalidated at the same time as the
 779    rest of the tokens on the line.  Has its line and col set to the
 780    same as the last lexed token, so that diagnostics appear in the
 781    right place.  The token is taken from pfile->cur_token, which is
        advanced past it; the token's other fields are not initialized
        here.  */
 782 cpp_token *
 783 _cpp_temp_token (pfile)
 784      cpp_reader *pfile;
 785 {
 786   cpp_token *old, *result;
 787
       /* The most recently lexed token; it must be fetched before
          cur_token is possibly moved to the next run.  */
 788   old = pfile->cur_token - 1;
 789   if (pfile->cur_token == pfile->cur_run->limit)
 790     {
 791       pfile->cur_run = next_tokenrun (pfile->cur_run);
 792       pfile->cur_token = pfile->cur_run->base;
 793     }
 794
 795   result = pfile->cur_token++;
 796   result->line = old->line;
 797   result->col = old->col;
 798   return result;
 799 }
 800
 801 /* Lex a token and return it (external interface).  Takes care of issues
 802    like directive handling, token lookahead, multiple include
 803    optimization and skipping.  Tokens that were lexed ahead are
        handed back before new ones are lexed with _cpp_lex_direct.
        Directives handled by _cpp_handle_directive are consumed here and
        not returned, and while state.skipping is set outside a directive
        only a CPP_EOF token is returned.  */
 804 const cpp_token *
 805 _cpp_lex_token (pfile)
 806      cpp_reader *pfile;
 807 {
 808   cpp_token *result;
 809
 810   for (;;)
 811     {
           /* The current run is full: continue in the next one.  */
 812       if (pfile->cur_token == pfile->cur_run->limit)
 813         {
 814           pfile->cur_run = next_tokenrun (pfile->cur_run);
 815           pfile->cur_token = pfile->cur_run->base;
 816         }
 817
           /* A pending lookahead token is already stored at cur_token, so
              it only needs to be stepped over.  */
 818       if (pfile->lookaheads)
 819         {
 820           pfile->lookaheads--;
 821           result = pfile->cur_token++;
 822         }
 823       else
 824         result = _cpp_lex_direct (pfile);
 825
 826       if (result->flags & BOL)
 827         {
 828           /* Is this a directive.  If _cpp_handle_directive returns
 829              false, it is an assembler #.  */
 830           if (result->type == CPP_HASH
 831               && !pfile->state.parsing_args
 832               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 833             continue;
 834           if (pfile->cb.line_change && !pfile->state.skipping)
 835             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 836         }
 837
 838       /* We don't skip tokens in directives.  */
 839       if (pfile->state.in_directive)
 840         break;
 841
 842       /* Outside a directive, invalidate controlling macros.  At file
 843          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 844          get here and MI optimisation works.  */
 845       pfile->mi_valid = false;
 846
           /* While skipping, keep discarding tokens until CPP_EOF.  */
 847       if (!pfile->state.skipping || result->type == CPP_EOF)
 848         break;
 849     }
 850
 851   return result;
 852 }
 853
 854 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 855   do {                                          \
 856     if (get_effective_char (pfile) == CHAR)     \
 857       result->type = THEN_TYPE;                 \
 858     else                                        \
 859       {                                         \
 860         BACKUP ();                              \
 861         result->type = ELSE_TYPE;               \
 862       }                                         \
 863   } while (0)
     /* IF_NEXT_IS reads the next effective character.  If it equals CHAR
        the character is consumed and result->type becomes THEN_TYPE;
        otherwise the read position is rewound with BACKUP and
        result->type becomes ELSE_TYPE.  The expansion uses variables
        named pfile, buffer and result, which must be in scope at the
        point of use.  */
 864
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimisation, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body is a single switch on the first character of the token,
   re-entered through four labels:

     fresh_line          start over on a new line or a new buffer;
     update_tokens_line  re-stamp the token's line number;
     skipped_white       read the next character and set the column;
     trigraph            dispatch again on a replacement character.

   Multi-character operators are recognised by reading ahead with
   get_effective_char and un-reading with BACKUP when the lookahead
   does not extend the operator.  */
cpp_token *
_cpp_lex_direct (pfile)
     cpp_reader *pfile;
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  /* Claim the next token slot; cur_token is advanced past it.  */
  cpp_token *result = pfile->cur_token++;

  /* Reached on entry, after popping a buffer at EOF, and after a
     newline outside a directive.  Reloads BUFFER and moves the flags
     saved for "the next token" onto RESULT.  */
 fresh_line:
  buffer = pfile->buffer;
  result->flags = buffer->saved_flags;
  buffer->saved_flags = 0;
  /* Also reached after escaped newlines and skipped comments, where
     the flags gathered so far must be kept.  */
 update_tokens_line:
  result->line = pfile->line;

  /* Also reached after a run of horizontal whitespace.  */
 skipped_white:
  c = *buffer->cur++;
  result->col = CPP_BUF_COLUMN (buffer, buffer->cur);

  /* Also reached with C replaced by whatever skip_escaped_newlines
     returned for a '?' or '\\' that turned out to be something else.  */
 trigraph:
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      if (skip_whitespace (pfile, c))
        goto skipped_white;

      /* EOF.  */
      /* Un-read the character that was fetched above.  */
      buffer->cur--;
      buffer->saved_flags = BOL;
      if (!pfile->state.parsing_args && !pfile->state.in_directive)
        {
          if (buffer->cur != buffer->line_base)
            {
              /* Non-empty files should end in a newline.  Don't warn
                 for command line and _Pragma buffers.  */
              if (!buffer->from_stage3)
                cpp_pedwarn (pfile, "no newline at end of file");
              handle_newline (pfile);
            }

          /* Don't pop the last buffer.  */
          if (buffer->prev)
            {
              /* Read the flag before the pop, since BUFFER is the
                 buffer being popped.  */
              unsigned char stop = buffer->return_at_eof;

              _cpp_pop_buffer (pfile);
              if (!stop)
                goto fresh_line;
            }
        }
      result->type = CPP_EOF;
      break;

    case '\n': case '\r':
      handle_newline (pfile);
      buffer->saved_flags = BOL;
      if (! pfile->state.in_directive)
        {
          /* When parsing_args is 2 the newline additionally counts as
             whitespace before the next token.  */
          if (pfile->state.parsing_args == 2)
            buffer->saved_flags |= PREV_WHITE;
          /* Unless tokens must be kept, reuse storage by restarting
             at the first slot of the base token run.  */
          if (!pfile->keep_tokens)
            {
              pfile->cur_run = &pfile->base_run;
              result = pfile->base_run.base;
              pfile->cur_token = result + 1;
            }
          goto fresh_line;
        }
      /* Inside a directive the newline ends the token stream.  */
      result->type = CPP_EOF;
      break;

    case '?':
    case '\\':
      /* These could start an escaped newline, or '?' a trigraph.  Let
         skip_escaped_newlines do all the work.  */
      {
        unsigned int line = pfile->line;

        c = skip_escaped_newlines (pfile);
        if (line != pfile->line)
          {
            /* Un-read one character so that skipped_white fetches it
               again with the updated line and column.  */
            buffer->cur--;
            /* We had at least one escaped newline of some sort.
               Update the token's line and column.  */
            goto update_tokens_line;
          }
      }

      /* We are either the original '?' or '\\', or a trigraph.  */
      if (c == '?')
        result->type = CPP_QUERY;
      else if (c == '\\')
        goto random_char;
      else
        goto trigraph;
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      parse_number (pfile, &result->val.str, c, 0);
      break;

    case '$':
      /* '$' only starts an identifier when the option allows it.  */
      if (!CPP_OPTION (pfile, dollars_in_ident))
        goto random_char;
      /* Fall through...  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      result->val.node = parse_identifier (pfile);

      /* 'L' may introduce wide characters or strings.  */
      if (result->val.node == pfile->spec_nodes.n_L)
        {
          /* Peek without consuming; the quote is only consumed when
             it really follows the lone L.  */
          c = *buffer->cur;
          if (c == '\'' || c == '"')
            {
              buffer->cur++;
              result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
              parse_string (pfile, result, c);
            }
        }
      /* Convert named operators to their proper types.  */
      else if (result->val.node->flags & NODE_OPERATOR)
        {
          /* NAMED_OP records that the operator was written as a name;
             val.node is left in place for spelling the token.  */
          result->flags |= NAMED_OP;
          result->type = result->val.node->value.operator;
        }
      break;

    case '\'':
    case '"':
      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
      parse_string (pfile, result, c);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = get_effective_char (pfile);

      if (c == '*')
        {
          if (skip_block_comment (pfile))
            cpp_error (pfile, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || CPP_IN_SYSTEM_HEADER (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_pedwarn (pfile,
                           "C++ style comments are not allowed in ISO C89");
              cpp_pedwarn (pfile,
                           "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_warning (pfile, "multi-line comment");
        }
      else if (c == '=')
        {
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          BACKUP ();
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches reach this point.  A comment
         that is not being saved counts as whitespace before the next
         token.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start);
      break;

    case '<':
      /* When angled_headers is set, '<' opens a header name that
         runs to the matching '>'.  */
      if (pfile->state.angled_headers)
        {
          result->type = CPP_HEADER_NAME;
          parse_string (pfile, result, '>');
          break;
        }

      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_LESS_EQ;
      else if (c == '<')
        IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
      else if (c == ':' && CPP_OPTION (pfile, digraphs))
        {
          result->type = CPP_OPEN_SQUARE;
          result->flags |= DIGRAPH;
        }
      else if (c == '%' && CPP_OPTION (pfile, digraphs))
        {
          result->type = CPP_OPEN_BRACE;
          result->flags |= DIGRAPH;
        }
      else
        {
          BACKUP ();
          result->type = CPP_LESS;
        }
      break;

    case '>':
      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_GREATER_EQ;
      else if (c == '>')
        IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
      else
        {
          BACKUP ();
          result->type = CPP_GREATER;
        }
      break;

    case '%':
      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs) && c == ':')
        {
          /* "%:" is the digraph for '#'; "%:%:" is the digraph for
             "##".  */
          result->flags |= DIGRAPH;
          result->type = CPP_HASH;
          if (get_effective_char (pfile) == '%')
            {
              const unsigned char *pos = buffer->cur;

              if (get_effective_char (pfile) == ':')
                result->type = CPP_PASTE;
              else
                /* Not "%:%:".  Rewind to one before POS so that the
                   second '%' is lexed again as a separate token.  */
                buffer->cur = pos - 1;
            }
          else
            BACKUP ();
        }
      else if (CPP_OPTION (pfile, digraphs) && c == '>')
        {
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_BRACE;
        }
      else
        {
          BACKUP ();
          result->type = CPP_MOD;
        }
      break;

    case '.':
      /* Assume a plain '.' until the lookahead says otherwise.  */
      result->type = CPP_DOT;
      c = get_effective_char (pfile);
      if (c == '.')
        {
          const unsigned char *pos = buffer->cur;

          if (get_effective_char (pfile) == '.')
            result->type = CPP_ELLIPSIS;
          else
            /* Only two dots.  Rewind to one before POS so that the
               second '.' is lexed again as a separate token.  */
            buffer->cur = pos - 1;
        }
      /* All known character sets have 0...9 contiguous.  */
      else if (ISDIGIT (c))
        {
          result->type = CPP_NUMBER;
          parse_number (pfile, &result->val.str, c, 1);
        }
      else if (c == '*' && CPP_OPTION (pfile, cplusplus))
        result->type = CPP_DOT_STAR;
      else
        BACKUP ();
      break;

    case '+':
      c = get_effective_char (pfile);
      if (c == '+')
        result->type = CPP_PLUS_PLUS;
      else if (c == '=')
        result->type = CPP_PLUS_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_PLUS;
        }
      break;

    case '-':
      c = get_effective_char (pfile);
      if (c == '>')
        {
          result->type = CPP_DEREF;
          /* "->*" is only looked for when lexing C++.  */
          if (CPP_OPTION (pfile, cplusplus))
            {
              if (get_effective_char (pfile) == '*')
                result->type = CPP_DEREF_STAR;
              else
                BACKUP ();
            }
        }
      else if (c == '-')
        result->type = CPP_MINUS_MINUS;
      else if (c == '=')
        result->type = CPP_MINUS_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_MINUS;
        }
      break;

    case '&':
      c = get_effective_char (pfile);
      if (c == '&')
        result->type = CPP_AND_AND;
      else if (c == '=')
        result->type = CPP_AND_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_AND;
        }
      break;

    case '|':
      c = get_effective_char (pfile);
      if (c == '|')
        result->type = CPP_OR_OR;
      else if (c == '=')
        result->type = CPP_OR_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_OR;
        }
      break;

    case ':':
      c = get_effective_char (pfile);
      if (c == ':' && CPP_OPTION (pfile, cplusplus))
        result->type = CPP_SCOPE;
      else if (c == '>' && CPP_OPTION (pfile, digraphs))
        {
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      else
        {
          BACKUP ();
          result->type = CPP_COLON;
        }
      break;

    /* Operators with exactly one possible two-character form.  */
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    /* Single-character punctuators.  */
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective C.  */
    case '@': result->type = CPP_ATSIGN; break;

    /* Anything else, plus a stray '\\' and a '$' that may not start
       an identifier, becomes a CPP_OTHER token carrying the
       character itself.  */
    random_char:
    default:
      result->type = CPP_OTHER;
      result->val.c = c;
      break;
    }

  return result;
}
1285
1286 /* An upper bound on the number of bytes needed to spell a token,
1287    including preceding whitespace.  */
1288 unsigned int
1289 cpp_token_len (token)
1290      const cpp_token *token;
1291 {
1292   unsigned int len;
1293
1294   switch (TOKEN_SPELL (token))
1295     {
1296     default:            len = 0;                                break;
1297     case SPELL_NUMBER:
1298     case SPELL_STRING:  len = token->val.str.len;               break;
1299     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1300     }
1301   /* 1 for whitespace, 4 for comment delimiters.  */
1302   return len + 5;
1303 }
1304
1305 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1306    already contain the enough space to hold the token's spelling.
1307    Returns a pointer to the character after the last character
1308    written.  */
1309 unsigned char *
1310 cpp_spell_token (pfile, token, buffer)
1311      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1312      const cpp_token *token;
1313      unsigned char *buffer;
1314 {
1315   switch (TOKEN_SPELL (token))
1316     {
1317     case SPELL_OPERATOR:
1318       {
1319         const unsigned char *spelling;
1320         unsigned char c;
1321
1322         if (token->flags & DIGRAPH)
1323           spelling
1324             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1325         else if (token->flags & NAMED_OP)
1326           goto spell_ident;
1327         else
1328           spelling = TOKEN_NAME (token);
1329
1330         while ((c = *spelling++) != '\0')
1331           *buffer++ = c;
1332       }
1333       break;
1334
1335     case SPELL_CHAR:
1336       *buffer++ = token->val.c;
1337       break;
1338
1339     spell_ident:
1340     case SPELL_IDENT:
1341       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1342       buffer += NODE_LEN (token->val.node);
1343       break;
1344
1345     case SPELL_NUMBER:
1346       memcpy (buffer, token->val.str.text, token->val.str.len);
1347       buffer += token->val.str.len;
1348       break;
1349
1350     case SPELL_STRING:
1351       {
1352         int left, right, tag;
1353         switch (token->type)
1354           {
1355           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1356           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1357           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1358           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1359           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1360           default:
1361             cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1362             return buffer;
1363           }
1364         if (tag) *buffer++ = tag;
1365         *buffer++ = left;
1366         memcpy (buffer, token->val.str.text, token->val.str.len);
1367         buffer += token->val.str.len;
1368         *buffer++ = right;
1369       }
1370       break;
1371
1372     case SPELL_NONE:
1373       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1374       break;
1375     }
1376
1377   return buffer;
1378 }
1379
1380 /* Returns a token as a null-terminated string.  The string is
1381    temporary, and automatically freed later.  Useful for diagnostics.  */
1382 unsigned char *
1383 cpp_token_as_text (pfile, token)
1384      cpp_reader *pfile;
1385      const cpp_token *token;
1386 {
1387   unsigned int len = cpp_token_len (token);
1388   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1389
1390   end = cpp_spell_token (pfile, token, start);
1391   end[0] = '\0';
1392
1393   return start;
1394 }
1395
1396 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1397 const char *
1398 cpp_type2name (type)
1399      enum cpp_ttype type;
1400 {
1401   return (const char *) token_spellings[type].name;
1402 }
1403
1404 /* Writes the spelling of token to FP, without any preceding space.
1405    Separated from cpp_spell_token for efficiency - to avoid stdio
1406    double-buffering.  */
1407 void
1408 cpp_output_token (token, fp)
1409      const cpp_token *token;
1410      FILE *fp;
1411 {
1412   switch (TOKEN_SPELL (token))
1413     {
1414     case SPELL_OPERATOR:
1415       {
1416         const unsigned char *spelling;
1417         int c;
1418
1419         if (token->flags & DIGRAPH)
1420           spelling
1421             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1422         else if (token->flags & NAMED_OP)
1423           goto spell_ident;
1424         else
1425           spelling = TOKEN_NAME (token);
1426
1427         c = *spelling;
1428         do
1429           putc (c, fp);
1430         while ((c = *++spelling) != '\0');
1431       }
1432       break;
1433
1434     case SPELL_CHAR:
1435       putc (token->val.c, fp);
1436       break;
1437
1438     spell_ident:
1439     case SPELL_IDENT:
1440       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1441     break;
1442
1443     case SPELL_NUMBER:
1444       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1445       break;
1446
1447     case SPELL_STRING:
1448       {
1449         int left, right, tag;
1450         switch (token->type)
1451           {
1452           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1453           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1454           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1455           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1456           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1457           default:
1458             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1459             return;
1460           }
1461         if (tag) putc (tag, fp);
1462         putc (left, fp);
1463         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1464         putc (right, fp);
1465       }
1466       break;
1467
1468     case SPELL_NONE:
1469       /* An error, most probably.  */
1470       break;
1471     }
1472 }
1473
1474 /* Compare two tokens.  */
1475 int
1476 _cpp_equiv_tokens (a, b)
1477      const cpp_token *a, *b;
1478 {
1479   if (a->type == b->type && a->flags == b->flags)
1480     switch (TOKEN_SPELL (a))
1481       {
1482       default:                  /* Keep compiler happy.  */
1483       case SPELL_OPERATOR:
1484         return 1;
1485       case SPELL_CHAR:
1486         return a->val.c == b->val.c; /* Character.  */
1487       case SPELL_NONE:
1488         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1489       case SPELL_IDENT:
1490         return a->val.node == b->val.node;
1491       case SPELL_NUMBER:
1492       case SPELL_STRING:
1493         return (a->val.str.len == b->val.str.len
1494                 && !memcmp (a->val.str.text, b->val.str.text,
1495                             a->val.str.len));
1496       }
1497
1498   return 0;
1499 }
1500
1501 /* Returns nonzero if a space should be inserted to avoid an
1502    accidental token paste for output.  For simplicity, it is
1503    conservative, and occasionally advises a space where one is not
1504    needed, e.g. "." and ".2".  */
1505
1506 int
1507 cpp_avoid_paste (pfile, token1, token2)
1508      cpp_reader *pfile;
1509      const cpp_token *token1, *token2;
1510 {
1511   enum cpp_ttype a = token1->type, b = token2->type;
1512   cppchar_t c;
1513
1514   if (token1->flags & NAMED_OP)
1515     a = CPP_NAME;
1516   if (token2->flags & NAMED_OP)
1517     b = CPP_NAME;
1518
1519   c = EOF;
1520   if (token2->flags & DIGRAPH)
1521     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1522   else if (token_spellings[b].category == SPELL_OPERATOR)
1523     c = token_spellings[b].name[0];
1524
1525   /* Quickly get everything that can paste with an '='.  */
1526   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1527     return 1;
1528
1529   switch (a)
1530     {
1531     case CPP_GREATER:   return c == '>' || c == '?';
1532     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1533     case CPP_PLUS:      return c == '+';
1534     case CPP_MINUS:     return c == '-' || c == '>';
1535     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1536     case CPP_MOD:       return c == ':' || c == '>';
1537     case CPP_AND:       return c == '&';
1538     case CPP_OR:        return c == '|';
1539     case CPP_COLON:     return c == ':' || c == '>';
1540     case CPP_DEREF:     return c == '*';
1541     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1542     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1543     case CPP_NAME:      return ((b == CPP_NUMBER
1544                                  && name_p (pfile, &token2->val.str))
1545                                 || b == CPP_NAME
1546                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1547     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1548                                 || c == '.' || c == '+' || c == '-');
1549     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1550                                 && token1->val.c == '@'
1551                                 && (b == CPP_NAME || b == CPP_STRING));
1552     default:            break;
1553     }
1554
1555   return 0;
1556 }
1557
1558 /* Output all the remaining tokens on the current line, and a newline
1559    character, to FP.  Leading whitespace is removed.  If there are
1560    macros, special token padding is not performed.  */
1561 void
1562 cpp_output_line (pfile, fp)
1563      cpp_reader *pfile;
1564      FILE *fp;
1565 {
1566   const cpp_token *token;
1567
1568   token = cpp_get_token (pfile);
1569   while (token->type != CPP_EOF)
1570     {
1571       cpp_output_token (token, fp);
1572       token = cpp_get_token (pfile);
1573       if (token->flags & PREV_WHITE)
1574         putc (' ', fp);
1575     }
1576
1577   putc ('\n', fp);
1578 }
1579
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1590
1591 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1592    failure if cpplib is not parsing C++ or C99.  Such failure is
1593    silent, and no variables are updated.  Otherwise returns 0, and
1594    warns if -Wtraditional.
1595
1596    [lex.charset]: The character designated by the universal character
1597    name \UNNNNNNNN is that character whose character short name in
1598    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1599    universal character name \uNNNN is that character whose character
1600    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1601    for a universal character name is less than 0x20 or in the range
1602    0x7F-0x9F (inclusive), or if the universal character name
1603    designates a character in the basic source character set, then the
1604    program is ill-formed.
1605
1606    We assume that wchar_t is Unicode, so we don't need to do any
1607    mapping.  Is this ever wrong?
1608
1609    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1610    LIMIT is the end of the string or charconst.  PSTR is updated to
1611    point after the UCS on return, and the UCS is written into PC.  */
1612
1613 static int
1614 maybe_read_ucs (pfile, pstr, limit, pc)
1615      cpp_reader *pfile;
1616      const unsigned char **pstr;
1617      const unsigned char *limit;
1618      unsigned int *pc;
1619 {
1620   const unsigned char *p = *pstr;
1621   unsigned int code = 0;
1622   unsigned int c = *pc, length;
1623
1624   /* Only attempt to interpret a UCS for C++ and C99.  */
1625   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1626     return 1;
1627
1628   if (CPP_WTRADITIONAL (pfile))
1629     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1630
1631   length = (c == 'u' ? 4: 8);
1632
1633   if ((size_t) (limit - p) < length)
1634     {
1635       cpp_error (pfile, "incomplete universal-character-name");
1636       /* Skip to the end to avoid more diagnostics.  */
1637       p = limit;
1638     }
1639   else
1640     {
1641       for (; length; length--, p++)
1642         {
1643           c = *p;
1644           if (ISXDIGIT (c))
1645             code = (code << 4) + hex_digit_value (c);
1646           else
1647             {
1648               cpp_error (pfile,
1649                          "non-hex digit '%c' in universal-character-name", c);
1650               /* We shouldn't skip in case there are multibyte chars.  */
1651               break;
1652             }
1653         }
1654     }
1655
1656 #ifdef TARGET_EBCDIC
1657   cpp_error (pfile, "universal-character-name on EBCDIC target");
1658   code = 0x3f;  /* EBCDIC invalid character */
1659 #else
1660  /* True extended characters are OK.  */
1661   if (code >= 0xa0
1662       && !(code & 0x80000000)
1663       && !(code >= 0xD800 && code <= 0xDFFF))
1664     ;
1665   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1666      hex escapes so that this also works with EBCDIC hosts.  */
1667   else if (code == 0x24 || code == 0x40 || code == 0x60)
1668     ;
1669   /* Don't give another error if one occurred above.  */
1670   else if (length == 0)
1671     cpp_error (pfile, "universal-character-name out of range");
1672 #endif
1673
1674   *pstr = p;
1675   *pc = code;
1676   return 0;
1677 }
1678
1679 /* Interpret an escape sequence, and return its value.  PSTR points to
1680    the input pointer, which is just after the backslash.  LIMIT is how
1681    much text we have.  MASK is a bitmask for the precision for the
1682    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1683    interpret escapes that did not exist in traditional C.
1684
1685    Handles all relevant diagnostics.  */
1686
1687 unsigned int
1688 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1689      cpp_reader *pfile;
1690      const unsigned char **pstr;
1691      const unsigned char *limit;
1692      unsigned HOST_WIDE_INT mask;
1693      int traditional;
1694 {
1695   int unknown = 0;
1696   const unsigned char *str = *pstr;
1697   unsigned int c = *str++;
1698
1699   switch (c)
1700     {
1701     case '\\': case '\'': case '"': case '?': break;
1702     case 'b': c = TARGET_BS;      break;
1703     case 'f': c = TARGET_FF;      break;
1704     case 'n': c = TARGET_NEWLINE; break;
1705     case 'r': c = TARGET_CR;      break;
1706     case 't': c = TARGET_TAB;     break;
1707     case 'v': c = TARGET_VT;      break;
1708
1709     case '(': case '{': case '[': case '%':
1710       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1711          '\%' is used to prevent SCCS from getting confused.  */
1712       unknown = CPP_PEDANTIC (pfile);
1713       break;
1714
1715     case 'a':
1716       if (CPP_WTRADITIONAL (pfile))
1717         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1718       if (!traditional)
1719         c = TARGET_BELL;
1720       break;
1721
1722     case 'e': case 'E':
1723       if (CPP_PEDANTIC (pfile))
1724         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1725       c = TARGET_ESC;
1726       break;
1727
1728     case 'u': case 'U':
1729       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1730       break;
1731
1732     case 'x':
1733       if (CPP_WTRADITIONAL (pfile))
1734         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1735
1736       if (!traditional)
1737         {
1738           unsigned int i = 0, overflow = 0;
1739           int digits_found = 0;
1740
1741           while (str < limit)
1742             {
1743               c = *str;
1744               if (! ISXDIGIT (c))
1745                 break;
1746               str++;
1747               overflow |= i ^ (i << 4 >> 4);
1748               i = (i << 4) + hex_digit_value (c);
1749               digits_found = 1;
1750             }
1751
1752           if (!digits_found)
1753             cpp_error (pfile, "\\x used with no following hex digits");
1754
1755           if (overflow | (i != (i & mask)))
1756             {
1757               cpp_pedwarn (pfile, "hex escape sequence out of range");
1758               i &= mask;
1759             }
1760           c = i;
1761         }
1762       break;
1763
1764     case '0':  case '1':  case '2':  case '3':
1765     case '4':  case '5':  case '6':  case '7':
1766       {
1767         unsigned int i = c - '0';
1768         int count = 0;
1769
1770         while (str < limit && ++count < 3)
1771           {
1772             c = *str;
1773             if (c < '0' || c > '7')
1774               break;
1775             str++;
1776             i = (i << 3) + c - '0';
1777           }
1778
1779         if (i != (i & mask))
1780           {
1781             cpp_pedwarn (pfile, "octal escape sequence out of range");
1782             i &= mask;
1783           }
1784         c = i;
1785       }
1786       break;
1787
1788     default:
1789       unknown = 1;
1790       break;
1791     }
1792
1793   if (unknown)
1794     {
1795       if (ISGRAPH (c))
1796         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1797       else
1798         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1799     }
1800
1801   if (c > mask)
1802     cpp_pedwarn (pfile, "escape sequence out of range for character");
1803
1804   *pstr = str;
1805   return c;
1806 }
1807
/* Fall back to the plain CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE when the
   MAX_ variants have not been defined already.  The MAX_ values are
   the bit widths cpp_interpret_charconst uses for narrow and wide
   character constants respectively.  */
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1815
1816 /* Interpret a (possibly wide) character constant in TOKEN.
1817    WARN_MULTI warns about multi-character charconsts, if not
1818    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1819    that did not exist in traditional C.  PCHARS_SEEN points to a
1820    variable that is filled in with the number of characters seen.  */
1821 HOST_WIDE_INT
1822 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1823      cpp_reader *pfile;
1824      const cpp_token *token;
1825      int warn_multi;
1826      int traditional;
1827      unsigned int *pchars_seen;
1828 {
1829   const unsigned char *str = token->val.str.text;
1830   const unsigned char *limit = str + token->val.str.len;
1831   unsigned int chars_seen = 0;
1832   unsigned int width, max_chars, c;
1833   unsigned HOST_WIDE_INT mask;
1834   HOST_WIDE_INT result = 0;
1835
1836 #ifdef MULTIBYTE_CHARS
1837   (void) local_mbtowc (NULL, NULL, 0);
1838 #endif
1839
1840   /* Width in bits.  */
1841   if (token->type == CPP_CHAR)
1842     width = MAX_CHAR_TYPE_SIZE;
1843   else
1844     width = MAX_WCHAR_TYPE_SIZE;
1845
1846   if (width < HOST_BITS_PER_WIDE_INT)
1847     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1848   else
1849     mask = ~0;
1850   max_chars = HOST_BITS_PER_WIDE_INT / width;
1851
1852   while (str < limit)
1853     {
1854 #ifdef MULTIBYTE_CHARS
1855       wchar_t wc;
1856       int char_len;
1857
1858       char_len = local_mbtowc (&wc, str, limit - str);
1859       if (char_len == -1)
1860         {
1861           cpp_warning (pfile, "ignoring invalid multibyte character");
1862           c = *str++;
1863         }
1864       else
1865         {
1866           str += char_len;
1867           c = wc;
1868         }
1869 #else
1870       c = *str++;
1871 #endif
1872
1873       if (c == '\\')
1874         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1875
1876 #ifdef MAP_CHARACTER
1877       if (ISPRINT (c))
1878         c = MAP_CHARACTER (c);
1879 #endif
1880
1881       /* Merge character into result; ignore excess chars.  */
1882       if (++chars_seen <= max_chars)
1883         {
1884           if (width < HOST_BITS_PER_WIDE_INT)
1885             result = (result << width) | (c & mask);
1886           else
1887             result = c;
1888         }
1889     }
1890
1891   if (chars_seen == 0)
1892     cpp_error (pfile, "empty character constant");
1893   else if (chars_seen > max_chars)
1894     {
1895       chars_seen = max_chars;
1896       cpp_warning (pfile, "character constant too long");
1897     }
1898   else if (chars_seen > 1 && !traditional && warn_multi)
1899     cpp_warning (pfile, "multi-character character constant");
1900
1901   /* If char type is signed, sign-extend the constant.  The
1902      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1903   if (token->type == CPP_CHAR && chars_seen)
1904     {
1905       unsigned int nbits = chars_seen * width;
1906
1907       mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
1908       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1909           || ((result >> (nbits - 1)) & 1) == 0)
1910         result &= mask;
1911       else
1912         result |= ~mask;
1913     }
1914
1915   *pchars_seen = chars_seen;
1916   return result;
1917 }
1918
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest payload size, in bytes, that new_buff will allocate.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer that _cpp_get_buff will hand out for a request
   of MIN_SIZE bytes; anything bigger is left on the free list.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size requested when extending BUFF: MIN_EXTRA plus twice the
   distance from BUFF's cur to its limit.  Both arguments are
   parenthesized so that any expression may be passed safely.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1933
/* Layout probe.  The offset of U within this structure is the padding
   the compiler inserts after a lone char so that the union is
   correctly aligned, i.e. the stricter of the alignments of the
   union's members.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment, in bytes, that new_buff rounds buffer sizes up to.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The masking only gives the
   right answer when ALIGN is a power of two.  */
#define CPP_ALIGN(SIZE, ALIGN) (((SIZE) + ((ALIGN) - 1)) & ~((ALIGN) - 1))
1946
1947 /* Create a new allocation buffer.  Place the control block at the end
1948    of the buffer, so that buffer overflows will cause immediate chaos.  */
1949 static _cpp_buff *
1950 new_buff (len)
1951      size_t len;
1952 {
1953   _cpp_buff *result;
1954   unsigned char *base;
1955
1956   if (len < MIN_BUFF_SIZE)
1957     len = MIN_BUFF_SIZE;
1958   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1959
1960   base = xmalloc (len + sizeof (_cpp_buff));
1961   result = (_cpp_buff *) (base + len);
1962   result->base = base;
1963   result->cur = base;
1964   result->limit = base + len;
1965   result->next = NULL;
1966   return result;
1967 }
1968
1969 /* Place a chain of unwanted allocation buffers on the free list.  */
1970 void
1971 _cpp_release_buff (pfile, buff)
1972      cpp_reader *pfile;
1973      _cpp_buff *buff;
1974 {
1975   _cpp_buff *end = buff;
1976
1977   while (end->next)
1978     end = end->next;
1979   end->next = pfile->free_buffs;
1980   pfile->free_buffs = buff;
1981 }
1982
1983 /* Return a free buffer of size at least MIN_SIZE.  */
1984 _cpp_buff *
1985 _cpp_get_buff (pfile, min_size)
1986      cpp_reader *pfile;
1987      size_t min_size;
1988 {
1989   _cpp_buff *result, **p;
1990
1991   for (p = &pfile->free_buffs;; p = &(*p)->next)
1992     {
1993       size_t size;
1994
1995       if (*p == NULL)
1996         return new_buff (min_size);
1997       result = *p;
1998       size = result->limit - result->base;
1999       /* Return a buffer that's big enough, but don't waste one that's
2000          way too big.  */
2001       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2002         break;
2003     }
2004
2005   *p = result->next;
2006   result->next = NULL;
2007   result->cur = result->base;
2008   return result;
2009 }
2010
2011 /* Creates a new buffer with enough space to hold the uncommitted
2012    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2013    the excess bytes to the new buffer.  Chains the new buffer after
2014    BUFF, and returns the new buffer.  */
2015 _cpp_buff *
2016 _cpp_append_extend_buff (pfile, buff, min_extra)
2017      cpp_reader *pfile;
2018      _cpp_buff *buff;
2019      size_t min_extra;
2020 {
2021   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2022   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2023
2024   buff->next = new_buff;
2025   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2026   return new_buff;
2027 }
2028
2029 /* Creates a new buffer with enough space to hold the uncommitted
2030    remaining bytes of the buffer pointed to by BUFF, and at least
2031    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2032    Chains the new buffer before the buffer pointed to by BUFF, and
2033    updates the pointer to point to the new buffer.  */
2034 void
2035 _cpp_extend_buff (pfile, pbuff, min_extra)
2036      cpp_reader *pfile;
2037      _cpp_buff **pbuff;
2038      size_t min_extra;
2039 {
2040   _cpp_buff *new_buff, *old_buff = *pbuff;
2041   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2042
2043   new_buff = _cpp_get_buff (pfile, size);
2044   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2045   new_buff->next = old_buff;
2046   *pbuff = new_buff;
2047 }
2048
2049 /* Free a chain of buffers starting at BUFF.  */
2050 void
2051 _cpp_free_buff (buff)
2052      _cpp_buff *buff;
2053 {
2054   _cpp_buff *next;
2055
2056   for (; buff; buff = next)
2057     {
2058       next = buff->next;
2059       free (buff->base);
2060     }
2061 }
2062
2063 /* Allocate permanent, unaligned storage of length LEN.  */
2064 unsigned char *
2065 _cpp_unaligned_alloc (pfile, len)
2066      cpp_reader *pfile;
2067      size_t len;
2068 {
2069   _cpp_buff *buff = pfile->u_buff;
2070   unsigned char *result = buff->cur;
2071
2072   if (len > (size_t) (buff->limit - result))
2073     {
2074       buff = _cpp_get_buff (pfile, len);
2075       buff->next = pfile->u_buff;
2076       pfile->u_buff = buff;
2077       result = buff->cur;
2078     }
2079
2080   buff->cur = result + len;
2081   return result;
2082 }
2083
2084 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2085    That buffer is used for growing allocations when saving macro
2086    replacement lists in a #define, and when parsing an answer to an
2087    assertion in #assert, #unassert or #if (and therefore possibly
2088    whilst expanding macros).  It therefore must not be used by any
2089    code that they might call: specifically the lexer and the guts of
2090    the macro expander.
2091
2092    All existing other uses clearly fit this restriction: storing
2093    registered pragmas during initialization.  */
2094 unsigned char *
2095 _cpp_aligned_alloc (pfile, len)
2096      cpp_reader *pfile;
2097      size_t len;
2098 {
2099   _cpp_buff *buff = pfile->a_buff;
2100   unsigned char *result = buff->cur;
2101
2102   if (len > (size_t) (buff->limit - result))
2103     {
2104       buff = _cpp_get_buff (pfile, len);
2105       buff->next = pfile->a_buff;
2106       pfile->a_buff = buff;
2107       result = buff->cur;
2108     }
2109
2110   buff->cur = result + len;
2111   return result;
2112 }