gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "intl.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 #ifdef HAVE_MMAP_FILE
  30 # include <sys/mman.h>
  31 #endif
  32
  33 #define PEEKBUF(BUFFER, N) \
  34   ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
  35 #define GETBUF(BUFFER) \
  36   ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
  37 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
  38
  39 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
  40 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
  41 #define GETC() GETBUF (CPP_BUFFER (pfile))
  42 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
  43
  44 static void skip_block_comment  PARAMS ((cpp_reader *));
  45 static void skip_line_comment   PARAMS ((cpp_reader *));
  46 static int maybe_macroexpand    PARAMS ((cpp_reader *, long));
  47 static int skip_comment         PARAMS ((cpp_reader *, int));
  48 static int copy_comment         PARAMS ((cpp_reader *, int));
  49 static void skip_string         PARAMS ((cpp_reader *, int));
  50 static void parse_string        PARAMS ((cpp_reader *, int));
  51 static U_CHAR *find_position    PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
  52 static void null_warning        PARAMS ((cpp_reader *, unsigned int));
  53
  54 static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
  55                                          size_t, FILE *));
  56 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
  57                                          unsigned int));
  58 static void bump_column         PARAMS ((cpp_printer *, unsigned int,
  59                                          unsigned int));
  60 static void expand_name_space   PARAMS ((cpp_toklist *, unsigned int));
  61 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
  62                                          unsigned int));
  63
  64 #define auto_expand_name_space(list) \
  65     expand_name_space ((list), 1 + (list)->name_cap / 2)
  66
  67 #ifdef NEW_LEXER
  68
  69 void init_trigraph_map PARAMS ((void));
  70 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
  71                                                 unsigned char *));
  72 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
  73                                                      const unsigned char *));
  74 static int skip_block_comment2 PARAMS ((cpp_reader *));
  75 static int skip_line_comment2 PARAMS ((cpp_reader *));
  76 static void skip_whitespace PARAMS ((cpp_reader *, int));
  77 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  78 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  79 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
  80                                   unsigned int, int));
  81 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
  82 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
  83                                   const unsigned char *,
  84                                   unsigned int, unsigned int));
  85 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
  86
  87 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
  88
  89 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
  90                                             unsigned char *, int));
  91
  92 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
  93                                           cpp_token *));
  94
  95 /* Macros on a cpp_name.  */
  96 #define INIT_TOKEN_NAME(list, token) \
  97   do {(token)->val.name.len = 0; \
  98       (token)->val.name.text = (list)->namebuf + (list)->name_used; \
  99       (list)->tokens_used = token - (list)->tokens + 1; \
 100   } while (0)
 101
 102 /* Maybe put these in the ISTABLE eventually.  */
 103 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
 104 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
 105
 106 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 107    character, if any, is in buffer.  */
 108 #define handle_newline(cur, limit, c) \
 109   do {\
 110   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 111     (cur)++; \
 112   CPP_BUMP_LINE_CUR (pfile, (cur)); \
 113   pfile->col_adjust = 0; \
 114   } while (0)
 115
 116 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
 117 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 118
 119 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
 120 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
 121 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
 122 #define BACKUP_DIGRAPH(ttype) do { \
 123   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 124
 125 /* An upper bound on the number of bytes needed to spell a token,
 126    including preceding whitespace.  */
 127 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
 128                                SPELL_NONE ? (token)->val.name.len: 0))
 129
 130 #endif
 131
  132 /* Order here matters.  Those beyond SPELL_NONE store their spelling
  133    in the token list, and its length in token->val.name.len.  */
 134 enum spell_type
 135 {
 136   SPELL_OPERATOR = 0,
 137   SPELL_NONE,
 138   SPELL_CHAR,    /* FIXME: revert order of NONE and CHAR after transition. */
 139   SPELL_IDENT,
 140   SPELL_STRING
 141 };
 142
 143 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
 144 #define I(e, s) {SPELL_IDENT, s},
 145 #define S(e, s) {SPELL_STRING, s},
 146 #define C(e, s) {SPELL_CHAR, s},
 147 #define N(e, s) {SPELL_NONE, s},
 148
 149 static const struct token_spelling
 150 {
 151   ENUM_BITFIELD(spell_type) type : CHAR_BIT;
 152   const U_CHAR *spelling;
 153 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 154
 155 #undef T
 156 #undef I
 157 #undef S
 158 #undef C
 159 #undef N
 160
 161 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 162
 163 void
 164 _cpp_grow_token_buffer (pfile, n)
 165      cpp_reader *pfile;
 166      long n;
 167 {
 168   long old_written = CPP_WRITTEN (pfile);
 169   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 170   pfile->token_buffer = (U_CHAR *)
 171     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 172   CPP_SET_WRITTEN (pfile, old_written);
 173 }
 174
 175 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
 176    If BUFFER != NULL, then use the LENGTH characters in BUFFER
 177    as the new input buffer.
 178    Return the new buffer, or NULL on failure.  */
 179
 180 cpp_buffer *
 181 cpp_push_buffer (pfile, buffer, length)
 182      cpp_reader *pfile;
 183      const U_CHAR *buffer;
 184      long length;
 185 {
 186   cpp_buffer *buf = CPP_BUFFER (pfile);
 187   cpp_buffer *new;
 188   if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
 189     {
 190       cpp_fatal (pfile, "macro or `#include' recursion too deep");
 191       return NULL;
 192     }
 193
 194   new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
 195
 196   new->buf = new->cur = buffer;
 197   new->rlimit = buffer + length;
 198   new->prev = buf;
 199   new->mark = NULL;
 200   new->line_base = NULL;
 201
 202   CPP_BUFFER (pfile) = new;
 203   return new;
 204 }
 205
 206 cpp_buffer *
 207 cpp_pop_buffer (pfile)
 208      cpp_reader *pfile;
 209 {
 210   cpp_buffer *buf = CPP_BUFFER (pfile);
 211   if (ACTIVE_MARK_P (pfile))
 212     cpp_ice (pfile, "mark active in cpp_pop_buffer");
 213
 214   if (buf->ihash)
 215     {
 216       _cpp_unwind_if_stack (pfile, buf);
 217       if (buf->buf)
 218         free ((PTR) buf->buf);
 219       if (pfile->system_include_depth)
 220         pfile->system_include_depth--;
 221       if (pfile->potential_control_macro)
 222         {
 223           buf->ihash->cmacro = pfile->potential_control_macro;
 224           pfile->potential_control_macro = 0;
 225         }
 226       pfile->input_stack_listing_current = 0;
 227     }
 228   else if (buf->macro)
 229     {
 230       cpp_hashnode *m = buf->macro;
 231
 232       m->disabled = 0;
 233       if ((m->type == T_FMACRO && buf->mapped)
 234           || m->type == T_SPECLINE || m->type == T_FILE
 235           || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
 236           || m->type == T_STDC)
 237         free ((PTR) buf->buf);
 238     }
 239   CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
 240   free (buf);
 241   pfile->buffer_stack_depth--;
 242   return CPP_BUFFER (pfile);
 243 }
 244
 245 /* Deal with the annoying semantics of fwrite.  */
 246 static void
 247 safe_fwrite (pfile, buf, len, fp)
 248      cpp_reader *pfile;
 249      const U_CHAR *buf;
 250      size_t len;
 251      FILE *fp;
 252 {
 253   size_t count;
 254
 255   while (len)
 256     {
 257       count = fwrite (buf, 1, len, fp);
 258       if (count == 0)
 259         goto error;
 260       len -= count;
 261       buf += count;
 262     }
 263   return;
 264
 265  error:
 266   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 267 }
 268
 269 /* Notify the compiler proper that the current line number has jumped,
 270    or the current file name has changed.  */
 271
 272 static void
 273 output_line_command (pfile, print, line)
 274      cpp_reader *pfile;
 275      cpp_printer *print;
 276      unsigned int line;
 277 {
 278   cpp_buffer *ip = cpp_file_buffer (pfile);
 279   enum { same = 0, enter, leave, rname } change;
 280   static const char * const codes[] = { "", " 1", " 2", "" };
 281
 282   if (CPP_OPTION (pfile, no_line_commands))
 283     return;
 284
 285   /* Determine whether the current filename has changed, and if so,
 286      how.  'nominal_fname' values are unique, so they can be compared
 287      by comparing pointers.  */
 288   if (ip->nominal_fname == print->last_fname)
 289     change = same;
 290   else
 291     {
 292       if (pfile->buffer_stack_depth == print->last_bsd)
 293         change = rname;
 294       else
 295         {
 296           if (pfile->buffer_stack_depth > print->last_bsd)
 297             change = enter;
 298           else
 299             change = leave;
 300           print->last_bsd = pfile->buffer_stack_depth;
 301         }
 302       print->last_fname = ip->nominal_fname;
 303     }
 304   /* If the current file has not changed, we can output a few newlines
 305      instead if we want to increase the line number by a small amount.
 306      We cannot do this if print->lineno is zero, because that means we
 307      haven't output any line commands yet.  (The very first line
 308      command output is a `same_file' command.)  */
 309   if (change == same && print->lineno != 0
 310       && line >= print->lineno && line < print->lineno + 8)
 311     {
 312       while (line > print->lineno)
 313         {
 314           putc ('\n', print->outf);
 315           print->lineno++;
 316         }
 317       return;
 318     }
 319
 320 #ifndef NO_IMPLICIT_EXTERN_C
 321   if (CPP_OPTION (pfile, cplusplus))
 322     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 323              codes[change],
 324              ip->system_header_p ? " 3" : "",
 325              (ip->system_header_p == 2) ? " 4" : "");
 326   else
 327 #endif
 328     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 329              codes[change],
 330              ip->system_header_p ? " 3" : "");
 331   print->lineno = line;
 332 }
 333
 334 /* Write the contents of the token_buffer to the output stream, and
 335    clear the token_buffer.  Also handles generating line commands and
 336    keeping track of file transitions.  */
 337
 338 void
 339 cpp_output_tokens (pfile, print)
 340      cpp_reader *pfile;
 341      cpp_printer *print;
 342 {
 343   cpp_buffer *ip;
 344
 345   if (CPP_WRITTEN (pfile) - print->written)
 346     {
 347       if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
 348         print->lineno++;
 349       safe_fwrite (pfile, pfile->token_buffer,
 350                    CPP_WRITTEN (pfile) - print->written, print->outf);
 351     }
 352
 353   ip = cpp_file_buffer (pfile);
 354   if (ip)
 355     output_line_command (pfile, print, CPP_BUF_LINE (ip));
 356
 357   CPP_SET_WRITTEN (pfile, print->written);
 358 }
 359
 360 /* Helper for cpp_output_list - increases the column number to match
 361    what we expect it to be.  */
 362
 363 static void
 364 bump_column (print, from, to)
 365      cpp_printer *print;
 366      unsigned int from, to;
 367 {
 368   unsigned int tabs, spcs;
 369   unsigned int delta = to - from;
 370
 371   /* Only if FROM is 0, advance by tabs.  */
 372   if (from == 0)
 373     tabs = delta / 8, spcs = delta % 8;
 374   else
 375     tabs = 0, spcs = delta;
 376
 377   while (tabs--) putc ('\t', print->outf);
 378   while (spcs--) putc (' ', print->outf);
 379 }
 380
 381 /* Write out the list L onto pfile->token_buffer.  This function is
 382    incomplete:
 383
 384    1) pfile->token_buffer is not going to continue to exist.
 385    2) At the moment, tokens don't carry the information described
 386    in cpplib.h; they are all strings.
 387    3) The list has to be a complete line, and has to be written starting
 388    at the beginning of a line.  */
 389
 390 void
 391 cpp_output_list (pfile, print, list)
 392      cpp_reader *pfile;
 393      cpp_printer *print;
 394      const cpp_toklist *list;
 395 {
 396   unsigned int i;
 397   unsigned int curcol = 1;
 398
 399   /* XXX Probably does not do what is intended.  */
 400   if (print->lineno != list->line)
 401     output_line_command (pfile, print, list->line);
 402
 403   for (i = 0; i < list->tokens_used; i++)
 404     {
 405       if (TOK_TYPE (list, i) == CPP_VSPACE)
 406         {
 407           output_line_command (pfile, print, list->tokens[i].aux);
 408           continue;
 409         }
 410
 411       if (curcol < TOK_COL (list, i))
 412         {
 413           /* Insert space to bring the column to what it should be.  */
 414           bump_column (print, curcol - 1, TOK_COL (list, i));
 415           curcol = TOK_COL (list, i);
 416         }
 417       /* XXX We may have to insert space to prevent an accidental
 418          token paste.  */
 419       safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
 420       curcol += TOK_LEN (list, i);
 421     }
 422 }
 423
 424 /* Scan a string (which may have escape marks), perform macro expansion,
 425    and write the result to the token_buffer.  */
 426
 427 void
 428 _cpp_expand_to_buffer (pfile, buf, length)
 429      cpp_reader *pfile;
 430      const U_CHAR *buf;
 431      int length;
 432 {
 433   cpp_buffer *stop;
 434   enum cpp_ttype token;
 435   U_CHAR *buf1;
 436
 437   if (length < 0)
 438     {
 439       cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
 440       return;
 441     }
 442
 443   /* Copy the buffer, because it might be in an unsafe place - for
 444      example, a sequence on the token_buffer, where the pointers will
 445      be invalidated if we enlarge the token_buffer.  */
 446   buf1 = alloca (length);
 447   memcpy (buf1, buf, length);
 448
 449   /* Set up the input on the input stack.  */
 450   stop = CPP_BUFFER (pfile);
 451   if (cpp_push_buffer (pfile, buf1, length) == NULL)
 452     return;
 453   CPP_BUFFER (pfile)->has_escapes = 1;
 454
 455   /* Scan the input, create the output.  */
 456   for (;;)
 457     {
 458       token = cpp_get_token (pfile);
 459       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 460         break;
 461     }
 462 }
 463
 464 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 465
 466 void
 467 cpp_scan_buffer_nooutput (pfile)
 468      cpp_reader *pfile;
 469 {
 470   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 471   enum cpp_ttype token;
 472   unsigned int old_written = CPP_WRITTEN (pfile);
 473   /* In no-output mode, we can ignore everything but directives.  */
 474   for (;;)
 475     {
 476       if (! pfile->only_seen_white)
 477         _cpp_skip_rest_of_line (pfile);
 478       token = cpp_get_token (pfile);
 479       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 480         break;
 481     }
 482   CPP_SET_WRITTEN (pfile, old_written);
 483 }
 484
 485 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 486
 487 void
 488 cpp_scan_buffer (pfile, print)
 489      cpp_reader *pfile;
 490      cpp_printer *print;
 491 {
 492   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 493   enum cpp_ttype token;
 494
 495   for (;;)
 496     {
 497       token = cpp_get_token (pfile);
 498       if (token == CPP_VSPACE || token == CPP_EOF
 499           /* XXX Temporary kluge - force flush after #include only */
 500           || (token == CPP_DIRECTIVE
 501               && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
 502         {
 503           cpp_output_tokens (pfile, print);
 504           if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 505             return;
 506         }
 507     }
 508 }
 509
 510 /* Return the topmost cpp_buffer that corresponds to a file (not a macro).  */
 511
 512 cpp_buffer *
 513 cpp_file_buffer (pfile)
 514      cpp_reader *pfile;
 515 {
 516   cpp_buffer *ip;
 517
 518   for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
 519     if (ip->ihash != NULL)
 520       return ip;
 521   return NULL;
 522 }
 523
 524 /* Token-buffer helper functions.  */
 525
 526 /* Expand a token list's string space. It is *vital* that
 527    list->tokens_used is correct, to get pointer fix-up right.  */
 528 static void
 529 expand_name_space (list, len)
 530      cpp_toklist *list;
 531      unsigned int len;
 532 {
 533   const U_CHAR *old_namebuf;
 534
 535   old_namebuf = list->namebuf;
 536   list->name_cap += len;
 537   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 538
 539   /* Fix up token text pointers.  */
 540   if (list->namebuf != old_namebuf)
 541     {
 542       unsigned int i;
 543
 544       for (i = 0; i < list->tokens_used; i++)
 545         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 546           list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
 547     }
 548 }
 549
 550 /* Expand the number of tokens in a list.  */
 551 void
 552 _cpp_expand_token_space (list, count)
 553      cpp_toklist *list;
 554      unsigned int count;
 555 {
 556   unsigned int n;
 557
 558   list->tokens_cap += count;
 559   n = list->tokens_cap;
 560   if (list->flags & LIST_OFFSET)
 561     list->tokens--, n++;
 562   list->tokens = (cpp_token *)
 563     xrealloc (list->tokens, n * sizeof (cpp_token));
 564   if (list->flags & LIST_OFFSET)
 565     list->tokens++;             /* Skip the dummy.  */
 566 }
 567
 568 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 569    an extra token in front of the token list, as this allows the lexer
 570    to always peek at the previous token without worrying about
 571    underflowing the list, and some initial space.  Otherwise, no
 572    token- or name-space is allocated, and there is no dummy token.  */
 573 void
 574 _cpp_init_toklist (list, flags)
 575      cpp_toklist *list;
 576      int flags;
 577 {
 578   /* We malloc zero bytes because we may want to realloc later, and
 579      some old implementations don't like realloc-ing a null pointer.  */
 580   if (flags == NO_DUMMY_TOKEN)
 581     {
 582       list->tokens_cap = 0;
 583       list->tokens = (cpp_token *) malloc (0);
 584       list->name_cap = 0;
 585       list->flags = 0;
 586     }
 587   else
 588     {
 589       /* Initialize token space.  Put a dummy token before the start
 590          that will fail matches.  */
 591       list->tokens_cap = 256;   /* 4K's worth.  */
 592       list->tokens = (cpp_token *)
 593         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 594       list->tokens[0].type = CPP_EOF;
 595       list->tokens++;
 596
 597       /* Initialize name space.  */
 598       list->name_cap = 1024;
 599       list->flags = LIST_OFFSET;
 600     }
 601
 602   /* Allocate name space.  */
 603   list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 604
 605   _cpp_clear_toklist (list);
 606 }
 607
 608 /* Clear a token list.  */
 609 void
 610 _cpp_clear_toklist (list)
 611      cpp_toklist *list;
 612 {
 613   list->tokens_used = 0;
 614   list->name_used = 0;
 615   list->dirno = -1;
 616   list->flags &= LIST_OFFSET;  /* clear all but that one */
 617 }
 618
 619 /* Free a token list.  Does not free the list itself, which may be
 620    embedded in a larger structure.  */
 621 void
 622 _cpp_free_toklist (list)
 623      cpp_toklist *list;
 624 {
 625   if (list->flags & LIST_OFFSET)
 626     free (list->tokens - 1);    /* Backup over dummy token.  */
 627   else
 628     free (list->tokens);
 629   free (list->namebuf);
 630 }
 631
 632 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
 633    COPY is assumed not to be initialized.  The comment space is not
 634    copied.  */
 635 void
 636 _cpp_slice_toklist (copy, start, finish)
 637      cpp_toklist *copy;
 638      const cpp_token *start, *finish;
 639 {
 640   unsigned int i, n;
 641   size_t bytes;
 642
 643   n = finish - start;
 644   copy->tokens_cap = n;
 645   copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
 646   memcpy (copy->tokens, start, n * sizeof (cpp_token));
 647
 648   bytes = 0;
 649   for (i = 0; i < n; i++)
 650     if (token_spellings[start[i].type].type > SPELL_NONE)
 651       bytes += start[i].val.name.len;
 652
 653   copy->namebuf = xmalloc (bytes);
 654   bytes = 0;
 655   for (i = 0; i < n; i++)
 656     if (token_spellings[start[i].type].type > SPELL_NONE)
 657       {
 658         memcpy (copy->namebuf + bytes,
 659                 start[i].val.name.text, start[i].val.name.len);
 660         copy->tokens[i].val.name.text = copy->namebuf + bytes;
 661         bytes += start[i].val.name.len;
 662       }
 663
 664   copy->tokens_cap = n;
 665   copy->tokens_used = n;
 666   copy->name_used = bytes;
 667   copy->name_cap = bytes;
 668
 669   copy->flags = 0;
 670   copy->dirno = -1;
 671 }
 672
 673 /* Shrink a token list down to the minimum size.  */
 674 void
 675 _cpp_squeeze_toklist (list)
 676      cpp_toklist *list;
 677 {
 678   long delta;
 679   const U_CHAR *old_namebuf;
 680
 681   if (list->flags & LIST_OFFSET)
 682     {
 683       list->tokens--;
 684       memmove (list->tokens, list->tokens + 1,
 685                list->tokens_used * sizeof (cpp_token));
 686       list->tokens = xrealloc (list->tokens,
 687                                list->tokens_used * sizeof (cpp_token));
 688       list->flags &= ~LIST_OFFSET;
 689     }
 690   else
 691     list->tokens = xrealloc (list->tokens,
 692                              list->tokens_used * sizeof (cpp_token));
 693   list->tokens_cap = list->tokens_used;
 694
 695   old_namebuf = list->namebuf;
 696   list->namebuf = xrealloc (list->namebuf, list->name_used);
 697   list->name_cap = list->name_used;
 698
 699   /* Fix up token text pointers.  */
 700   delta = list->namebuf - old_namebuf;
 701   if (delta)
 702     {
 703       unsigned int i;
 704
 705       for (i = 0; i < list->tokens_used; i++)
 706         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 707           list->tokens[i].val.name.text += delta;
 708     }
 709 }
 710
 711 /* Compare two tokens.  */
 712 int
 713 _cpp_equiv_tokens (a, b)
 714      const cpp_token *a, *b;
 715 {
 716   if (a->type != b->type
 717       || a->flags != b->flags
 718       || a->aux != b->aux)
 719     return 0;
 720
 721   if (token_spellings[a->type].type > SPELL_NONE)
 722     {
 723       if (a->val.name.len != b->val.name.len
 724           || ustrncmp(a->val.name.text,
 725                       b->val.name.text,
 726                       a->val.name.len))
 727         return 0;
 728     }
 729   return 1;
 730 }
 731
 732 /* Compare two token lists.  */
 733 int
 734 _cpp_equiv_toklists (a, b)
 735      const cpp_toklist *a, *b;
 736 {
 737   unsigned int i;
 738
 739   if (a->tokens_used != b->tokens_used)
 740     return 0;
 741
 742   for (i = 0; i < a->tokens_used; i++)
 743     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 744       return 0;
 745   return 1;
 746 }
 747
 748 /* Scan until we encounter a token of type STOP or a newline, and
 749    create a token list for it.  Does not macro-expand or execute
 750    directives.  The final token is not included in the list or
 751    consumed from the input.  Returns the type of the token stopped at. */
 752
 753 enum cpp_ttype
 754 _cpp_scan_until (pfile, list, stop)
 755      cpp_reader *pfile;
 756      cpp_toklist *list;
 757      enum cpp_ttype stop;
 758 {
 759   int i, col;
 760   long written, len;
 761   enum cpp_ttype type;
 762   int space_before;
 763
 764   _cpp_clear_toklist (list);
 765   list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 766
 767   written = CPP_WRITTEN (pfile);
 768   i = 0;
 769   space_before = 0;
 770   for (;;)
 771     {
 772       col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
 773       type = _cpp_lex_token (pfile);
 774       len = CPP_WRITTEN (pfile) - written;
 775       CPP_SET_WRITTEN (pfile, written);
 776       if (type == CPP_HSPACE)
 777         {
 778           if (CPP_PEDANTIC (pfile))
 779             pedantic_whitespace (pfile, pfile->token_buffer + written, len);
 780           space_before = 1;
 781           continue;
 782         }
 783       else if (type == CPP_COMMENT)
 784         /* Only happens when processing -traditional macro definitions.
 785            Do not give this a token entry, but do not change space_before
 786            either.  */
 787         continue;
 788
 789       if (list->tokens_used >= list->tokens_cap)
 790         _cpp_expand_token_space (list, 256);
 791       if (list->name_used + len >= list->name_cap)
 792         expand_name_space (list, list->name_used + len + 1 - list->name_cap);
 793
 794       if (type == CPP_MACRO)
 795         type = CPP_NAME;
 796
 797       if (type == CPP_VSPACE || type == stop)
 798         break;
 799
 800       list->tokens_used++;
 801       TOK_TYPE  (list, i) = type;
 802       TOK_COL   (list, i) = col;
 803       TOK_AUX   (list, i) = 0;
 804       TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
 805
 806       TOK_LEN (list, i) = len;
 807       if (token_spellings[type].type > SPELL_NONE)
 808         {
 809           memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
 810           TOK_NAME (list, i) = list->namebuf + list->name_used;
 811           list->name_used += len;
 812         }
 813       else
 814         TOK_NAME (list, i) = token_spellings[type].spelling;
 815       i++;
 816       space_before = 0;
 817     }
 818
 819   /* XXX Temporary kluge: put back the newline (or whatever).  */
 820   FORWARD(-1);
 821
 822   /* Don't consider the first token to have white before.  */
 823   TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
 824   return type;
 825 }
 826
 827 /* Skip a C-style block comment.  We know it's a comment, and point is
 828    at the second character of the starter.  */
 829 static void
 830 skip_block_comment (pfile)
 831      cpp_reader *pfile;
 832 {
 833   unsigned int line, col;
 834   const U_CHAR *limit, *cur;
 835
 836   FORWARD(1);
 837   line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 838   col = CPP_BUF_COL (CPP_BUFFER (pfile));
 839   limit = CPP_BUFFER (pfile)->rlimit;
 840   cur = CPP_BUFFER (pfile)->cur;
 841
 842   while (cur < limit)
 843     {
 844       char c = *cur++;
 845       if (c == '\n' || c == '\r')
 846         {
 847           /* \r cannot be a macro escape marker here. */
 848           if (!ACTIVE_MARK_P (pfile))
 849             CPP_BUMP_LINE_CUR (pfile, cur);
 850         }
 851       else if (c == '*')
 852         {
 853           /* Check for teminator.  */
 854           if (cur < limit && *cur == '/')
 855             goto out;
 856
 857           /* Warn about comment starter embedded in comment.  */
 858           if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
 859             cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
 860                                    cur - CPP_BUFFER (pfile)->line_base,
 861                                    "'/*' within comment");
 862         }
 863     }
 864
 865   cpp_error_with_line (pfile, line, col, "unterminated comment");
 866   cur--;
 867  out:
 868   CPP_BUFFER (pfile)->cur = cur + 1;
 869 }
 870
 871 /* Skip a C++/Chill line comment.  We know it's a comment, and point
 872    is at the second character of the initiator.  */
 873 static void
 874 skip_line_comment (pfile)
 875      cpp_reader *pfile;
 876 {
 877   FORWARD(1);
 878   for (;;)
 879     {
 880       int c = GETC ();
 881
 882       /* We don't have to worry about EOF in here.  */
 883       if (c == '\n')
 884         {
 885           /* Don't consider final '\n' to be part of comment.  */
 886           FORWARD(-1);
 887           return;
 888         }
 889       else if (c == '\r')
 890         {
 891           /* \r cannot be a macro escape marker here. */
 892           if (!ACTIVE_MARK_P (pfile))
 893             CPP_BUMP_LINE (pfile);
 894           if (CPP_OPTION (pfile, warn_comments))
 895             cpp_warning (pfile, "backslash-newline within line comment");
 896         }
 897     }
 898 }
 899
 900 /* Skip a comment - C, C++, or Chill style.  M is the first character
 901    of the comment marker.  If this really is a comment, skip to its
 902    end and return ' '.  If this is not a comment, return M (which will
 903    be '/' or '-').  */
 904
 905 static int
 906 skip_comment (pfile, m)
 907      cpp_reader *pfile;
 908      int m;
 909 {
 910   if (m == '/' && PEEKC() == '*')
 911     {
 912       skip_block_comment (pfile);
 913       return ' ';
 914     }
 915   else if (m == '/' && PEEKC() == '/')
 916     {
 917       if (CPP_BUFFER (pfile)->system_header_p)
 918         {
 919           /* We silently allow C++ comments in system headers, irrespective
 920              of conformance mode, because lots of busted systems do that
 921              and trying to clean it up in fixincludes is a nightmare.  */
 922           skip_line_comment (pfile);
 923           return ' ';
 924         }
 925       else if (CPP_OPTION (pfile, cplusplus_comments))
 926         {
 927           if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
 928             {
 929               if (CPP_WTRADITIONAL (pfile))
 930                 cpp_pedwarn (pfile,
 931                         "C++ style comments are not allowed in traditional C");
 932               else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
 933                 cpp_pedwarn (pfile,
 934                         "C++ style comments are not allowed in ISO C89");
 935               if (CPP_WTRADITIONAL (pfile)
 936                   || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
 937                 cpp_pedwarn (pfile,
 938                            "(this will be reported only once per input file)");
 939               CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
 940             }
 941           skip_line_comment (pfile);
 942           return ' ';
 943         }
 944       else
 945         return m;
 946     }
 947   else if (m == '-' && PEEKC() == '-'
 948            && CPP_OPTION (pfile, chill))
 949     {
 950       skip_line_comment (pfile);
 951       return ' ';
 952     }
 953   else
 954     return m;
 955 }
 956
 957 /* Identical to skip_comment except that it copies the comment into the
 958    token_buffer.  This is used if !discard_comments.  */
 959 static int
 960 copy_comment (pfile, m)
 961      cpp_reader *pfile;
 962      int m;
 963 {
 964   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
 965   const U_CHAR *limit;
 966
 967   if (skip_comment (pfile, m) == m)
 968     return m;
 969
 970   limit = CPP_BUFFER (pfile)->cur;
 971   CPP_RESERVE (pfile, limit - start + 2);
 972   CPP_PUTC_Q (pfile, m);
 973   for (; start <= limit; start++)
 974     if (*start != '\r')
 975       CPP_PUTC_Q (pfile, *start);
 976
 977   return ' ';
 978 }
 979
 980 static void
 981 null_warning (pfile, count)
 982      cpp_reader *pfile;
 983      unsigned int count;
 984 {
 985   if (count == 1)
 986     cpp_warning (pfile, "embedded null character ignored");
 987   else
 988     cpp_warning (pfile, "embedded null characters ignored");
 989 }
 990
 991 /* Skip whitespace \-newline and comments.  Does not macro-expand.  */
 992
 993 void
 994 _cpp_skip_hspace (pfile)
 995      cpp_reader *pfile;
 996 {
 997   unsigned int null_count = 0;
 998   int c;
 999
1000   while (1)
1001     {
1002       c = GETC();
1003       if (c == EOF)
1004         goto out;
1005       else if (is_hspace(c))
1006         {
1007           if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1008             cpp_pedwarn (pfile, "%s in preprocessing directive",
1009                          c == '\f' ? "formfeed" : "vertical tab");
1010           else if (c == '\0')
1011             null_count++;
1012         }
1013       else if (c == '\r')
1014         {
1015           /* \r is a backslash-newline marker if !has_escapes, and
1016              a deletable-whitespace or no-reexpansion marker otherwise. */
1017           if (CPP_BUFFER (pfile)->has_escapes)
1018             {
1019               if (PEEKC() == ' ')
1020                 FORWARD(1);
1021               else
1022                 break;
1023             }
1024           else
1025             CPP_BUMP_LINE (pfile);
1026         }
1027       else if (c == '/' || c == '-')
1028         {
1029           c = skip_comment (pfile, c);
1030           if (c  != ' ')
1031             break;
1032         }
1033       else
1034         break;
1035     }
1036   FORWARD(-1);
1037  out:
1038   if (null_count)
1039     null_warning (pfile, null_count);
1040 }
1041
1042 /* Read and discard the rest of the current line.  */
1043
1044 void
1045 _cpp_skip_rest_of_line (pfile)
1046      cpp_reader *pfile;
1047 {
1048   for (;;)
1049     {
1050       int c = GETC();
1051       switch (c)
1052         {
1053         case '\n':
1054           FORWARD(-1);
1055         case EOF:
1056           return;
1057
1058         case '\r':
1059           if (! CPP_BUFFER (pfile)->has_escapes)
1060             CPP_BUMP_LINE (pfile);
1061           break;
1062
1063         case '\'':
1064         case '\"':
1065           skip_string (pfile, c);
1066           break;
1067
1068         case '/':
1069         case '-':
1070           skip_comment (pfile, c);
1071           break;
1072
1073         case '\f':
1074         case '\v':
1075           if (CPP_PEDANTIC (pfile))
1076             cpp_pedwarn (pfile, "%s in preprocessing directive",
1077                          c == '\f' ? "formfeed" : "vertical tab");
1078           break;
1079
1080         }
1081     }
1082 }
1083
1084 /* Parse an identifier starting with C.  */
1085
1086 void
1087 _cpp_parse_name (pfile, c)
1088      cpp_reader *pfile;
1089      int c;
1090 {
1091   for (;;)
1092   {
1093       if (! is_idchar(c))
1094       {
1095           FORWARD (-1);
1096           break;
1097       }
1098
1099       if (c == '$' && CPP_PEDANTIC (pfile))
1100         cpp_pedwarn (pfile, "`$' in identifier");
1101
1102       CPP_RESERVE(pfile, 2); /* One more for final NUL.  */
1103       CPP_PUTC_Q (pfile, c);
1104       c = GETC();
1105       if (c == EOF)
1106         break;
1107   }
1108   return;
1109 }
1110
1111 /* Parse and skip over a string starting with C.  A single quoted
1112    string is treated like a double -- some programs (e.g., troff) are
1113    perverse this way.  (However, a single quoted string is not allowed
1114    to extend over multiple lines.)  */
1115 static void
1116 skip_string (pfile, c)
1117      cpp_reader *pfile;
1118      int c;
1119 {
1120   unsigned int start_line, start_column;
1121   unsigned int null_count = 0;
1122
1123   start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1124   start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1125   while (1)
1126     {
1127       int cc = GETC();
1128       switch (cc)
1129         {
1130         case EOF:
1131           cpp_error_with_line (pfile, start_line, start_column,
1132                                "unterminated string or character constant");
1133           if (pfile->multiline_string_line != start_line
1134               && pfile->multiline_string_line != 0)
1135             cpp_error_with_line (pfile,
1136                                  pfile->multiline_string_line, -1,
1137                          "possible real start of unterminated constant");
1138           pfile->multiline_string_line = 0;
1139           goto out;
1140
1141         case '\0':
1142           null_count++;
1143           break;
1144
1145         case '\n':
1146           CPP_BUMP_LINE (pfile);
1147           /* In Fortran and assembly language, silently terminate
1148              strings of either variety at end of line.  This is a
1149              kludge around not knowing where comments are in these
1150              languages.  */
1151           if (CPP_OPTION (pfile, lang_fortran)
1152               || CPP_OPTION (pfile, lang_asm))
1153             {
1154               FORWARD(-1);
1155               goto out;
1156             }
1157           /* Character constants may not extend over multiple lines.
1158              In Standard C, neither may strings.  We accept multiline
1159              strings as an extension.  */
1160           if (c == '\'')
1161             {
1162               cpp_error_with_line (pfile, start_line, start_column,
1163                                    "unterminated character constant");
1164               FORWARD(-1);
1165               goto out;
1166             }
1167           if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1168             cpp_pedwarn_with_line (pfile, start_line, start_column,
1169                                    "string constant runs past end of line");
1170           if (pfile->multiline_string_line == 0)
1171             pfile->multiline_string_line = start_line;
1172           break;
1173
1174         case '\r':
1175           if (CPP_BUFFER (pfile)->has_escapes)
1176             {
1177               cpp_ice (pfile, "\\r escape inside string constant");
1178               FORWARD(1);
1179             }
1180           else
1181             /* Backslash newline is replaced by nothing at all.  */
1182             CPP_BUMP_LINE (pfile);
1183           break;
1184
1185         case '\\':
1186           FORWARD(1);
1187           break;
1188
1189         case '\"':
1190         case '\'':
1191           if (cc == c)
1192             goto out;
1193           break;
1194         }
1195     }
1196
1197  out:
1198   if (null_count == 1)
1199     cpp_warning (pfile, "null character in string or character constant");
1200   else if (null_count > 1)
1201     cpp_warning (pfile, "null characters in string or character constant");
1202 }
1203
1204 /* Parse a string and copy it to the output.  */
1205
1206 static void
1207 parse_string (pfile, c)
1208      cpp_reader *pfile;
1209      int c;
1210 {
1211   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
1212   const U_CHAR *limit;
1213
1214   skip_string (pfile, c);
1215
1216   limit = CPP_BUFFER (pfile)->cur;
1217   CPP_RESERVE (pfile, limit - start + 2);
1218   CPP_PUTC_Q (pfile, c);
1219   for (; start < limit; start++)
1220     if (*start != '\r')
1221       CPP_PUTC_Q (pfile, *start);
1222 }
1223
1224 /* Get the next token, and add it to the text in pfile->token_buffer.
1225    Return the kind of token we got.  */
1226
1227 enum cpp_ttype
1228 _cpp_lex_token (pfile)
1229      cpp_reader *pfile;
1230 {
1231   register int c, c2;
1232   enum cpp_ttype token;
1233
1234   if (CPP_BUFFER (pfile) == NULL)
1235     return CPP_EOF;
1236
1237  get_next:
1238   c = GETC();
1239   switch (c)
1240     {
1241     case EOF:
1242       return CPP_EOF;
1243
1244     case '/':
1245       if (PEEKC () == '=')
1246         goto op2;
1247
1248     comment:
1249       if (CPP_OPTION (pfile, discard_comments))
1250         c = skip_comment (pfile, c);
1251       else
1252         c = copy_comment (pfile, c);
1253       if (c != ' ')
1254         goto randomchar;
1255
1256       /* Comments are equivalent to spaces.
1257          For -traditional, a comment is equivalent to nothing.  */
1258       if (!CPP_OPTION (pfile, discard_comments))
1259         return CPP_COMMENT;
1260       else if (CPP_TRADITIONAL (pfile))
1261         goto get_next;
1262       else
1263         {
1264           CPP_PUTC (pfile, c);
1265           return CPP_HSPACE;
1266         }
1267
1268     case '#':
1269       CPP_PUTC (pfile, c);
1270
1271     hash:
1272       c2 = PEEKC ();
1273       if (c2 == '#')
1274         {
1275           FORWARD (1);
1276           CPP_PUTC (pfile, c2);
1277           return CPP_PASTE;
1278         }
1279       else if (c2 == '%' && PEEKN (1) == ':')
1280         {
1281           /* Digraph: "%:" == "#".  */
1282           FORWARD (1);
1283           CPP_RESERVE (pfile, 2);
1284           CPP_PUTC_Q (pfile, c2);
1285           CPP_PUTC_Q (pfile, GETC ());
1286           return CPP_PASTE;
1287         }
1288       else
1289         return CPP_HASH;
1290
1291     case '\"':
1292     case '\'':
1293       parse_string (pfile, c);
1294       return c == '\'' ? CPP_CHAR : CPP_STRING;
1295
1296     case '$':
1297       if (!CPP_OPTION (pfile, dollars_in_ident))
1298         goto randomchar;
1299       goto letter;
1300
1301     case ':':
1302       c2 = PEEKC ();
1303       /* Digraph: ":>" == "]".  */
1304       if (c2 == '>'
1305           || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1306         goto op2;
1307       goto randomchar;
1308
1309     case '&':
1310     case '+':
1311     case '|':
1312       c2 = PEEKC ();
1313       if (c2 == c || c2 == '=')
1314         goto op2;
1315       goto randomchar;
1316
1317     case '%':
1318       /* Digraphs: "%:" == "#", "%>" == "}".  */
1319       c2 = PEEKC ();
1320       if (c2 == ':')
1321         {
1322           FORWARD (1);
1323           CPP_RESERVE (pfile, 2);
1324           CPP_PUTC_Q (pfile, c);
1325           CPP_PUTC_Q (pfile, c2);
1326           goto hash;
1327         }
1328       else if (c2 == '>')
1329         {
1330           FORWARD (1);
1331           CPP_RESERVE (pfile, 2);
1332           CPP_PUTC_Q (pfile, c);
1333           CPP_PUTC_Q (pfile, c2);
1334           return CPP_OPEN_BRACE;
1335         }
1336       /* else fall through */
1337
1338     case '*':
1339     case '!':
1340     case '=':
1341     case '^':
1342       if (PEEKC () == '=')
1343         goto op2;
1344       goto randomchar;
1345
1346     case '-':
1347       c2 = PEEKC ();
1348       if (c2 == '-')
1349         {
1350           if (CPP_OPTION (pfile, chill))
1351             goto comment;  /* Chill style comment */
1352           else
1353             goto op2;
1354         }
1355       else if (c2 == '=')
1356         goto op2;
1357       else if (c2 == '>')
1358         {
1359           if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1360             {
1361               /* In C++, there's a ->* operator.  */
1362               token = CPP_OTHER;
1363               CPP_RESERVE (pfile, 4);
1364               CPP_PUTC_Q (pfile, c);
1365               CPP_PUTC_Q (pfile, GETC ());
1366               CPP_PUTC_Q (pfile, GETC ());
1367               return token;
1368             }
1369           goto op2;
1370         }
1371       goto randomchar;
1372
1373     case '<':
1374       if (pfile->parsing_include_directive)
1375         {
1376           for (;;)
1377             {
1378               CPP_PUTC (pfile, c);
1379               if (c == '>')
1380                 break;
1381               c = GETC ();
1382               if (c == '\n' || c == EOF)
1383                 {
1384                   cpp_error (pfile,
1385                              "missing '>' in `#include <FILENAME>'");
1386                   break;
1387                 }
1388               else if (c == '\r')
1389                 {
1390                   if (!CPP_BUFFER (pfile)->has_escapes)
1391                     {
1392                       /* Backslash newline is replaced by nothing. */
1393                       CPP_ADJUST_WRITTEN (pfile, -1);
1394                       CPP_BUMP_LINE (pfile);
1395                     }
1396                   else
1397                     {
1398                       /* We might conceivably get \r- or \r<space> in
1399                          here.  Just delete 'em. */
1400                       int d = GETC();
1401                       if (d != '-' && d != ' ')
1402                         cpp_ice (pfile, "unrecognized escape \\r%c", d);
1403                       CPP_ADJUST_WRITTEN (pfile, -1);
1404                     }
1405                 }
1406             }
1407           return CPP_STRING;
1408         }
1409       /* Digraphs: "<%" == "{", "<:" == "[".  */
1410       c2 = PEEKC ();
1411       if (c2 == '%')
1412         {
1413           FORWARD (1);
1414           CPP_RESERVE (pfile, 2);
1415           CPP_PUTC_Q (pfile, c);
1416           CPP_PUTC_Q (pfile, c2);
1417           return CPP_CLOSE_BRACE;
1418         }
1419       else if (c2 == ':')
1420         goto op2;
1421       /* else fall through */
1422     case '>':
1423       c2 = PEEKC ();
1424       if (c2 == '=')
1425         goto op2;
1426       /* GNU C++ supports MIN and MAX operators <? and >?.  */
1427       if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1428         goto randomchar;
1429       FORWARD(1);
1430       CPP_RESERVE (pfile, 3);
1431       CPP_PUTC_Q (pfile, c);
1432       CPP_PUTC_Q (pfile, c2);
1433       if (PEEKC () == '=')
1434         CPP_PUTC_Q (pfile, GETC ());
1435       return CPP_OTHER;
1436
1437     case '.':
1438       c2 = PEEKC ();
1439       if (ISDIGIT (c2))
1440         {
1441           CPP_PUTC (pfile, c);
1442           c = GETC ();
1443           goto number;
1444         }
1445
1446       /* In C++ there's a .* operator.  */
1447       if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1448         goto op2;
1449
1450       if (c2 == '.' && PEEKN(1) == '.')
1451         {
1452           CPP_RESERVE (pfile, 3);
1453           CPP_PUTC_Q (pfile, '.');
1454           CPP_PUTC_Q (pfile, '.');
1455           CPP_PUTC_Q (pfile, '.');
1456           FORWARD (2);
1457           return CPP_ELLIPSIS;
1458         }
1459       goto randomchar;
1460
1461     op2:
1462       CPP_RESERVE (pfile, 2);
1463       CPP_PUTC_Q (pfile, c);
1464       CPP_PUTC_Q (pfile, GETC ());
1465       return CPP_OTHER;
1466
1467     case 'L':
1468       c2 = PEEKC ();
1469       if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1470         {
1471           CPP_PUTC (pfile, c);
1472           c = GETC ();
1473           parse_string (pfile, c);
1474           return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1475         }
1476       goto letter;
1477
1478     case '0': case '1': case '2': case '3': case '4':
1479     case '5': case '6': case '7': case '8': case '9':
1480     number:
1481     c2  = '.';
1482     for (;;)
1483       {
1484         CPP_RESERVE (pfile, 2);
1485         CPP_PUTC_Q (pfile, c);
1486         c = PEEKC ();
1487         if (c == EOF)
1488           break;
1489         if (!is_numchar(c) && c != '.'
1490             && ((c2 != 'e' && c2 != 'E'
1491                  && ((c2 != 'p' && c2 != 'P')
1492                      || CPP_OPTION (pfile, c89)))
1493                 || (c != '+' && c != '-')))
1494           break;
1495         FORWARD(1);
1496         c2= c;
1497       }
1498     return CPP_NUMBER;
1499     case 'b': case 'c': case 'd': case 'h': case 'o':
1500     case 'B': case 'C': case 'D': case 'H': case 'O':
1501       if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1502         {
1503           CPP_RESERVE (pfile, 2);
1504           CPP_PUTC_Q (pfile, c);
1505           CPP_PUTC_Q (pfile, '\'');
1506           FORWARD(1);
1507           for (;;)
1508             {
1509               c = GETC();
1510               if (c == EOF)
1511                 goto chill_number_eof;
1512               if (!is_numchar(c))
1513                 break;
1514               CPP_PUTC (pfile, c);
1515             }
1516           if (c == '\'')
1517             {
1518               CPP_RESERVE (pfile, 2);
1519               CPP_PUTC_Q (pfile, c);
1520               return CPP_STRING;
1521             }
1522           else
1523             {
1524               FORWARD(-1);
1525             chill_number_eof:
1526               return CPP_NUMBER;
1527             }
1528         }
1529       else
1530         goto letter;
1531     case '_':
1532     case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1533     case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1534     case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1535     case 'x': case 'y': case 'z':
1536     case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1537     case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1538     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1539     case 'Y': case 'Z':
1540     letter:
1541     _cpp_parse_name (pfile, c);
1542     return CPP_MACRO;
1543
1544     case ' ':  case '\t':  case '\v': case '\f': case '\0':
1545       {
1546         int null_count = 0;
1547
1548         for (;;)
1549           {
1550             if (c == '\0')
1551               null_count++;
1552             else
1553               CPP_PUTC (pfile, c);
1554             c = PEEKC ();
1555             if (c == EOF || !is_hspace(c))
1556               break;
1557             FORWARD(1);
1558           }
1559         if (null_count)
1560           null_warning (pfile, null_count);
1561         return CPP_HSPACE;
1562       }
1563
1564     case '\r':
1565       if (CPP_BUFFER (pfile)->has_escapes)
1566         {
1567           c = GETC ();
1568           if (c == '-')
1569             {
1570               if (pfile->output_escapes)
1571                 CPP_PUTS (pfile, "\r-", 2);
1572               _cpp_parse_name (pfile, GETC ());
1573               return CPP_NAME;
1574             }
1575           else if (c == ' ')
1576             {
1577               /* "\r " means a space, but only if necessary to prevent
1578                  accidental token concatenation.  */
1579               CPP_RESERVE (pfile, 2);
1580               if (pfile->output_escapes)
1581                 CPP_PUTC_Q (pfile, '\r');
1582               CPP_PUTC_Q (pfile, c);
1583               return CPP_HSPACE;
1584             }
1585           else
1586             {
1587               cpp_ice (pfile, "unrecognized escape \\r%c", c);
1588               goto get_next;
1589             }
1590         }
1591       else
1592         {
1593           /* Backslash newline is ignored. */
1594           if (!ACTIVE_MARK_P (pfile))
1595             CPP_BUMP_LINE (pfile);
1596           goto get_next;
1597         }
1598
1599     case '\n':
1600       CPP_PUTC (pfile, c);
1601       return CPP_VSPACE;
1602
1603     case '(': token = CPP_OPEN_PAREN;  goto char1;
1604     case ')': token = CPP_CLOSE_PAREN; goto char1;
1605     case '{': token = CPP_OPEN_BRACE;  goto char1;
1606     case '}': token = CPP_CLOSE_BRACE; goto char1;
1607     case ',': token = CPP_COMMA;       goto char1;
1608     case ';': token = CPP_SEMICOLON;   goto char1;
1609
1610     randomchar:
1611     default:
1612       token = CPP_OTHER;
1613     char1:
1614       CPP_PUTC (pfile, c);
1615       return token;
1616     }
1617 }
1618
1619 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1620    Caller is expected to have checked no_macro_expand.  */
1621 static int
1622 maybe_macroexpand (pfile, written)
1623      cpp_reader *pfile;
1624      long written;
1625 {
1626   U_CHAR *macro = pfile->token_buffer + written;
1627   size_t len = CPP_WRITTEN (pfile) - written;
1628   cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
1629
1630   /* cpp_lookup never returns null.  */
1631   if (hp->type == T_VOID)
1632     return 0;
1633   if (hp->disabled || hp->type == T_IDENTITY)
1634     {
1635       if (pfile->output_escapes)
1636         {
1637           /* Insert a no-reexpand marker before IDENT.  */
1638           CPP_RESERVE (pfile, 2);
1639           CPP_ADJUST_WRITTEN (pfile, 2);
1640           macro = pfile->token_buffer + written;
1641
1642           memmove (macro + 2, macro, len);
1643           macro[0] = '\r';
1644           macro[1] = '-';
1645         }
1646       return 0;
1647     }
1648   if (hp->type == T_EMPTY)
1649     {
1650       /* Special case optimization: macro expands to nothing.  */
1651       CPP_SET_WRITTEN (pfile, written);
1652       CPP_PUTC_Q (pfile, ' ');
1653       return 1;
1654     }
1655
1656   /* If macro wants an arglist, verify that a '(' follows.  */
1657   if (hp->type == T_FMACRO)
1658     {
1659       int macbuf_whitespace = 0;
1660       int c;
1661
1662       while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1663         {
1664           const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1665           for (;;)
1666             {
1667               _cpp_skip_hspace (pfile);
1668               c = PEEKC ();
1669               if (c == '\n')
1670                 FORWARD(1);
1671               else
1672                 break;
1673             }
1674           if (point != CPP_BUFFER (pfile)->cur)
1675             macbuf_whitespace = 1;
1676           if (c == '(')
1677             goto is_macro_call;
1678           else if (c != EOF)
1679             goto not_macro_call;
1680           cpp_pop_buffer (pfile);
1681         }
1682
1683       CPP_SET_MARK (pfile);
1684       for (;;)
1685         {
1686           _cpp_skip_hspace (pfile);
1687           c = PEEKC ();
1688           if (c == '\n')
1689             FORWARD(1);
1690           else
1691             break;
1692         }
1693       CPP_GOTO_MARK (pfile);
1694
1695       if (c != '(')
1696         {
1697         not_macro_call:
1698           if (macbuf_whitespace)
1699             CPP_PUTC (pfile, ' ');
1700
1701           /* K+R treated this as a hard error.  */
1702           if (CPP_WTRADITIONAL (pfile))
1703             cpp_warning (pfile,
1704          "function macro %s must be used with arguments in traditional C",
1705                          hp->name);
1706           return 0;
1707         }
1708     }
1709
1710  is_macro_call:
1711   /* This is now known to be a macro call.
1712      Expand the macro, reading arguments as needed,
1713      and push the expansion on the input stack.  */
1714   _cpp_macroexpand (pfile, hp);
1715   CPP_SET_WRITTEN (pfile, written);
1716   return 1;
1717 }
1718
1719 /* Complain about \v or \f in a preprocessing directive (constraint
1720    violation, C99 6.10 para 5).  Caller has checked CPP_PEDANTIC.  */
1721 static void
1722 pedantic_whitespace (pfile, p, len)
1723      cpp_reader *pfile;
1724      U_CHAR *p;
1725      unsigned int len;
1726 {
1727   while (len)
1728     {
1729       if (*p == '\v')
1730         cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1731       else if (*p == '\f')
1732         cpp_pedwarn (pfile, "form feed in preprocessing directive");
1733       p++;
1734       len--;
1735     }
1736 }
1737
1738
1739 enum cpp_ttype
1740 cpp_get_token (pfile)
1741      cpp_reader *pfile;
1742 {
1743   enum cpp_ttype token;
1744   long written = CPP_WRITTEN (pfile);
1745   int macro_buffer;
1746
1747  get_next:
1748   token = _cpp_lex_token (pfile);
1749
1750   switch (token)
1751     {
1752     default:
1753       if (pfile->skipping)
1754         break;
1755       pfile->potential_control_macro = 0;
1756       pfile->only_seen_white = 0;
1757       break;
1758
1759     case CPP_HSPACE:
1760     case CPP_COMMENT:
1761       break;
1762
1763     case CPP_VSPACE:
1764       if (pfile->only_seen_white == 0)
1765         pfile->only_seen_white = 1;
1766       CPP_BUMP_LINE (pfile);
1767       break;
1768
1769     case CPP_HASH:
1770       pfile->potential_control_macro = 0;
1771       if (!pfile->only_seen_white)
1772         break;
1773       /* XXX shouldn't have to do this - remove the hash or %: from
1774          the token buffer.  */
1775       if (CPP_PWRITTEN (pfile)[-1] == '#')
1776         CPP_ADJUST_WRITTEN (pfile, -1);
1777       else
1778         CPP_ADJUST_WRITTEN (pfile, -2);
1779
1780       if (_cpp_handle_directive (pfile))
1781         {
1782           token = CPP_DIRECTIVE;
1783           break;
1784         }
1785       pfile->only_seen_white = 0;
1786       CPP_PUTC (pfile, '#');
1787       break;
1788
1789     case CPP_MACRO:
1790       if (pfile->skipping)
1791         break;
1792       pfile->potential_control_macro = 0;
1793       pfile->only_seen_white = 0;
1794       if (! pfile->no_macro_expand
1795           && maybe_macroexpand (pfile, written))
1796         goto get_next;
1797       token = CPP_NAME;
1798       break;
1799
1800       /* Do not run this case through the 'skipping' logic.  */
1801     case CPP_EOF:
1802       if (CPP_BUFFER (pfile) == NULL)
1803         return CPP_EOF;
1804       macro_buffer = CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile));
1805
1806       cpp_pop_buffer (pfile);
1807       if (macro_buffer)
1808         goto get_next;
1809       return CPP_EOF;
1810     }
1811
1812   if (pfile->skipping)
1813     {
1814       CPP_SET_WRITTEN (pfile, written);
1815       goto get_next;
1816     }
1817   return token;
1818 }
1819
1820 /* Like cpp_get_token, but skip spaces and comments.  */
1821
1822 enum cpp_ttype
1823 cpp_get_non_space_token (pfile)
1824      cpp_reader *pfile;
1825 {
1826   int old_written = CPP_WRITTEN (pfile);
1827   for (;;)
1828     {
1829       enum cpp_ttype token = cpp_get_token (pfile);
1830       if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1831         return token;
1832       CPP_SET_WRITTEN (pfile, old_written);
1833     }
1834 }
1835
1836 /* Like cpp_get_token, except that it does not execute directives,
1837    does not consume vertical space, and discards horizontal space.  */
1838 enum cpp_ttype
1839 _cpp_get_directive_token (pfile)
1840      cpp_reader *pfile;
1841 {
1842   long old_written;
1843   enum cpp_ttype token;
1844   int at_bol;
1845
1846  get_next:
1847   at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1848   old_written = CPP_WRITTEN (pfile);
1849   token = _cpp_lex_token (pfile);
1850   switch (token)
1851     {
1852     default:
1853       return token;
1854
1855     case CPP_VSPACE:
1856       /* Put it back and return VSPACE.  */
1857       FORWARD(-1);
1858       CPP_ADJUST_WRITTEN (pfile, -1);
1859       return CPP_VSPACE;
1860
1861     case CPP_HSPACE:
1862       /* The purpose of this rather strange check is to prevent pedantic
1863          warnings for ^L in an #ifdefed out block.  */
1864       if (CPP_PEDANTIC (pfile) && ! at_bol)
1865         pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1866                              CPP_WRITTEN (pfile) - old_written);
1867       CPP_SET_WRITTEN (pfile, old_written);
1868       goto get_next;
1869       return CPP_HSPACE;
1870
1871     case CPP_MACRO:
1872       if (! pfile->no_macro_expand
1873           && maybe_macroexpand (pfile, old_written))
1874         goto get_next;
1875       return CPP_NAME;
1876
1877     case CPP_EOF:
1878       if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1879         {
1880           cpp_pop_buffer (pfile);
1881           goto get_next;
1882         }
1883       else
1884         /* This can happen for files that don't end with a newline,
1885            and for cpp_define and friends.  Pretend they do, so
1886            callers don't have to deal.  A warning will be issued by
1887            someone else, if necessary.  */
1888         return CPP_VSPACE;
1889     }
1890 }
1891
1892 /* Determine the current line and column.  Used only by read_and_prescan. */
1893 static U_CHAR *
1894 find_position (start, limit, linep)
1895      U_CHAR *start;
1896      U_CHAR *limit;
1897      unsigned long *linep;
1898 {
1899   unsigned long line = *linep;
1900   U_CHAR *lbase = start;
1901   while (start < limit)
1902     {
1903       U_CHAR ch = *start++;
1904       if (ch == '\n' || ch == '\r')
1905         {
1906           line++;
1907           lbase = start;
1908         }
1909     }
1910   *linep = line;
1911   return lbase;
1912 }
1913
/* The following table is used by _cpp_prescan.  If we have
   designated initializers, it can be constant data; otherwise, it is
   set up at runtime by _cpp_init_input_buffer.

   The helper macros CHARTAB, END and s exist only so that one list
   of s(index, value) entries can expand either way; all three are
   #undef'd again once the table has been emitted.  */

#if (GCC_VERSION >= 2007)
/* Static table: s(p, v) becomes the designated initializer [p] = v,
   and init_chartab() expands to nothing.  */
#define init_chartab()  /* nothing */
#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
#define END };
#define s(p, v) [p] = v,
#else
/* Runtime table: chartab starts out zeroed, and the s(p, v) entries
   become assignments forming the body of a function init_chartab.  */
#define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
 static void init_chartab PARAMS ((void)) { \
 unsigned char *x = chartab;
#define END }
#define s(p, v) x[p] = v;
#endif

/* Table of characters that can't be handled in the inner loop.
   Also contains the mapping between trigraph third characters and their
   replacements.  The three SPECCASE codes below are small integers,
   so NORMAL and NONTRI can tell them apart from replacement
   characters with a single comparison against SPECCASE_QUESTION.  */
#define SPECCASE_CR        1
#define SPECCASE_BACKSLASH 2
#define SPECCASE_QUESTION  3

CHARTAB
  s('\r', SPECCASE_CR)
  s('\\', SPECCASE_BACKSLASH)
  s('?',  SPECCASE_QUESTION)

  /* Trigraph third character -> replacement, e.g. '=' -> '#' for
     the trigraph ??=.  */
  s('=', '#')   s(')', ']')     s('!', '|')
  s('(', '[')   s('\'', '^')    s('>', '}')
  s('/', '\\')  s('<', '{')     s('-', '~')
END

#undef CHARTAB
#undef END
#undef s

/* NORMAL(c) is true when C's table entry is not one of the SPECCASE
   codes, i.e. it is either zero or a trigraph replacement.  */
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
/* NOTE(review): NONTRI looks intended for a value read out of
   chartab (true when it is zero or a SPECCASE code rather than a
   trigraph replacement) - confirm against its use in _cpp_prescan.  */
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1954
1955 /* Prescan pass over a file already loaded into BUF.  This is
1956    translation phases 1 and 2 (C99 5.1.1.2).
1957
1958    Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1959    canonical form (\n).  If enabled, convert and/or warn about
1960    trigraphs.  Convert backslash-newline to a one-character escape
1961    (\r) and remove it from "embarrassing" places (i.e. the middle of a
1962    token).  If there is no newline at the end of the file, add one and
1963    warn.  Returns -1 on failure, or the actual length of the data to
1964    be scanned.
1965
1966    This function does a lot of work, and can be a serious performance
1967    bottleneck.  It has been tuned heavily; make sure you understand it
1968    before hacking.  The common case - no trigraphs, Unix style line
1969    breaks, backslash-newline set off by whitespace, newline at EOF -
1970    has been optimized at the expense of the others.  The performance
1971    penalty for DOS style line breaks (\r\n) is about 15%.
1972
1973    Warnings lose particularly heavily since we have to determine the
1974    line number, which involves scanning from the beginning of the file
1975    or from the last warning.  The penalty for the absence of a newline
1976    at the end of reload1.c is about 60%.  (reload1.c is 329k.)
1977
1978    If your file has more than one kind of end-of-line marker, you
1979    will get messed-up line numbering.  */
1980
1981 ssize_t
1982 _cpp_prescan (pfile, fp, len)
1983      cpp_reader *pfile;
1984      cpp_buffer *fp;
1985      ssize_t len;
1986 {
1987   U_CHAR *buf, *op;
1988   const U_CHAR *ibase, *ip, *ilimit;
1989   U_CHAR *line_base;
1990   unsigned long line;
1991   unsigned int deferred_newlines;
1992
1993   /* Allocate an extra byte in case we must add a trailing \n.  */
1994   buf = (U_CHAR *) xmalloc (len + 1);
1995   line_base = op = buf;
1996   ip = ibase = fp->buf;
1997   ilimit = ibase + len;
1998   line = 1;
1999   deferred_newlines = 0;
2000
2001   for (;;)
2002     {
2003       const U_CHAR *iq;
2004
2005       /* Deal with \-newline, potentially in the middle of a token. */
2006       if (deferred_newlines)
2007         {
2008           if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
2009             {
2010               /* Previous was not white space.  Skip to white
2011                  space, if we can, before outputting the \r's */
2012               iq = ip;
2013               while (iq < ilimit
2014                      && *iq != ' '
2015                      && *iq != '\t'
2016                      && *iq != '\n'
2017                      && NORMAL(*iq))
2018                 iq++;
2019               memcpy (op, ip, iq - ip);
2020               op += iq - ip;
2021               ip += iq - ip;
2022               if (! NORMAL(*ip))
2023                 goto do_speccase;
2024             }
2025           while (deferred_newlines)
2026             deferred_newlines--, *op++ = '\r';
2027         }
2028
2029       /* Copy as much as we can without special treatment. */
2030       iq = ip;
2031       while (iq < ilimit && NORMAL (*iq)) iq++;
2032       memcpy (op, ip, iq - ip);
2033       op += iq - ip;
2034       ip += iq - ip;
2035
2036     do_speccase:
2037       if (ip >= ilimit)
2038         break;
2039
2040       switch (chartab[*ip++])
2041         {
2042         case SPECCASE_CR:  /* \r */
2043           if (ip[-2] != '\n')
2044             {
2045               if (ip < ilimit && *ip == '\n')
2046                 ip++;
2047               *op++ = '\n';
2048             }
2049           break;
2050
2051         case SPECCASE_BACKSLASH:  /* \ */
2052         backslash:
2053           if (ip < ilimit)
2054             {
2055               if (*ip == '\n')
2056                 {
2057                   deferred_newlines++;
2058                   ip++;
2059                   if (*ip == '\r') ip++;
2060                   break;
2061                 }
2062               else if (*ip == '\r')
2063                 {
2064                   deferred_newlines++;
2065                   ip++;
2066                   if (*ip == '\n') ip++;
2067                   break;
2068                 }
2069             }
2070
2071           *op++ = '\\';
2072           break;
2073
2074         case SPECCASE_QUESTION: /* ? */
2075           {
2076             unsigned int d, t;
2077
2078             *op++ = '?'; /* Normal non-trigraph case */
2079             if (ip > ilimit - 2 || ip[0] != '?')
2080               break;
2081
2082             d = ip[1];
2083             t = chartab[d];
2084             if (NONTRI (t))
2085               break;
2086
2087             if (CPP_OPTION (pfile, warn_trigraphs))
2088               {
2089                 unsigned long col;
2090                 line_base = find_position (line_base, op, &line);
2091                 col = op - line_base + 1;
2092                 if (CPP_OPTION (pfile, trigraphs))
2093                   cpp_warning_with_line (pfile, line, col,
2094                                          "trigraph ??%c converted to %c", d, t);
2095                 else
2096                   cpp_warning_with_line (pfile, line, col,
2097                                          "trigraph ??%c ignored", d);
2098               }
2099
2100             ip += 2;
2101             if (CPP_OPTION (pfile, trigraphs))
2102               {
2103                 op[-1] = t;         /* Overwrite '?' */
2104                 if (t == '\\')
2105                   {
2106                     op--;
2107                     goto backslash;
2108                   }
2109               }
2110             else
2111               {
2112                 *op++ = '?';
2113                 *op++ = d;
2114               }
2115           }
2116           break;
2117         }
2118     }
2119
2120 #ifdef HAVE_MMAP_FILE
2121   if (fp->mapped)
2122     munmap ((caddr_t) fp->buf, len);
2123   else
2124 #endif
2125     free ((PTR) fp->buf);
2126
2127   if (op[-1] != '\n')
2128     {
2129       unsigned long col;
2130       line_base = find_position (line_base, op, &line);
2131       col = op - line_base + 1;
2132       cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2133       *op++ = '\n';
2134     }
2135
2136   fp->buf = buf;
2137   return op - buf;
2138 }
2139
2140 /* Allocate pfile->input_buffer, and initialize chartab[]
2141    if it hasn't happened already.  */
2142
2143 void
2144 _cpp_init_input_buffer (pfile)
2145      cpp_reader *pfile;
2146 {
2147   U_CHAR *tmp;
2148
2149   init_chartab ();
2150   _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2151
2152   /* Determine the appropriate size for the input buffer.  Normal C
2153      source files are smaller than eight K.  */
2154   /* 8Kbytes of buffer proper, 1 to detect running off the end without
2155      address arithmetic all the time, and 3 for pushback during buffer
2156      refill, in case there's a potential trigraph or end-of-line
2157      digraph at the end of a block. */
2158
2159   tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2160   pfile->input_buffer = tmp;
2161   pfile->input_buffer_len = 8192;
2162 }
2163
2164 /* Utility routine:
2165    Compares, in the manner of strcmp(3), the token beginning at TOKEN
2166    and extending for LEN characters to the NUL-terminated string
2167    STRING.  Typical usage:
2168
2169    if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2170                  "inline"))
2171      { ... }
2172  */
2173
2174 int
2175 cpp_idcmp (token, len, string)
2176      const U_CHAR *token;
2177      size_t len;
2178      const char *string;
2179 {
2180   size_t len2 = strlen (string);
2181   int r;
2182
2183   if ((r = memcmp (token, string, MIN (len, len2))))
2184     return r;
2185
2186   /* The longer of the two strings sorts after the shorter.  */
2187   if (len == len2)
2188     return 0;
2189   else if (len < len2)
2190     return -1;
2191   else
2192     return 1;
2193 }
2194
2195 #ifdef NEW_LEXER
2196
2197 /* Lexing algorithm.
2198
2199  The original lexer in cpplib was made up of two passes: a first pass
2200  that replaced trigraphs and deleted escaped newlines, and a second
2201  pass that tokenized the result of the first pass.  Tokenisation was
2202  performed by peeking at the next character in the input stream.  For
2203  example, if the input stream contained "!=", the handler for the !
2204  character would peek at the next character, and if it were a '='
2205  would skip over it, and return a "!=" token, otherwise it would
2206  return just the "!" token.
2207
2208  To implement a single-pass lexer, this peeking ahead is unworkable.
2209  An arbitrary number of escaped newlines, and trigraphs (in particular
2210  ??/ which translates to the escape \), could separate the '!' and '='
2211  in the input stream, yet the next token is still a "!=".
2212
2213  Suppose instead that we lex by one logical line at a time, producing
2214  a token list or stack for each logical line, and when seeing the '!'
2215  push a CPP_NOT token on the list.  Then if the '!' is part of a
2216  longer token ("!=") we know we must see the remainder of the token by
2217  the time we reach the end of the logical line.  Thus we can have the
2218  '=' handler look at the previous token (at the end of the list / top
2219  of the stack) and see if it is a "!" token, and if so, instead of
2220  pushing a "=" token revise the existing token to be a "!=" token.
2221
2222  This works in the presence of escaped newlines, because the '\' would
2223  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
2224  newline ('\n' or '\r') handler looks at the token at the top of the
2225  stack to see if it is a CPP_BACKSLASH, and if so discards both.
2226  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
2227  the '=' handler would never see any intervening escaped newlines.
2228
2229  To make trigraphs work in this context - they have the highest
2230  precedence and are converted before anything else - the '?' handler does
2231  lookahead to see if it is a trigraph, and if so skips the trigraph
2232  and pushes the token it represents onto the top of the stack.  This
2233  also works in the particular case of a CPP_BACKSLASH trigraph.
2234
2235  To the preprocessor, whitespace is only significant to the point of
2236  knowing whether whitespace precedes a particular token.  For example,
2237  the '=' handler needs to know whether there was whitespace between it
2238  and a "!" token on the top of the stack, to make the token conversion
2239  decision correctly.  So each token has a PREV_WHITESPACE flag to
2240  indicate this - the standard permits consecutive whitespace to be
2241  regarded as a single space.  The compiler front ends are not
2242  interested in whitespace at all; they just require a token stream.
2243  Another place where whitespace is significant to the preprocessor is
2244  a #define statement - if there is whitespace between the macro name
2245  and an initial "(" token the macro is "object-like", otherwise it is
2246  a function-like macro that takes arguments.
2247
2248  However, all is not rosy.  Parsing of identifiers, numbers, comments
2249  and strings becomes trickier because of the possibility of raw
2250  trigraphs and escaped newlines in the input stream.
2251
2252  The trigraphs are three consecutive characters beginning with two
2253  question marks.  A question mark is not valid as part of a number or
2254  identifier, so parsing of a number or identifier terminates normally
2255  upon reaching it, returning to the mainloop which handles the
2256  trigraph just like it would in any other position.  Similarly for the
2257  backslash of a backslash-newline combination.  So we just need the
2258  escaped-newline dropper in the mainloop to check if the token on the
2259  top of the stack after dropping the escaped newline is a number or
2260  identifier, and if so to continue the processing it as if nothing had
2261  happened.
2262
2263  For strings, we replace trigraphs whenever we reach a quote or
2264  newline, because there might be a backslash trigraph escaping them.
2265  We need to be careful that we start trigraph replacing from where we
2266  left off previously, because it is possible for a first scan to leave
2267  "fake" trigraphs that a second scan would pick up as real (e.g. the
2268  sequence "????/\n=" would find a fake ??= trigraph after removing the
2269  escaped newline.)
2270
2271  For line comments, on reaching a newline we scan the previous
2272  character(s) to see if it escaped, and continue if it is.  Block
2273  comments ignore everything and just focus on finding the comment
2274  termination mark.  The only difficult thing, and it is surprisingly
2275  tricky, is checking if an asterisk precedes the final slash since
2276  they could be separated by escaped newlines.  If the preprocessor is
2277  invoked with the output comments option, we don't bother removing
2278  escaped newlines and replacing trigraphs for output.
2279
2280  Finally, numbers can begin with a period, which is pushed initially
2281  as a CPP_DOT token in its own right.  The digit handler checks if the
2282  previous token was a CPP_DOT not separated by whitespace, and if so
2283  pops it off the stack and pushes a period into the number's buffer
2284  before calling the number parser.
2285
2286 */
2287
/* Source spellings of the six digraphs.  */
static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                                    U":>", U"<%", U"%>"};

/* Indexed by the third character of a trigraph; holds the character
   the trigraph stands for.  Entries that init_trigraph_map does not
   set stay zero, which is how callers recognise a non-trigraph.  */
static unsigned char trigraph_map[256];

/* Fill in trigraph_map with the nine trigraph replacements.  */
void
init_trigraph_map ()
{
  /* { third character, replacement } */
  static const unsigned char pairs[][2] = {
    { '=', '#' }, { '(', '[' }, { ')', ']' },
    { '/', '\\' }, { '\'', '^' }, { '<', '{' },
    { '>', '}' }, { '!', '|' }, { '-', '~' }
  };
  unsigned int i;

  for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    trigraph_map[pairs[i][0]] = pairs[i][1];
}
2305
2306 /* Call when a trigraph is encountered.  It warns if necessary, and
2307    returns true if the trigraph should be honoured.  END is the third
2308    character of a trigraph in the input stream.  */
2309 static int
2310 trigraph_ok (pfile, end)
2311      cpp_reader *pfile;
2312      const unsigned char *end;
2313 {
2314   int accept = CPP_OPTION (pfile, trigraphs);
2315
2316   if (CPP_OPTION (pfile, warn_trigraphs))
2317     {
2318       unsigned int col = end - 1 - pfile->buffer->line_base;
2319       if (accept)
2320         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2321                                "trigraph ??%c converted to %c",
2322                                (int) *end, (int) trigraph_map[*end]);
2323       else
2324         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2325                                "trigraph ??%c ignored", (int) *end);
2326     }
2327   return accept;
2328 }
2329
2330 /* Scan a string for trigraphs, warning or replacing them inline as
2331    appropriate.  When parsing a string, we must call this routine
2332    before processing a newline character (if trigraphs are enabled),
2333    since the newline might be escaped by a preceding backslash
2334    trigraph sequence.  Returns a pointer to the end of the name after
2335    replacement.  */
2336
2337 static unsigned char*
2338 trigraph_replace (pfile, src, limit)
2339      cpp_reader *pfile;
2340      unsigned char *src;
2341      unsigned char* limit;
2342 {
2343   unsigned char *dest;
2344
2345   /* Starting with src[1], find two consecutive '?'.  The case of no
2346      trigraphs is streamlined.  */
2347
2348   for (; src + 1 < limit; src += 2)
2349     {
2350       if (src[0] != '?')
2351         continue;
2352
2353       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
2354       if (src[-1] == '?')
2355         src--;
2356       else if (src + 2 == limit || src[1] != '?')
2357         continue;
2358
2359       /* Check if it really is a trigraph.  */
2360       if (trigraph_map[src[2]] == 0)
2361         continue;
2362
2363       dest = src;
2364       goto trigraph_found;
2365     }
2366   return limit;
2367
2368   /* Now we have a trigraph, we need to scan the remaining buffer, and
2369      copy-shifting its contents left if replacement is enabled.  */
2370   for (; src + 2 < limit; dest++, src++)
2371     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2372       {
2373       trigraph_found:
2374         src += 2;
2375         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2376           *dest = trigraph_map[*src];
2377       }
2378
2379   /* Copy remaining (at most 2) characters.  */
2380   while (src < limit)
2381     *dest++ = *src++;
2382   return dest;
2383 }
2384
2385 /* If CUR is a backslash or the end of a trigraphed backslash, return
2386    a pointer to its beginning, otherwise NULL.  We don't read beyond
2387    the buffer start, because there is the start of the comment in the
2388    buffer.  */
2389 static const unsigned char *
2390 backslash_start (pfile, cur)
2391      cpp_reader *pfile;
2392      const unsigned char *cur;
2393 {
2394   if (cur[0] == '\\')
2395     return cur;
2396   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2397       && trigraph_ok (pfile, cur))
2398     return cur - 2;
2399   return 0;
2400 }
2401
2402 /* Skip a C-style block comment.  This is probably the trickiest
2403    handler.  We find the end of the comment by seeing if an asterisk
2404    is before every '/' we encounter.  The nasty complication is that a
2405    previous asterisk may be separated by one or more escaped newlines.
2406    Returns non-zero if comment terminated by EOF, zero otherwise.
     
        Scanning starts at pfile->buffer->cur.  On return buffer->cur
        is just past the closing '/', or at the position where the scan
        ran into buffer->rlimit.  */
2407 static int
2408 skip_block_comment2 (pfile)
2409      cpp_reader *pfile;
2410 {
2411   cpp_buffer *buffer = pfile->buffer;
       /* When non-null, the position CUR had right after a run of
          escaped newlines that directly followed a '*'.  A '/' found
          exactly there closes the comment.  */
2412   const unsigned char *char_after_star = 0;
2413   register const unsigned char *cur = buffer->cur;
2414   int seen_eof = 0;
2415
2416   /* Inner loop would think the comment has ended if the first comment
2417      character is a '/'.  Avoid this and keep the inner loop clean by
2418      skipping such a character.  */
2419   if (cur < buffer->rlimit && cur[0] == '/')
2420     cur++;
2421
2422   for (; cur < buffer->rlimit; )
2423     {
2424       unsigned char c = *cur++;
2425
2426       /* People like decorating comments with '*', so check for
2427          '/' instead for efficiency.  */
2428       if (c == '/')
2429         {
               /* cur[-2] is the character in front of this '/'.  */
2430           if (cur[-2] == '*' || cur - 1 == char_after_star)
2431             goto out;
2432
2433           /* Warn about potential nested comments, but not when
2434              the final character inside the comment is a '/'.
2435              Don't bother to get it right across escaped newlines.  */
2436           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2437               && cur[0] == '*' && cur[1] != '/')
2438             {
                   /* Publish the position before warning.  */
2439               buffer->cur = cur;
2440               cpp_warning (pfile, "'/*' within comment");
2441             }
2442         }
2443       else if (IS_NEWLINE(c))
2444         {
               /* cur - 2 is the character in front of the newline; this
                  must be examined before handle_newline moves on.  */
2445           const unsigned char* bslash = backslash_start (pfile, cur - 2);
2446
2447           handle_newline (cur, buffer->rlimit, c);
2448           /* Work correctly if there is an asterisk before an
2449              arbitrarily long sequence of escaped newlines.  */
2450           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2451             char_after_star = cur;
2452           else
2453             char_after_star = 0;
2454         }
2455     }
       /* Fell out of the loop without seeing the terminator.  */
2456   seen_eof = 1;
2457
2458  out:
2459   buffer->cur = cur;
2460   return seen_eof;
2461 }
2462
2463 /* Skip a C++ or Chill line comment.  Handles escaped newlines.
2464    Returns non-zero if a multiline comment.  */
2465 static int
2466 skip_line_comment2 (pfile)
2467      cpp_reader *pfile;
2468 {
2469   cpp_buffer *buffer = pfile->buffer;
2470   register const unsigned char *cur = buffer->cur;
2471   int multiline = 0;
2472
2473   for (; cur < buffer->rlimit; )
2474     {
2475       unsigned char c = *cur++;
2476
2477       if (IS_NEWLINE (c))
2478         {
2479           /* Check for a (trigaph?) backslash escaping the newline.  */
2480           if (!backslash_start (pfile, cur - 2))
2481             goto out;
2482           multiline = 1;
2483           handle_newline (cur, buffer->rlimit, c);
2484         }
2485     }
2486   cur++;
2487
2488  out:
2489   buffer->cur = cur - 1;        /* Leave newline for caller.  */
2490   return multiline;
2491 }
2492
2493 /* Skips whitespace, stopping at next non-whitespace character.
2494    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
2495    to be assigned the correct column.  */
2496 static void
2497 skip_whitespace (pfile, in_directive)
2498      cpp_reader *pfile;
2499      int in_directive;
2500 {
2501   cpp_buffer *buffer = pfile->buffer;
2502   register const unsigned char *cur = buffer->cur;
2503   unsigned short null_count = 0;
2504
2505   for (; cur < buffer->rlimit; )
2506     {
2507       unsigned char c = *cur++;
2508
2509       if (c == '\t')
2510         {
2511           unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2512           pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2513                                 - col % CPP_OPTION(pfile, tabstop));
2514         }
2515       if (IS_HSPACE(c))         /* FIXME: Fix ISTABLE.  */
2516         continue;
2517       if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
2518         goto out;
2519       if (c == '\0')
2520         null_count++;
2521       /* Mut be '\f' or '\v' */
2522       else if (in_directive && CPP_PEDANTIC (pfile))
2523         cpp_pedwarn (pfile, "%s in preprocessing directive",
2524                      c == '\f' ? "formfeed" : "vertical tab");
2525     }
2526   cur++;
2527
2528  out:
2529   buffer->cur = cur - 1;
2530   if (null_count)
2531     cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2532                  : "embedded null character ignored");
2533 }
2534
2535 /* Parse (append) an identifier.  */
2536 static void
2537 parse_name (pfile, list, name)
2538      cpp_reader *pfile;
2539      cpp_toklist *list;
2540      cpp_name *name;
2541 {
2542   const unsigned char *name_limit;
2543   unsigned char *namebuf;
2544   cpp_buffer *buffer = pfile->buffer;
2545   register const unsigned char *cur = buffer->cur;
2546
2547  expanded:
2548   name_limit = list->namebuf + list->name_cap;
2549   namebuf = list->namebuf + list->name_used;
2550
2551   for (; cur < buffer->rlimit && namebuf < name_limit; )
2552     {
2553       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2554
2555       if (! is_idchar(c))
2556         goto out;
2557       namebuf++;
2558       cur++;
2559       if (c == '$' && CPP_PEDANTIC (pfile))
2560         {
2561           buffer->cur = cur;
2562           cpp_pedwarn (pfile, "'$' character in identifier");
2563         }
2564     }
2565
2566   /* Run out of name space?  */
2567   if (cur < buffer->rlimit)
2568     {
2569       list->name_used = namebuf - list->namebuf;
2570       auto_expand_name_space (list);
2571       goto expanded;
2572     }
2573
2574  out:
2575   buffer->cur = cur;
2576   name->len = namebuf - name->text;
2577   list->name_used = namebuf - list->namebuf;
2578 }
2579
2580 /* Parse (append) a number.  */
2581
2582 #define VALID_SIGN(c, prevc) \
2583   (((c) == '+' || (c) == '-') && \
2584    ((prevc) == 'e' || (prevc) == 'E' \
2585     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2586
2587 static void
2588 parse_number (pfile, list, name)
2589      cpp_reader *pfile;
2590      cpp_toklist *list;
2591      cpp_name *name;
2592 {
2593   const unsigned char *name_limit;
2594   unsigned char *namebuf;
2595   cpp_buffer *buffer = pfile->buffer;
2596   register const unsigned char *cur = buffer->cur;
2597
2598  expanded:
2599   name_limit = list->namebuf + list->name_cap;
2600   namebuf = list->namebuf + list->name_used;
2601
2602   for (; cur < buffer->rlimit && namebuf < name_limit; )
2603     {
2604       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2605
2606       /* Perhaps we should accept '$' here if we accept it for
2607          identifiers.  We know namebuf[-1] is safe, because for c to
2608          be a sign we must have pushed at least one character.  */
2609       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2610         goto out;
2611
2612       namebuf++;
2613       cur++;
2614     }
2615
2616   /* Run out of name space?  */
2617   if (cur < buffer->rlimit)
2618     {
2619       list->name_used = namebuf - list->namebuf;
2620       auto_expand_name_space (list);
2621       goto expanded;
2622     }
2623
2624  out:
2625   buffer->cur = cur;
2626   name->len = namebuf - name->text;
2627   list->name_used = namebuf - list->namebuf;
2628 }
2629
2630 /* Places a string terminated by an unescaped TERMINATOR into a
2631    cpp_name, which should be expandable and thus at the top of the
2632    list's stack.  Handles embedded trigraphs, if necessary, and
2633    escaped newlines.
2634
2635    Can be used for character constants (terminator = '\''), string
2636    constants ('"') and angled headers ('>').  Multi-line strings are
2637    allowed, except for within directives.
     
        Characters are copied from pfile->buffer->cur to the end of
        LIST's name buffer.  The terminator itself is consumed from the
        input but not stored.  On return buffer->cur, NAME->len and
        LIST->name_used are updated.  If MULTILINE_OK is zero, an
        unescaped newline makes the string unterminated (except in
        Fortran and assembly mode, where it silently ends it).  */
2638
2639 static void
2640 parse_string2 (pfile, list, name, terminator, multiline_ok)
2641      cpp_reader *pfile;
2642      cpp_toklist *list;
2643      cpp_name *name;
2644      unsigned int terminator;
2645      int multiline_ok;
2646 {
2647   cpp_buffer *buffer = pfile->buffer;
2648   register const unsigned char *cur = buffer->cur;
2649   const unsigned char *name_limit;
2650   unsigned char *namebuf;
2651   unsigned int null_count = 0;
       /* Where, relative to name->text, the next trigraph scan starts.  */
2652   int trigraphed_len = 0;
2653
       /* Re-entered after the name buffer has been expanded.  */
2654  expanded:
2655   name_limit = list->namebuf + list->name_cap;
2656   namebuf = list->namebuf + list->name_used;
2657
2658   for (; cur < buffer->rlimit && namebuf < name_limit; )
2659     {
2660       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
2661
2662       if (c == '\0')
2663         null_count++;
2664       else if (c == terminator || IS_NEWLINE (c))
2665         {
2666           /* Needed for trigraph_replace and multiline string warning.  */
2667           buffer->cur = cur;
2668
2669           /* Scan for trigraphs before checking if backslash-escaped.  */
2670           if (CPP_OPTION (pfile, trigraphs)
2671               || CPP_OPTION (pfile, warn_trigraphs))
2672             {
2673               namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2674                                             namebuf);
                   /* NOTE(review): trigraphed_len is used above as an
                      offset from name->text, but this update also
                      subtracts the old value, which yields a distance
                      from the previous scan start instead.  Confirm
                      which one is intended.  */
2675               trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2676               if (trigraphed_len < 0)
2677                 trigraphed_len = 0;
2678             }
2679
2680           namebuf--;     /* Drop the newline / terminator from the name.  */
2681           if (IS_NEWLINE (c))
2682             {
2683               /* Drop a backslash newline, and continue. */
                   /* NOTE(review): if the newline is the first character
                      copied, namebuf[-1] lies in front of this string's
                      text - verify that this cannot misfire.  */
2684               if (namebuf[-1] == '\\')
2685                 {
2686                   handle_newline (cur, buffer->rlimit, c);
2687                   namebuf--;
2688                   continue;
2689                 }
2690
                   /* Step back onto the newline, so that the exits below
                      leave it unconsumed.  */
2691               cur--;
2692
2693               /* In Fortran and assembly language, silently terminate
2694                  strings of either variety at end of line.  This is a
2695                  kludge around not knowing where comments are in these
2696                  languages.  */
2697               if (CPP_OPTION (pfile, lang_fortran)
2698                   || CPP_OPTION (pfile, lang_asm))
2699                 goto out;
2700
2701               /* Character constants, headers and asserts may not
2702                  extend over multiple lines.  In Standard C, neither
2703                  may strings.  We accept multiline strings as an
2704                  extension, but not in directives.  */
2705               if (!multiline_ok)
2706                 goto unterminated;
2707
2708               cur++;  /* Move forwards again.  */
2709
                   /* Remember (and, if pedantic, complain about) only the
                      first line of a multi-line string.  */
2710               if (pfile->multiline_string_line == 0)
2711                 {
2712                   pfile->multiline_string_line = list->line;
2713                   if (CPP_PEDANTIC (pfile))
2714                     cpp_pedwarn (pfile, "multi-line string constant");
2715                 }
2716
                   /* Whatever the newline looked like in the input, it is
                      stored as '\n'.  */
2717               *namebuf++ = '\n';
2718               handle_newline (cur, buffer->rlimit, c);
2719             }
2720           else
2721             {
2722               unsigned char *temp;
2723
2724               /* An odd number of consecutive backslashes represents
2725                  an escaped terminator.  */
2726               temp = namebuf - 1;
2727               while (temp >= name->text && *temp == '\\')
2728                 temp--;
2729
                   /* namebuf - temp is the number of backslashes plus one,
                      so it is odd exactly when the terminator is not
                      escaped and the string is complete.  */
2730               if ((namebuf - temp) & 1)
2731                 goto out;
                   /* Escaped: put the terminator back into the name.  */
2732               namebuf++;
2733             }
2734         }
2735     }
2736
2737   /* Run out of name space?  */
2738   if (cur < buffer->rlimit)
2739     {
2740       list->name_used = namebuf - list->namebuf;
2741       auto_expand_name_space (list);
2742       goto expanded;
2743     }
2744
2745   /* We may not have trigraph-replaced the input for this code path,
2746      but as the input is in error by being unterminated we don't
2747      bother.  Prevent warnings about no newlines at EOF.  */
2748   if (IS_NEWLINE(cur[-1]))
2749     cur--;
2750
2751  unterminated:
2752   cpp_error (pfile, "missing terminating %c character", (int) terminator);
2753
       /* For a string that began on an earlier line, also point at the
          line recorded when it first went multi-line.  */
2754   if (terminator == '\"' && pfile->multiline_string_line != list->line
2755       && pfile->multiline_string_line != 0)
2756     {
2757       cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2758                            "possible start of unterminated string literal");
2759       pfile->multiline_string_line = 0;
2760     }
2761
2762  out:
2763   buffer->cur = cur;
2764   name->len = namebuf - name->text;
2765   list->name_used = namebuf - list->namebuf;
2766
2767   if (null_count > 0)
2768     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2769                          : "null character preserved"));
2770 }
2771
2772 /* The character TYPE helps us distinguish comment types: '*' = C
2773    style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
2774    the stored comment includes the comment start and any terminator.  */
2775
2776 #define COMMENT_START_LEN 2
2777 static void
2778 save_comment (list, token, from, len, type)
2779      cpp_toklist *list;
2780      cpp_token *token;
2781      const unsigned char *from;
2782      unsigned int len;
2783      unsigned int type;
2784 {
2785   unsigned char *buffer;
2786
2787   len += COMMENT_START_LEN;
2788
2789   if (list->name_used + len > list->name_cap)
2790     expand_name_space (list, len);
2791
2792   INIT_TOKEN_NAME (list, token);
2793   token->type = CPP_COMMENT;
2794   token->val.name.len = len;
2795
2796   buffer = list->namebuf + list->name_used;
2797   list->name_used += len;
2798
2799   /* Copy the comment.  */
2800   if (type == '*')
2801     {
2802       *buffer++ = '/';
2803       *buffer++ = '*';
2804     }
2805   else
2806     {
2807       *buffer++ = type;
2808       *buffer++ = type;
2809     }
2810   memcpy (buffer, from, len - COMMENT_START_LEN);
2811 }
2812
2813 /*
2814  *  The tokenizer's main loop.  Returns a token list, representing a
2815  *  logical line in the input file.  On EOF after some tokens have
2816  *  been processed, we return immediately.  Then in next call, or if
2817  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
2818  *  token is placed in the list.
2819  *
2820  *  Implementation relies almost entirely on lookback, rather than
2821  *  looking forwards.  This means that tokenization requires just
2822  *  a single pass of the file, even in the presence of trigraphs and
2823  *  escaped newlines, providing significant performance benefits.
2824  *  Trigraph overhead is negligible if they are disabled, and low
2825  *  even when enabled.
      *
      *  PFILE is the reader.  Input is taken from PFILE->buffer, starting
      *  at its CUR pointer and never reading at or beyond its RLIMIT; CUR
      *  is updated to the final read position before returning.
      *
      *  LIST receives the tokens.  They are appended after the
      *  LIST->tokens_used entries already present, the first token
      *  appended is flagged BOL, and LIST->tokens_used is updated to the
      *  new count.  The function itself returns nothing; the result is
      *  delivered through LIST.  The token array is grown in steps of
      *  256 entries whenever it fills up.
2826  */
2827
     /* True when the first token lexed by the current call is a CPP_HASH.
        Relies on the local variables LIST and FIRST_TOKEN of
        _cpp_lex_line, so it is only usable inside that function.  */
2828 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2829
2830 void
2831 _cpp_lex_line (pfile, list)
2832      cpp_reader *pfile;
2833      cpp_toklist *list;
2834 {
2835   cpp_token *cur_token, *token_limit;
2836   cpp_buffer *buffer = pfile->buffer;
2837   register const unsigned char *cur = buffer->cur;
       /* Flags for the next token to be created.  PREV_WHITESPACE is
          recorded here when whitespace (or, outside traditional mode, a
          comment) has just been skipped.  */
2838   unsigned char flags = 0;
       /* Index in LIST of the first token produced by this call.  */
2839   unsigned int first_token = list->tokens_used;
2840
2841   list->line = CPP_BUF_LINE (buffer);
2842   pfile->col_adjust = 0;
       /* Control returns to this label from the end of the function once
          the token array has been grown, so both token pointers are
          recomputed from LIST here.  */
2843  expanded:
2844   token_limit = list->tokens + list->tokens_cap;
2845   cur_token = list->tokens + list->tokens_used;
2846
2847   for (; cur < buffer->rlimit && cur_token < token_limit;)
2848     {
2849       unsigned char c = *cur++;
2850
2851       /* Optimize whitespace skipping, as most tokens are probably
2852          separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
2853
2854       if (is_hspace ((unsigned int) c))
2855         {
2856           /* Step back to get the null warning and tab correction.  */
2857           buffer->cur = cur - 1;
2858           skip_whitespace (pfile, IS_DIRECTIVE ());
2859           cur = buffer->cur;
2860
2861           flags = PREV_WHITESPACE;
2862           if (cur == buffer->rlimit)
2863             break;
2864           c = *cur++;
2865         }
2866
2867       /* Initialize current token.  Its type is set in the switch.  */
2868       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2869       cur_token->flags = flags;
2870       flags = 0;
2871
           /* PREV_TOKEN_TYPE, IMMED_TOKEN, PUSH_TOKEN, REVISE_TOKEN,
              BACKUP_TOKEN and BACKUP_DIGRAPH are macros defined outside
              this function.  NOTE(review): judging by their use below they
              inspect or rewrite the token just before CUR_TOKEN, and
              IMMED_TOKEN tests that the current character directly follows
              that token, so that multi-character tokens are assembled by
              lookback -- confirm against their definitions.  */
2872       switch (c)
2873         {
2874         case '0': case '1': case '2': case '3': case '4':
2875         case '5': case '6': case '7': case '8': case '9':
2876           {
2877             int prev_dot;
2878
2879             cur--;              /* Backup character.  */
2880             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2881             if (prev_dot)
2882               cur_token--;
2883             INIT_TOKEN_NAME (list, cur_token);
2884             /* Prepend an immediately previous CPP_DOT token.  */
2885             if (prev_dot)
2886               {
2887                 if (list->name_cap == list->name_used)
2888                   auto_expand_name_space (list);
2889
2890                 cur_token->val.name.len = 1;
2891                 list->namebuf[list->name_used++] = '.';
2892               }
2893
                 /* Also entered from the newline case, to resume a number
                    that was interrupted by an escaped newline.  */
2894           continue_number:
2895             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
2896             buffer->cur = cur;
2897             parse_number (pfile, list, &cur_token->val.name);
2898             cur = buffer->cur;
2899             cur_token++;
2900           }
2901           break;
2902
               /* The dollar case jumps to this label when dollars are
                  permitted in identifiers.  */
2903         letter:
2904         case '_':
2905         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2906         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2907         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2908         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2909         case 'y': case 'z':
2910         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2911         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2912         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2913         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2914         case 'Y': case 'Z':
2915           cur--;                     /* Backup character.  */
2916           INIT_TOKEN_NAME (list, cur_token);
2917           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
2918
               /* Also entered from the newline case, to resume a name that
                  was interrupted by an escaped newline.  */
2919         continue_name:
2920           buffer->cur = cur;
2921           parse_name (pfile, list, &cur_token->val.name);
2922           cur = buffer->cur;
2923
2924           /* Find handler for newly created / extended directive.  */
2925           if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2926             _cpp_check_directive (list, cur_token);
2927           cur_token++;
2928           break;
2929
2930         case '\'':
2931           /* Fall through.  */
2932         case '\"':
2933           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2934           /* Do we have a wide string?  */
2935           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2936               && cur_token[-1].val.name.len == 1
2937               && cur_token[-1].val.name.text[0] == 'L'
2938               && !CPP_TRADITIONAL (pfile))
2939             {
2940               /* No need for 'L' any more.  */
2941               list->name_used--;
2942               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2943             }
2944
2945         do_parse_string:
2946           /* Here c is one of ' " or >.  */
2947           INIT_TOKEN_NAME (list, cur_token);
2948           buffer->cur = cur;
2949           parse_string2 (pfile, list, &cur_token->val.name, c,
2950                          c == '"' && !IS_DIRECTIVE());
2951           cur = buffer->cur;
2952           cur_token++;
2953           break;
2954
2955         case '/':
2956           cur_token->type = CPP_DIV;
               /* A slash directly after a CPP_DIV token starts a line
                  comment, provided the input is a system header or such
                  comments are enabled.  In every other situation this
                  slash is kept as a plain CPP_DIV token.  */
2957           if (IMMED_TOKEN ())
2958             {
2959               if (PREV_TOKEN_TYPE == CPP_DIV)
2960                 {
2961                   /* We silently allow C++ comments in system headers,
2962                      irrespective of conformance mode, because lots of
2963                      broken systems do that and trying to clean it up
2964                      in fixincludes is a nightmare.  */
2965                   if (buffer->system_header_p)
2966                     goto do_line_comment;
2967                   else if (CPP_OPTION (pfile, cplusplus_comments))
2968                     {
2969                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2970                           && ! buffer->warned_cplusplus_comments)
2971                         {
2972                           buffer->cur = cur;
2973                           cpp_pedwarn (pfile,
2974                              "C++ style comments are not allowed in ISO C89");
2975                           cpp_pedwarn (pfile,
2976                           "(this will be reported only once per input file)");
2977                           buffer->warned_cplusplus_comments = 1;
2978                         }
                         /* Also reached from the minus case for Chill
                            comments; C then holds the minus character.  */
2979                     do_line_comment:
2980                       buffer->cur = cur;
2981                       if (cur[-2] != c)
2982                         cpp_warning (pfile,
2983                                      "comment start split across lines");
2984                       if (skip_line_comment2 (pfile))
2985                         cpp_error_with_line (pfile, list->line,
2986                                              cur_token[-1].col,
2987                                              "multi-line comment");
2988
2989                       /* Back-up to first '-' or '/'.  */
2990                       cur_token--;
                           /* Unless comments are discarded, turn the slot of
                              the first opener character into a CPP_COMMENT
                              token.  This is done outside directives, or
                              when LIST->dirno is zero.  CUR still points just
                              past the opener; BUFFER->cur is presumably left
                              at the end of the comment by the skip routine,
                              so their difference is the length saved.  */
2991                       if (!CPP_OPTION (pfile, discard_comments)
2992                           && (!IS_DIRECTIVE() || list->dirno == 0))
2993                         save_comment (list, cur_token++, cur,
2994                                       buffer->cur - cur, c);
2995                       cur = buffer->cur;
2996
2997                       if (!CPP_OPTION (pfile, traditional))
2998                         flags = PREV_WHITESPACE;
2999                       break;
3000                     }
3001                 }
3002             }
3003           cur_token++;
3004           break;
3005
3006         case '*':
3007           cur_token->type = CPP_MULT;
3008           if (IMMED_TOKEN ())
3009             {
                   /* A star directly after a CPP_DIV token opens a block
                      comment; the handling mirrors the line comment code
                      in the slash case.  */
3010               if (PREV_TOKEN_TYPE == CPP_DIV)
3011                 {
3012                   buffer->cur = cur;
3013                   if (cur[-2] != '/')
3014                     cpp_warning (pfile,
3015                                  "comment start '/*' split across lines");
3016                   if (skip_block_comment2 (pfile))
3017                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3018                                          "unterminated comment");
3019                   else if (buffer->cur[-2] != '*')
3020                     cpp_warning (pfile,
3021                                  "comment end '*/' split across lines");
3022
3023                   /* Back up to opening '/'.  */
3024                   cur_token--;
3025                   if (!CPP_OPTION (pfile, discard_comments)
3026                       && (!IS_DIRECTIVE() || list->dirno == 0))
3027                     save_comment (list, cur_token++, cur,
3028                                   buffer->cur - cur, c);
3029                   cur = buffer->cur;
3030
3031                   if (!CPP_OPTION (pfile, traditional))
3032                     flags = PREV_WHITESPACE;
3033                   break;
3034                 }
3035               else if (CPP_OPTION (pfile, cplusplus))
3036                 {
3037                   /* In C++, there are .* and ->* operators.  */
3038                   if (PREV_TOKEN_TYPE == CPP_DEREF)
3039                     BACKUP_TOKEN (CPP_DEREF_STAR);
3040                   else if (PREV_TOKEN_TYPE == CPP_DOT)
3041                     BACKUP_TOKEN (CPP_DOT_STAR);
3042                 }
3043             }
3044           cur_token++;
3045           break;
3046
3047         case '\n':
3048         case '\r':
               /* handle_newline is defined elsewhere.  NOTE(review):
                  presumably it consumes the second character of a
                  two-character line ending and updates the line
                  accounting of BUFFER -- TODO confirm.  */
3049           handle_newline (cur, buffer->rlimit, c);
3050           if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3051             {
3052               /* Remove the escaped newline.  Then continue to process
3053                  any interrupted name or number.  */
3054               cur_token--;
3055               if (IMMED_TOKEN ())
3056                 {
3057                   cur_token--;
3058                   if (cur_token->type == CPP_NAME)
3059                     goto continue_name;
3060                   else if (cur_token->type == CPP_NUMBER)
3061                     goto continue_number;
3062                   cur_token++;
3063                 }
3064               /* Remember whitespace setting.  */
3065               flags = cur_token->flags;
3066               break;
3067             }
3068           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3069             {
3070               buffer->cur = cur;
3071               cpp_warning (pfile, "backslash and newline separated by space");
3072             }
3073           /* Skip vertical space until we have at least one token to
3074              return.  */
3075           if (cur_token != &list->tokens[first_token])
3076             goto out;
               /* Nothing has been lexed yet, so refresh the recorded line
                  number from the current buffer line.  */
3077           list->line = CPP_BUF_LINE (buffer);
3078           break;
3079
3080         case '-':
3081           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3082             {
3083               if (CPP_OPTION (pfile, chill))
3084                 goto do_line_comment;
3085               REVISE_TOKEN (CPP_MINUS_MINUS);
3086             }
3087           else
3088             PUSH_TOKEN (CPP_MINUS);
3089           break;
3090
3091           /* The digraph flag checking ensures that ## and %:%:
3092              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
3093         make_hash:
3094         case '#':
3095           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3096               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3097             REVISE_TOKEN (CPP_PASTE);
3098           else
3099             PUSH_TOKEN (CPP_HASH);
3100           break;
3101
3102         case ':':
3103           cur_token->type = CPP_COLON;
3104           if (IMMED_TOKEN ())
3105             {
3106               if (PREV_TOKEN_TYPE == CPP_COLON
3107                   && CPP_OPTION (pfile, cplusplus))
3108                 BACKUP_TOKEN (CPP_SCOPE);
3109               /* Digraph: "<:" is a '['  */
3110               else if (PREV_TOKEN_TYPE == CPP_LESS)
3111                 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3112               /* Digraph: "%:" is a '#'  */
3113               else if (PREV_TOKEN_TYPE == CPP_MOD)
3114                 {
                       /* Step back onto the CPP_MOD token, flag it as a
                          digraph, and let the hash case choose between
                          CPP_HASH and CPP_PASTE.  */
3115                   (--cur_token)->flags |= DIGRAPH;
3116                   goto make_hash;
3117                 }
3118             }
3119           cur_token++;
3120           break;
3121
3122         case '&':
3123           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3124             REVISE_TOKEN (CPP_AND_AND);
3125           else
3126             PUSH_TOKEN (CPP_AND);
3127           break;
3128
3129         make_or:
3130         case '|':
3131           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3132             REVISE_TOKEN (CPP_OR_OR);
3133           else
3134             PUSH_TOKEN (CPP_OR);
3135           break;
3136
3137         case '+':
3138           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3139             REVISE_TOKEN (CPP_PLUS_PLUS);
3140           else
3141             PUSH_TOKEN (CPP_PLUS);
3142           break;
3143
3144         case '=':
3145             /* This relies on equidistance of "?=" and "?" tokens.  */
3146           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3147             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3148           else
3149             PUSH_TOKEN (CPP_EQ);
3150           break;
3151
3152         case '>':
3153           cur_token->type = CPP_GREATER;
3154           if (IMMED_TOKEN ())
3155             {
3156               if (PREV_TOKEN_TYPE == CPP_GREATER)
3157                 BACKUP_TOKEN (CPP_RSHIFT);
3158               else if (PREV_TOKEN_TYPE == CPP_MINUS)
3159                 BACKUP_TOKEN (CPP_DEREF);
3160               /* Digraph: ":>" is a ']'  */
3161               else if (PREV_TOKEN_TYPE == CPP_COLON)
3162                 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3163               /* Digraph: "%>" is a '}'  */
3164               else if (PREV_TOKEN_TYPE == CPP_MOD)
3165                 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3166             }
3167           cur_token++;
3168           break;
3169
3170         case '<':
3171           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3172             {
3173               REVISE_TOKEN (CPP_LSHIFT);
3174               break;
3175             }
3176           /* Is this the beginning of a header name?  */
3177           if (list->flags & SYNTAX_INCLUDE)
3178             {
                   /* C is overwritten with the closing delimiter because
                      the shared string parsing code takes its terminator
                      from C.  */
3179               c = '>';  /* Terminator.  */
3180               cur_token->type = CPP_HEADER_NAME;
3181               goto do_parse_string;
3182             }
3183           PUSH_TOKEN (CPP_LESS);
3184           break;
3185
3186         case '%':
3187           /* Digraph: "<%" is a '{'  */
3188           cur_token->type = CPP_MOD;
3189           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3190             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3191           cur_token++;
3192           break;
3193
3194         case '?':
3195           if (cur + 1 < buffer->rlimit && *cur == '?'
3196               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3197             {
3198               /* Handle trigraph.  */
                   /* Skip the second question mark, then consume the third
                      character and jump to the case of the punctuator it
                      stands for.  */
3199               cur++;
3200               switch (*cur++)
3201                 {
3202                 case '(': goto make_open_square;
3203                 case ')': goto make_close_square;
3204                 case '<': goto make_open_brace;
3205                 case '>': goto make_close_brace;
3206                 case '=': goto make_hash;
3207                 case '!': goto make_or;
3208                 case '-': goto make_complement;
3209                 case '/': goto make_backslash;
3210                 case '\'': goto make_xor;
3211                 }
3212             }
3213           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3214             {
3215               /* GNU C++ defines <? and >? operators.  */
3216               if (PREV_TOKEN_TYPE == CPP_LESS)
3217                 {
3218                   REVISE_TOKEN (CPP_MIN);
3219                   break;
3220                 }
3221               else if (PREV_TOKEN_TYPE == CPP_GREATER)
3222                 {
3223                   REVISE_TOKEN (CPP_MAX);
3224                   break;
3225                 }
3226             }
3227           PUSH_TOKEN (CPP_QUERY);
3228           break;
3229
3230         case '.':
               /* Two preceding CPP_DOT tokens, the nearer one without the
                  PREV_WHITESPACE flag, followed immediately by this dot,
                  are replaced by a single CPP_ELLIPSIS token.  */
3231           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3232               && IMMED_TOKEN ()
3233               && !(cur_token[-1].flags & PREV_WHITESPACE))
3234             {
3235               cur_token -= 2;
3236               PUSH_TOKEN (CPP_ELLIPSIS);
3237             }
3238           else
3239             PUSH_TOKEN (CPP_DOT);
3240           break;
3241
               /* The make_* labels below are the targets of the trigraph
                  switch in the question mark case.  */
3242         make_complement:
3243         case '~': PUSH_TOKEN (CPP_COMPL); break;
3244         make_xor:
3245         case '^': PUSH_TOKEN (CPP_XOR); break;
3246         make_open_brace:
3247         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3248         make_close_brace:
3249         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3250         make_open_square:
3251         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3252         make_close_square:
3253         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3254         make_backslash:
3255         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3256         case '!': PUSH_TOKEN (CPP_NOT); break;
3257         case ',': PUSH_TOKEN (CPP_COMMA); break;
3258         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3259         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3260         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3261
3262         case '$':
3263           if (CPP_OPTION (pfile, dollars_in_ident))
3264             goto letter;
3265           /* Fall through */
3266         default:
               /* Any other character is kept in the AUX field of a
                  CPP_OTHER token.  */
3267           cur_token->aux = c;
3268           cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3269           PUSH_TOKEN (CPP_OTHER);
3270           break;
3271         }
3272     }
3273
3274   /* Run out of token space?  */
3275   if (cur_token == token_limit)
3276     {
           /* Record the count first; the expanded label then recomputes
              both token pointers from LIST.  */
3277       list->tokens_used = cur_token - list->tokens;
3278       _cpp_expand_token_space (list, 256);
3279       goto expanded;
3280     }
3281
       /* Store the pending flags in the slot one past the last token
          produced.  */
3282   cur_token->flags = flags;
       /* The input ran out before this call produced any token, so a
          lone CPP_EOF token is emitted.  */
3283   if (cur_token == &list->tokens[first_token])
3284     {
3285       /* FIXME: move this warning to callers who care.  */
3286       if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3287         cpp_warning (pfile, "no newline at end of file");
3288       cur_token++->type = CPP_EOF;
3289     }
3290
       /* Common exit, also reached directly from the newline case: flag
          the first token of the line, then publish the final read
          position and token count.  */
3291  out:
3292   list->tokens[first_token].flags |= BOL;
3293   buffer->cur = cur;
3294   list->tokens_used = cur_token - list->tokens;
3295 }
3296
3297 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
3298    already contain the enough space to hold the token's spelling.  If
3299    WHITESPACE is true, and the token was preceded by whitespace,
3300    output a single space before the token proper.  Returns a pointer
3301    to the character after the last character written.  */
3302
3303 static unsigned char *
3304 spell_token (pfile, token, buffer, whitespace)
3305      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
3306      const cpp_token *token;
3307      unsigned char *buffer;
3308      int whitespace;
3309 {
3310   /* Whitespace will not be wanted by handlers of the # and ##
3311      operators calling this function, but will be wanted by the
3312      function that writes out the preprocessed file.  */
3313   if (whitespace && token->flags & PREV_WHITESPACE)
3314     *buffer++ = ' ';
3315
3316   switch (token_spellings[token->type].type)
3317     {
3318     case SPELL_OPERATOR:
3319       {
3320         const unsigned char *spelling;
3321         unsigned char c;
3322
3323         if (token->flags & DIGRAPH)
3324           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3325         else
3326           spelling = token_spellings[token->type].spelling;
3327
3328         while ((c = *spelling++) != '\0')
3329           *buffer++ = c;
3330       }
3331       break;
3332
3333     case SPELL_IDENT:
3334       memcpy (buffer, token->val.name.text, token->val.name.len);
3335       buffer += token->val.name.len;
3336       break;
3337
3338     case SPELL_STRING:
3339       {
3340         unsigned char c;
3341
3342         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3343           *buffer++ = 'L';
3344         c = '\'';
3345         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3346           c = '"';
3347         *buffer++ = c;
3348         memcpy (buffer, token->val.name.text, token->val.name.len);
3349         buffer += token->val.name.len;
3350         *buffer++ = c;
3351       }
3352       break;
3353
3354     case SPELL_CHAR:
3355       *buffer++ = token->aux;
3356       break;
3357
3358     case SPELL_NONE:
3359       cpp_ice (pfile, "Unspellable token");
3360       break;
3361     }
3362
3363   return buffer;
3364 }
3365
3366 /* Temporary function for illustrative purposes.  */
3367 void
3368 _cpp_lex_file (pfile)
3369      cpp_reader* pfile;
3370 {
3371   cpp_toklist* list;
3372
3373   init_trigraph_map ();
3374   list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3375   _cpp_init_toklist (list, DUMMY_TOKEN);
3376
3377   for (;;)
3378     {
3379       _cpp_lex_line (pfile, list);
3380       if (list->tokens[0].type == CPP_EOF)
3381         break;
3382
3383 #if 0
3384       if (list->dirno)
3385         _cpp_handle_directive (pfile, list);
3386       else
3387 #endif
3388         _cpp_output_list (pfile, list);
3389       _cpp_clear_toklist (list);
3390     }
3391 }
3392
3393 /* Temporary function for illustrative purposes.  */
3394 static void
3395 _cpp_output_list (pfile, list)
3396      cpp_reader *pfile;
3397      cpp_toklist *list;
3398 {
3399   unsigned int i;
3400
3401   for (i = 0; i < list->tokens_used; i++)
3402     {
3403       CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
3404       pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);
3405     }
3406 }
3407
3408 #endif