avr.c (asm_output_section_name): output section attributes.
[gcc.git] / gcc / cpplex.c
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
8
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "intl.h"
26 #include "cpplib.h"
27 #include "cpphash.h"
28
29 #ifdef HAVE_MMAP_FILE
30 # include <sys/mman.h>
31 #endif
32
/* Raw character access on a buffer.  PEEKBUF yields the character N
   places past the cursor without consuming it; GETBUF yields the
   character at the cursor and steps past it.  Both yield EOF rather
   than reading at or beyond rlimit.  FORWARDBUF moves the cursor by N,
   which may be negative, and performs no bounds check.  */
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations on the current buffer of the reader that is
   named `pfile' at the point of use.  */
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
43
44 static void skip_block_comment PARAMS ((cpp_reader *));
45 static void skip_line_comment PARAMS ((cpp_reader *));
46 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
47 static int skip_comment PARAMS ((cpp_reader *, int));
48 static int copy_comment PARAMS ((cpp_reader *, int));
49 static void skip_string PARAMS ((cpp_reader *, int));
50 static void parse_string PARAMS ((cpp_reader *, int));
51 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
52 static void null_warning PARAMS ((cpp_reader *, unsigned int));
53
54 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
55 size_t, FILE *));
56 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
57 unsigned int));
58 static void bump_column PARAMS ((cpp_printer *, unsigned int,
59 unsigned int));
60 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
61 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
62 unsigned int));
63
64 #define auto_expand_name_space(list) \
65 expand_name_space ((list), 1 + (list)->name_cap / 2)
66
67 #ifdef NEW_LEXER
68
69 void init_trigraph_map PARAMS ((void));
70 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
71 unsigned char *));
72 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
73 const unsigned char *));
74 static int skip_block_comment2 PARAMS ((cpp_reader *));
75 static int skip_line_comment2 PARAMS ((cpp_reader *));
76 static void skip_whitespace PARAMS ((cpp_reader *, int));
77 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
79 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
80 unsigned int, int));
81 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
82 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
83 const unsigned char *,
84 unsigned int, unsigned int));
85 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
86
87 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
88
89 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
90 unsigned char *, int));
91
92 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
93 cpp_token *));
94
95 /* Macros on a cpp_name. */
96 #define INIT_TOKEN_NAME(list, token) \
97 do {(token)->val.name.len = 0; \
98 (token)->val.name.text = (list)->namebuf + (list)->name_used; \
99 (list)->tokens_used = token - (list)->tokens + 1; \
100 } while (0)
101
102 /* Maybe put these in the ISTABLE eventually. */
103 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
104 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
105
106 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
107 character, if any, is in buffer. */
108 #define handle_newline(cur, limit, c) \
109 do {\
110 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
111 (cur)++; \
112 CPP_BUMP_LINE_CUR (pfile, (cur)); \
113 pfile->col_adjust = 0; \
114 } while (0)
115
116 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
117 #define PREV_TOKEN_TYPE (cur_token[-1].type)
118
119 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
120 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
121 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
122 #define BACKUP_DIGRAPH(ttype) do { \
123 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
124
125 /* An upper bound on the number of bytes needed to spell a token,
126 including preceding whitespace. */
127 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
128 SPELL_NONE ? (token)->val.name.len: 0))
129
130 #endif
131
132 /* Order here matters. Those beyond SPELL_NONE store their spelling
133 in the token list, and it's length in the token->val.name.len. */
134 enum spell_type
135 {
136 SPELL_OPERATOR = 0,
137 SPELL_NONE,
138 SPELL_CHAR, /* FIXME: revert order of NONE and CHAR after transition. */
139 SPELL_IDENT,
140 SPELL_STRING
141 };
142
143 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
144 #define I(e, s) {SPELL_IDENT, s},
145 #define S(e, s) {SPELL_STRING, s},
146 #define C(e, s) {SPELL_CHAR, s},
147 #define N(e, s) {SPELL_NONE, s},
148
149 static const struct token_spelling
150 {
151 ENUM_BITFIELD(spell_type) type : CHAR_BIT;
152 const U_CHAR *spelling;
153 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
154
155 #undef T
156 #undef I
157 #undef S
158 #undef C
159 #undef N
160
161 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
162
163 void
164 _cpp_grow_token_buffer (pfile, n)
165 cpp_reader *pfile;
166 long n;
167 {
168 long old_written = CPP_WRITTEN (pfile);
169 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
170 pfile->token_buffer = (U_CHAR *)
171 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
172 CPP_SET_WRITTEN (pfile, old_written);
173 }
174
175 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
176 If BUFFER != NULL, then use the LENGTH characters in BUFFER
177 as the new input buffer.
178 Return the new buffer, or NULL on failure. */
179
180 cpp_buffer *
181 cpp_push_buffer (pfile, buffer, length)
182 cpp_reader *pfile;
183 const U_CHAR *buffer;
184 long length;
185 {
186 cpp_buffer *buf = CPP_BUFFER (pfile);
187 cpp_buffer *new;
188 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
189 {
190 cpp_fatal (pfile, "macro or `#include' recursion too deep");
191 return NULL;
192 }
193
194 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
195
196 new->buf = new->cur = buffer;
197 new->rlimit = buffer + length;
198 new->prev = buf;
199 new->mark = NULL;
200 new->line_base = NULL;
201
202 CPP_BUFFER (pfile) = new;
203 return new;
204 }
205
206 cpp_buffer *
207 cpp_pop_buffer (pfile)
208 cpp_reader *pfile;
209 {
210 cpp_buffer *buf = CPP_BUFFER (pfile);
211 if (ACTIVE_MARK_P (pfile))
212 cpp_ice (pfile, "mark active in cpp_pop_buffer");
213
214 if (buf->ihash)
215 {
216 _cpp_unwind_if_stack (pfile, buf);
217 if (buf->buf)
218 free ((PTR) buf->buf);
219 if (pfile->system_include_depth)
220 pfile->system_include_depth--;
221 if (pfile->potential_control_macro)
222 {
223 buf->ihash->cmacro = pfile->potential_control_macro;
224 pfile->potential_control_macro = 0;
225 }
226 pfile->input_stack_listing_current = 0;
227 }
228 else if (buf->macro)
229 {
230 cpp_hashnode *m = buf->macro;
231
232 m->disabled = 0;
233 if ((m->type == T_FMACRO && buf->mapped)
234 || m->type == T_SPECLINE || m->type == T_FILE
235 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
236 || m->type == T_STDC)
237 free ((PTR) buf->buf);
238 }
239 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
240 free (buf);
241 pfile->buffer_stack_depth--;
242 return CPP_BUFFER (pfile);
243 }
244
245 /* Deal with the annoying semantics of fwrite. */
246 static void
247 safe_fwrite (pfile, buf, len, fp)
248 cpp_reader *pfile;
249 const U_CHAR *buf;
250 size_t len;
251 FILE *fp;
252 {
253 size_t count;
254
255 while (len)
256 {
257 count = fwrite (buf, 1, len, fp);
258 if (count == 0)
259 goto error;
260 len -= count;
261 buf += count;
262 }
263 return;
264
265 error:
266 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
267 }
268
269 /* Notify the compiler proper that the current line number has jumped,
270 or the current file name has changed. */
271
272 static void
273 output_line_command (pfile, print, line)
274 cpp_reader *pfile;
275 cpp_printer *print;
276 unsigned int line;
277 {
278 cpp_buffer *ip = cpp_file_buffer (pfile);
279 enum { same = 0, enter, leave, rname } change;
280 static const char * const codes[] = { "", " 1", " 2", "" };
281
282 if (CPP_OPTION (pfile, no_line_commands))
283 return;
284
285 /* Determine whether the current filename has changed, and if so,
286 how. 'nominal_fname' values are unique, so they can be compared
287 by comparing pointers. */
288 if (ip->nominal_fname == print->last_fname)
289 change = same;
290 else
291 {
292 if (pfile->buffer_stack_depth == print->last_bsd)
293 change = rname;
294 else
295 {
296 if (pfile->buffer_stack_depth > print->last_bsd)
297 change = enter;
298 else
299 change = leave;
300 print->last_bsd = pfile->buffer_stack_depth;
301 }
302 print->last_fname = ip->nominal_fname;
303 }
304 /* If the current file has not changed, we can output a few newlines
305 instead if we want to increase the line number by a small amount.
306 We cannot do this if print->lineno is zero, because that means we
307 haven't output any line commands yet. (The very first line
308 command output is a `same_file' command.) */
309 if (change == same && print->lineno != 0
310 && line >= print->lineno && line < print->lineno + 8)
311 {
312 while (line > print->lineno)
313 {
314 putc ('\n', print->outf);
315 print->lineno++;
316 }
317 return;
318 }
319
320 #ifndef NO_IMPLICIT_EXTERN_C
321 if (CPP_OPTION (pfile, cplusplus))
322 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
323 codes[change],
324 ip->system_header_p ? " 3" : "",
325 (ip->system_header_p == 2) ? " 4" : "");
326 else
327 #endif
328 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
329 codes[change],
330 ip->system_header_p ? " 3" : "");
331 print->lineno = line;
332 }
333
334 /* Write the contents of the token_buffer to the output stream, and
335 clear the token_buffer. Also handles generating line commands and
336 keeping track of file transitions. */
337
338 void
339 cpp_output_tokens (pfile, print)
340 cpp_reader *pfile;
341 cpp_printer *print;
342 {
343 cpp_buffer *ip;
344
345 if (CPP_WRITTEN (pfile) - print->written)
346 {
347 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
348 print->lineno++;
349 safe_fwrite (pfile, pfile->token_buffer,
350 CPP_WRITTEN (pfile) - print->written, print->outf);
351 }
352
353 ip = cpp_file_buffer (pfile);
354 if (ip)
355 output_line_command (pfile, print, CPP_BUF_LINE (ip));
356
357 CPP_SET_WRITTEN (pfile, print->written);
358 }
359
360 /* Helper for cpp_output_list - increases the column number to match
361 what we expect it to be. */
362
363 static void
364 bump_column (print, from, to)
365 cpp_printer *print;
366 unsigned int from, to;
367 {
368 unsigned int tabs, spcs;
369 unsigned int delta = to - from;
370
371 /* Only if FROM is 0, advance by tabs. */
372 if (from == 0)
373 tabs = delta / 8, spcs = delta % 8;
374 else
375 tabs = 0, spcs = delta;
376
377 while (tabs--) putc ('\t', print->outf);
378 while (spcs--) putc (' ', print->outf);
379 }
380
381 /* Write out the list L onto pfile->token_buffer. This function is
382 incomplete:
383
384 1) pfile->token_buffer is not going to continue to exist.
385 2) At the moment, tokens don't carry the information described
386 in cpplib.h; they are all strings.
387 3) The list has to be a complete line, and has to be written starting
388 at the beginning of a line. */
389
390 void
391 cpp_output_list (pfile, print, list)
392 cpp_reader *pfile;
393 cpp_printer *print;
394 const cpp_toklist *list;
395 {
396 unsigned int i;
397 unsigned int curcol = 1;
398
399 /* XXX Probably does not do what is intended. */
400 if (print->lineno != list->line)
401 output_line_command (pfile, print, list->line);
402
403 for (i = 0; i < list->tokens_used; i++)
404 {
405 if (TOK_TYPE (list, i) == CPP_VSPACE)
406 {
407 output_line_command (pfile, print, list->tokens[i].aux);
408 continue;
409 }
410
411 if (curcol < TOK_COL (list, i))
412 {
413 /* Insert space to bring the column to what it should be. */
414 bump_column (print, curcol - 1, TOK_COL (list, i));
415 curcol = TOK_COL (list, i);
416 }
417 /* XXX We may have to insert space to prevent an accidental
418 token paste. */
419 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
420 curcol += TOK_LEN (list, i);
421 }
422 }
423
424 /* Scan a string (which may have escape marks), perform macro expansion,
425 and write the result to the token_buffer. */
426
427 void
428 _cpp_expand_to_buffer (pfile, buf, length)
429 cpp_reader *pfile;
430 const U_CHAR *buf;
431 int length;
432 {
433 cpp_buffer *stop;
434 enum cpp_ttype token;
435 U_CHAR *buf1;
436
437 if (length < 0)
438 {
439 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
440 return;
441 }
442
443 /* Copy the buffer, because it might be in an unsafe place - for
444 example, a sequence on the token_buffer, where the pointers will
445 be invalidated if we enlarge the token_buffer. */
446 buf1 = alloca (length);
447 memcpy (buf1, buf, length);
448
449 /* Set up the input on the input stack. */
450 stop = CPP_BUFFER (pfile);
451 if (cpp_push_buffer (pfile, buf1, length) == NULL)
452 return;
453 CPP_BUFFER (pfile)->has_escapes = 1;
454
455 /* Scan the input, create the output. */
456 for (;;)
457 {
458 token = cpp_get_token (pfile);
459 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
460 break;
461 }
462 }
463
464 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
465
466 void
467 cpp_scan_buffer_nooutput (pfile)
468 cpp_reader *pfile;
469 {
470 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
471 enum cpp_ttype token;
472 unsigned int old_written = CPP_WRITTEN (pfile);
473 /* In no-output mode, we can ignore everything but directives. */
474 for (;;)
475 {
476 if (! pfile->only_seen_white)
477 _cpp_skip_rest_of_line (pfile);
478 token = cpp_get_token (pfile);
479 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
480 break;
481 }
482 CPP_SET_WRITTEN (pfile, old_written);
483 }
484
485 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
486
487 void
488 cpp_scan_buffer (pfile, print)
489 cpp_reader *pfile;
490 cpp_printer *print;
491 {
492 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
493 enum cpp_ttype token;
494
495 for (;;)
496 {
497 token = cpp_get_token (pfile);
498 if (token == CPP_VSPACE || token == CPP_EOF
499 /* XXX Temporary kluge - force flush after #include only */
500 || (token == CPP_DIRECTIVE
501 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
502 {
503 cpp_output_tokens (pfile, print);
504 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
505 return;
506 }
507 }
508 }
509
510 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
511
512 cpp_buffer *
513 cpp_file_buffer (pfile)
514 cpp_reader *pfile;
515 {
516 cpp_buffer *ip;
517
518 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
519 if (ip->ihash != NULL)
520 return ip;
521 return NULL;
522 }
523
524 /* Token-buffer helper functions. */
525
526 /* Expand a token list's string space. It is *vital* that
527 list->tokens_used is correct, to get pointer fix-up right. */
528 static void
529 expand_name_space (list, len)
530 cpp_toklist *list;
531 unsigned int len;
532 {
533 const U_CHAR *old_namebuf;
534
535 old_namebuf = list->namebuf;
536 list->name_cap += len;
537 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
538
539 /* Fix up token text pointers. */
540 if (list->namebuf != old_namebuf)
541 {
542 unsigned int i;
543
544 for (i = 0; i < list->tokens_used; i++)
545 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
546 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
547 }
548 }
549
550 /* Expand the number of tokens in a list. */
551 void
552 _cpp_expand_token_space (list, count)
553 cpp_toklist *list;
554 unsigned int count;
555 {
556 unsigned int n;
557
558 list->tokens_cap += count;
559 n = list->tokens_cap;
560 if (list->flags & LIST_OFFSET)
561 list->tokens--, n++;
562 list->tokens = (cpp_token *)
563 xrealloc (list->tokens, n * sizeof (cpp_token));
564 if (list->flags & LIST_OFFSET)
565 list->tokens++; /* Skip the dummy. */
566 }
567
568 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
569 an extra token in front of the token list, as this allows the lexer
570 to always peek at the previous token without worrying about
571 underflowing the list, and some initial space. Otherwise, no
572 token- or name-space is allocated, and there is no dummy token. */
573 void
574 _cpp_init_toklist (list, flags)
575 cpp_toklist *list;
576 int flags;
577 {
578 /* We malloc zero bytes because we may want to realloc later, and
579 some old implementations don't like realloc-ing a null pointer. */
580 if (flags == NO_DUMMY_TOKEN)
581 {
582 list->tokens_cap = 0;
583 list->tokens = (cpp_token *) malloc (0);
584 list->name_cap = 0;
585 list->flags = 0;
586 }
587 else
588 {
589 /* Initialize token space. Put a dummy token before the start
590 that will fail matches. */
591 list->tokens_cap = 256; /* 4K's worth. */
592 list->tokens = (cpp_token *)
593 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
594 list->tokens[0].type = CPP_EOF;
595 list->tokens++;
596
597 /* Initialize name space. */
598 list->name_cap = 1024;
599 list->flags = LIST_OFFSET;
600 }
601
602 /* Allocate name space. */
603 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
604
605 _cpp_clear_toklist (list);
606 }
607
608 /* Clear a token list. */
609 void
610 _cpp_clear_toklist (list)
611 cpp_toklist *list;
612 {
613 list->tokens_used = 0;
614 list->name_used = 0;
615 list->dirno = -1;
616 list->flags &= LIST_OFFSET; /* clear all but that one */
617 }
618
619 /* Free a token list. Does not free the list itself, which may be
620 embedded in a larger structure. */
621 void
622 _cpp_free_toklist (list)
623 cpp_toklist *list;
624 {
625 if (list->flags & LIST_OFFSET)
626 free (list->tokens - 1); /* Backup over dummy token. */
627 else
628 free (list->tokens);
629 free (list->namebuf);
630 }
631
632 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
633 COPY is assumed not to be initialized. The comment space is not
634 copied. */
635 void
636 _cpp_slice_toklist (copy, start, finish)
637 cpp_toklist *copy;
638 const cpp_token *start, *finish;
639 {
640 unsigned int i, n;
641 size_t bytes;
642
643 n = finish - start;
644 copy->tokens_cap = n;
645 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
646 memcpy (copy->tokens, start, n * sizeof (cpp_token));
647
648 bytes = 0;
649 for (i = 0; i < n; i++)
650 if (token_spellings[start[i].type].type > SPELL_NONE)
651 bytes += start[i].val.name.len;
652
653 copy->namebuf = xmalloc (bytes);
654 bytes = 0;
655 for (i = 0; i < n; i++)
656 if (token_spellings[start[i].type].type > SPELL_NONE)
657 {
658 memcpy (copy->namebuf + bytes,
659 start[i].val.name.text, start[i].val.name.len);
660 copy->tokens[i].val.name.text = copy->namebuf + bytes;
661 bytes += start[i].val.name.len;
662 }
663
664 copy->tokens_cap = n;
665 copy->tokens_used = n;
666 copy->name_used = bytes;
667 copy->name_cap = bytes;
668
669 copy->flags = 0;
670 copy->dirno = -1;
671 }
672
673 /* Shrink a token list down to the minimum size. */
674 void
675 _cpp_squeeze_toklist (list)
676 cpp_toklist *list;
677 {
678 long delta;
679 const U_CHAR *old_namebuf;
680
681 if (list->flags & LIST_OFFSET)
682 {
683 list->tokens--;
684 memmove (list->tokens, list->tokens + 1,
685 list->tokens_used * sizeof (cpp_token));
686 list->tokens = xrealloc (list->tokens,
687 list->tokens_used * sizeof (cpp_token));
688 list->flags &= ~LIST_OFFSET;
689 }
690 else
691 list->tokens = xrealloc (list->tokens,
692 list->tokens_used * sizeof (cpp_token));
693 list->tokens_cap = list->tokens_used;
694
695 old_namebuf = list->namebuf;
696 list->namebuf = xrealloc (list->namebuf, list->name_used);
697 list->name_cap = list->name_used;
698
699 /* Fix up token text pointers. */
700 delta = list->namebuf - old_namebuf;
701 if (delta)
702 {
703 unsigned int i;
704
705 for (i = 0; i < list->tokens_used; i++)
706 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
707 list->tokens[i].val.name.text += delta;
708 }
709 }
710
711 /* Compare two tokens. */
712 int
713 _cpp_equiv_tokens (a, b)
714 const cpp_token *a, *b;
715 {
716 if (a->type != b->type
717 || a->flags != b->flags
718 || a->aux != b->aux)
719 return 0;
720
721 if (token_spellings[a->type].type > SPELL_NONE)
722 {
723 if (a->val.name.len != b->val.name.len
724 || ustrncmp(a->val.name.text,
725 b->val.name.text,
726 a->val.name.len))
727 return 0;
728 }
729 return 1;
730 }
731
732 /* Compare two token lists. */
733 int
734 _cpp_equiv_toklists (a, b)
735 const cpp_toklist *a, *b;
736 {
737 unsigned int i;
738
739 if (a->tokens_used != b->tokens_used)
740 return 0;
741
742 for (i = 0; i < a->tokens_used; i++)
743 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
744 return 0;
745 return 1;
746 }
747
748 /* Scan until we encounter a token of type STOP or a newline, and
749 create a token list for it. Does not macro-expand or execute
750 directives. The final token is not included in the list or
751 consumed from the input. Returns the type of the token stopped at. */
752
753 enum cpp_ttype
754 _cpp_scan_until (pfile, list, stop)
755 cpp_reader *pfile;
756 cpp_toklist *list;
757 enum cpp_ttype stop;
758 {
759 int i, col;
760 long written, len;
761 enum cpp_ttype type;
762 int space_before;
763
764 _cpp_clear_toklist (list);
765 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
766
767 written = CPP_WRITTEN (pfile);
768 i = 0;
769 space_before = 0;
770 for (;;)
771 {
772 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
773 type = _cpp_lex_token (pfile);
774 len = CPP_WRITTEN (pfile) - written;
775 CPP_SET_WRITTEN (pfile, written);
776 if (type == CPP_HSPACE)
777 {
778 if (CPP_PEDANTIC (pfile))
779 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
780 space_before = 1;
781 continue;
782 }
783 else if (type == CPP_COMMENT)
784 /* Only happens when processing -traditional macro definitions.
785 Do not give this a token entry, but do not change space_before
786 either. */
787 continue;
788
789 if (list->tokens_used >= list->tokens_cap)
790 _cpp_expand_token_space (list, 256);
791 if (list->name_used + len >= list->name_cap)
792 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
793
794 if (type == CPP_MACRO)
795 type = CPP_NAME;
796
797 if (type == CPP_VSPACE || type == stop)
798 break;
799
800 list->tokens_used++;
801 TOK_TYPE (list, i) = type;
802 TOK_COL (list, i) = col;
803 TOK_AUX (list, i) = 0;
804 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
805
806 TOK_LEN (list, i) = len;
807 if (token_spellings[type].type > SPELL_NONE)
808 {
809 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
810 TOK_NAME (list, i) = list->namebuf + list->name_used;
811 list->name_used += len;
812 }
813 else
814 TOK_NAME (list, i) = token_spellings[type].spelling;
815 i++;
816 space_before = 0;
817 }
818
819 /* XXX Temporary kluge: put back the newline (or whatever). */
820 FORWARD(-1);
821
822 /* Don't consider the first token to have white before. */
823 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
824 return type;
825 }
826
827 /* Skip a C-style block comment. We know it's a comment, and point is
828 at the second character of the starter. */
829 static void
830 skip_block_comment (pfile)
831 cpp_reader *pfile;
832 {
833 unsigned int line, col;
834 const U_CHAR *limit, *cur;
835
836 FORWARD(1);
837 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
838 col = CPP_BUF_COL (CPP_BUFFER (pfile));
839 limit = CPP_BUFFER (pfile)->rlimit;
840 cur = CPP_BUFFER (pfile)->cur;
841
842 while (cur < limit)
843 {
844 char c = *cur++;
845 if (c == '\n' || c == '\r')
846 {
847 /* \r cannot be a macro escape marker here. */
848 if (!ACTIVE_MARK_P (pfile))
849 CPP_BUMP_LINE_CUR (pfile, cur);
850 }
851 else if (c == '*')
852 {
853 /* Check for teminator. */
854 if (cur < limit && *cur == '/')
855 goto out;
856
857 /* Warn about comment starter embedded in comment. */
858 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
859 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
860 cur - CPP_BUFFER (pfile)->line_base,
861 "'/*' within comment");
862 }
863 }
864
865 cpp_error_with_line (pfile, line, col, "unterminated comment");
866 cur--;
867 out:
868 CPP_BUFFER (pfile)->cur = cur + 1;
869 }
870
871 /* Skip a C++/Chill line comment. We know it's a comment, and point
872 is at the second character of the initiator. */
873 static void
874 skip_line_comment (pfile)
875 cpp_reader *pfile;
876 {
877 FORWARD(1);
878 for (;;)
879 {
880 int c = GETC ();
881
882 /* We don't have to worry about EOF in here. */
883 if (c == '\n')
884 {
885 /* Don't consider final '\n' to be part of comment. */
886 FORWARD(-1);
887 return;
888 }
889 else if (c == '\r')
890 {
891 /* \r cannot be a macro escape marker here. */
892 if (!ACTIVE_MARK_P (pfile))
893 CPP_BUMP_LINE (pfile);
894 if (CPP_OPTION (pfile, warn_comments))
895 cpp_warning (pfile, "backslash-newline within line comment");
896 }
897 }
898 }
899
900 /* Skip a comment - C, C++, or Chill style. M is the first character
901 of the comment marker. If this really is a comment, skip to its
902 end and return ' '. If this is not a comment, return M (which will
903 be '/' or '-'). */
904
905 static int
906 skip_comment (pfile, m)
907 cpp_reader *pfile;
908 int m;
909 {
910 if (m == '/' && PEEKC() == '*')
911 {
912 skip_block_comment (pfile);
913 return ' ';
914 }
915 else if (m == '/' && PEEKC() == '/')
916 {
917 if (CPP_BUFFER (pfile)->system_header_p)
918 {
919 /* We silently allow C++ comments in system headers, irrespective
920 of conformance mode, because lots of busted systems do that
921 and trying to clean it up in fixincludes is a nightmare. */
922 skip_line_comment (pfile);
923 return ' ';
924 }
925 else if (CPP_OPTION (pfile, cplusplus_comments))
926 {
927 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
928 {
929 if (CPP_WTRADITIONAL (pfile))
930 cpp_pedwarn (pfile,
931 "C++ style comments are not allowed in traditional C");
932 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
933 cpp_pedwarn (pfile,
934 "C++ style comments are not allowed in ISO C89");
935 if (CPP_WTRADITIONAL (pfile)
936 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
937 cpp_pedwarn (pfile,
938 "(this will be reported only once per input file)");
939 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
940 }
941 skip_line_comment (pfile);
942 return ' ';
943 }
944 else
945 return m;
946 }
947 else if (m == '-' && PEEKC() == '-'
948 && CPP_OPTION (pfile, chill))
949 {
950 skip_line_comment (pfile);
951 return ' ';
952 }
953 else
954 return m;
955 }
956
957 /* Identical to skip_comment except that it copies the comment into the
958 token_buffer. This is used if !discard_comments. */
959 static int
960 copy_comment (pfile, m)
961 cpp_reader *pfile;
962 int m;
963 {
964 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
965 const U_CHAR *limit;
966
967 if (skip_comment (pfile, m) == m)
968 return m;
969
970 limit = CPP_BUFFER (pfile)->cur;
971 CPP_RESERVE (pfile, limit - start + 2);
972 CPP_PUTC_Q (pfile, m);
973 for (; start <= limit; start++)
974 if (*start != '\r')
975 CPP_PUTC_Q (pfile, *start);
976
977 return ' ';
978 }
979
980 static void
981 null_warning (pfile, count)
982 cpp_reader *pfile;
983 unsigned int count;
984 {
985 if (count == 1)
986 cpp_warning (pfile, "embedded null character ignored");
987 else
988 cpp_warning (pfile, "embedded null characters ignored");
989 }
990
991 /* Skip whitespace \-newline and comments. Does not macro-expand. */
992
993 void
994 _cpp_skip_hspace (pfile)
995 cpp_reader *pfile;
996 {
997 unsigned int null_count = 0;
998 int c;
999
1000 while (1)
1001 {
1002 c = GETC();
1003 if (c == EOF)
1004 goto out;
1005 else if (is_hspace(c))
1006 {
1007 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1008 cpp_pedwarn (pfile, "%s in preprocessing directive",
1009 c == '\f' ? "formfeed" : "vertical tab");
1010 else if (c == '\0')
1011 null_count++;
1012 }
1013 else if (c == '\r')
1014 {
1015 /* \r is a backslash-newline marker if !has_escapes, and
1016 a deletable-whitespace or no-reexpansion marker otherwise. */
1017 if (CPP_BUFFER (pfile)->has_escapes)
1018 {
1019 if (PEEKC() == ' ')
1020 FORWARD(1);
1021 else
1022 break;
1023 }
1024 else
1025 CPP_BUMP_LINE (pfile);
1026 }
1027 else if (c == '/' || c == '-')
1028 {
1029 c = skip_comment (pfile, c);
1030 if (c != ' ')
1031 break;
1032 }
1033 else
1034 break;
1035 }
1036 FORWARD(-1);
1037 out:
1038 if (null_count)
1039 null_warning (pfile, null_count);
1040 }
1041
1042 /* Read and discard the rest of the current line. */
1043
1044 void
1045 _cpp_skip_rest_of_line (pfile)
1046 cpp_reader *pfile;
1047 {
1048 for (;;)
1049 {
1050 int c = GETC();
1051 switch (c)
1052 {
1053 case '\n':
1054 FORWARD(-1);
1055 case EOF:
1056 return;
1057
1058 case '\r':
1059 if (! CPP_BUFFER (pfile)->has_escapes)
1060 CPP_BUMP_LINE (pfile);
1061 break;
1062
1063 case '\'':
1064 case '\"':
1065 skip_string (pfile, c);
1066 break;
1067
1068 case '/':
1069 case '-':
1070 skip_comment (pfile, c);
1071 break;
1072
1073 case '\f':
1074 case '\v':
1075 if (CPP_PEDANTIC (pfile))
1076 cpp_pedwarn (pfile, "%s in preprocessing directive",
1077 c == '\f' ? "formfeed" : "vertical tab");
1078 break;
1079
1080 }
1081 }
1082 }
1083
1084 /* Parse an identifier starting with C. */
1085
1086 void
1087 _cpp_parse_name (pfile, c)
1088 cpp_reader *pfile;
1089 int c;
1090 {
1091 for (;;)
1092 {
1093 if (! is_idchar(c))
1094 {
1095 FORWARD (-1);
1096 break;
1097 }
1098
1099 if (c == '$' && CPP_PEDANTIC (pfile))
1100 cpp_pedwarn (pfile, "`$' in identifier");
1101
1102 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1103 CPP_PUTC_Q (pfile, c);
1104 c = GETC();
1105 if (c == EOF)
1106 break;
1107 }
1108 return;
1109 }
1110
1111 /* Parse and skip over a string starting with C. A single quoted
1112 string is treated like a double -- some programs (e.g., troff) are
1113 perverse this way. (However, a single quoted string is not allowed
1114 to extend over multiple lines.) */
1115 static void
1116 skip_string (pfile, c)
1117 cpp_reader *pfile;
1118 int c;
1119 {
1120 unsigned int start_line, start_column;
1121 unsigned int null_count = 0;
1122
1123 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1124 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1125 while (1)
1126 {
1127 int cc = GETC();
1128 switch (cc)
1129 {
1130 case EOF:
1131 cpp_error_with_line (pfile, start_line, start_column,
1132 "unterminated string or character constant");
1133 if (pfile->multiline_string_line != start_line
1134 && pfile->multiline_string_line != 0)
1135 cpp_error_with_line (pfile,
1136 pfile->multiline_string_line, -1,
1137 "possible real start of unterminated constant");
1138 pfile->multiline_string_line = 0;
1139 goto out;
1140
1141 case '\0':
1142 null_count++;
1143 break;
1144
1145 case '\n':
1146 CPP_BUMP_LINE (pfile);
1147 /* In Fortran and assembly language, silently terminate
1148 strings of either variety at end of line. This is a
1149 kludge around not knowing where comments are in these
1150 languages. */
1151 if (CPP_OPTION (pfile, lang_fortran)
1152 || CPP_OPTION (pfile, lang_asm))
1153 {
1154 FORWARD(-1);
1155 goto out;
1156 }
1157 /* Character constants may not extend over multiple lines.
1158 In Standard C, neither may strings. We accept multiline
1159 strings as an extension. */
1160 if (c == '\'')
1161 {
1162 cpp_error_with_line (pfile, start_line, start_column,
1163 "unterminated character constant");
1164 FORWARD(-1);
1165 goto out;
1166 }
1167 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1168 cpp_pedwarn_with_line (pfile, start_line, start_column,
1169 "string constant runs past end of line");
1170 if (pfile->multiline_string_line == 0)
1171 pfile->multiline_string_line = start_line;
1172 break;
1173
1174 case '\r':
1175 if (CPP_BUFFER (pfile)->has_escapes)
1176 {
1177 cpp_ice (pfile, "\\r escape inside string constant");
1178 FORWARD(1);
1179 }
1180 else
1181 /* Backslash newline is replaced by nothing at all. */
1182 CPP_BUMP_LINE (pfile);
1183 break;
1184
1185 case '\\':
1186 FORWARD(1);
1187 break;
1188
1189 case '\"':
1190 case '\'':
1191 if (cc == c)
1192 goto out;
1193 break;
1194 }
1195 }
1196
1197 out:
1198 if (null_count == 1)
1199 cpp_warning (pfile, "null character in string or character constant");
1200 else if (null_count > 1)
1201 cpp_warning (pfile, "null characters in string or character constant");
1202 }
1203
1204 /* Parse a string and copy it to the output. */
1205
1206 static void
1207 parse_string (pfile, c)
1208 cpp_reader *pfile;
1209 int c;
1210 {
1211 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1212 const U_CHAR *limit;
1213
1214 skip_string (pfile, c);
1215
1216 limit = CPP_BUFFER (pfile)->cur;
1217 CPP_RESERVE (pfile, limit - start + 2);
1218 CPP_PUTC_Q (pfile, c);
1219 for (; start < limit; start++)
1220 if (*start != '\r')
1221 CPP_PUTC_Q (pfile, *start);
1222 }
1223
1224 /* Get the next token, and add it to the text in pfile->token_buffer.
1225 Return the kind of token we got. */
1226
1227 enum cpp_ttype
1228 _cpp_lex_token (pfile)
1229 cpp_reader *pfile;
1230 {
1231 register int c, c2;
1232 enum cpp_ttype token;
1233
1234 if (CPP_BUFFER (pfile) == NULL)
1235 return CPP_EOF;
1236
1237 get_next:
1238 c = GETC();
1239 switch (c)
1240 {
1241 case EOF:
1242 return CPP_EOF;
1243
1244 case '/':
1245 if (PEEKC () == '=')
1246 goto op2;
1247
1248 comment:
1249 if (CPP_OPTION (pfile, discard_comments))
1250 c = skip_comment (pfile, c);
1251 else
1252 c = copy_comment (pfile, c);
1253 if (c != ' ')
1254 goto randomchar;
1255
1256 /* Comments are equivalent to spaces.
1257 For -traditional, a comment is equivalent to nothing. */
1258 if (!CPP_OPTION (pfile, discard_comments))
1259 return CPP_COMMENT;
1260 else if (CPP_TRADITIONAL (pfile))
1261 goto get_next;
1262 else
1263 {
1264 CPP_PUTC (pfile, c);
1265 return CPP_HSPACE;
1266 }
1267
1268 case '#':
1269 CPP_PUTC (pfile, c);
1270
1271 hash:
1272 c2 = PEEKC ();
1273 if (c2 == '#')
1274 {
1275 FORWARD (1);
1276 CPP_PUTC (pfile, c2);
1277 return CPP_PASTE;
1278 }
1279 else if (c2 == '%' && PEEKN (1) == ':')
1280 {
1281 /* Digraph: "%:" == "#". */
1282 FORWARD (1);
1283 CPP_RESERVE (pfile, 2);
1284 CPP_PUTC_Q (pfile, c2);
1285 CPP_PUTC_Q (pfile, GETC ());
1286 return CPP_PASTE;
1287 }
1288 else
1289 return CPP_HASH;
1290
1291 case '\"':
1292 case '\'':
1293 parse_string (pfile, c);
1294 return c == '\'' ? CPP_CHAR : CPP_STRING;
1295
1296 case '$':
1297 if (!CPP_OPTION (pfile, dollars_in_ident))
1298 goto randomchar;
1299 goto letter;
1300
1301 case ':':
1302 c2 = PEEKC ();
1303 /* Digraph: ":>" == "]". */
1304 if (c2 == '>'
1305 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1306 goto op2;
1307 goto randomchar;
1308
1309 case '&':
1310 case '+':
1311 case '|':
1312 c2 = PEEKC ();
1313 if (c2 == c || c2 == '=')
1314 goto op2;
1315 goto randomchar;
1316
1317 case '%':
1318 /* Digraphs: "%:" == "#", "%>" == "}". */
1319 c2 = PEEKC ();
1320 if (c2 == ':')
1321 {
1322 FORWARD (1);
1323 CPP_RESERVE (pfile, 2);
1324 CPP_PUTC_Q (pfile, c);
1325 CPP_PUTC_Q (pfile, c2);
1326 goto hash;
1327 }
1328 else if (c2 == '>')
1329 {
1330 FORWARD (1);
1331 CPP_RESERVE (pfile, 2);
1332 CPP_PUTC_Q (pfile, c);
1333 CPP_PUTC_Q (pfile, c2);
1334 return CPP_OPEN_BRACE;
1335 }
1336 /* else fall through */
1337
1338 case '*':
1339 case '!':
1340 case '=':
1341 case '^':
1342 if (PEEKC () == '=')
1343 goto op2;
1344 goto randomchar;
1345
1346 case '-':
1347 c2 = PEEKC ();
1348 if (c2 == '-')
1349 {
1350 if (CPP_OPTION (pfile, chill))
1351 goto comment; /* Chill style comment */
1352 else
1353 goto op2;
1354 }
1355 else if (c2 == '=')
1356 goto op2;
1357 else if (c2 == '>')
1358 {
1359 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1360 {
1361 /* In C++, there's a ->* operator. */
1362 token = CPP_OTHER;
1363 CPP_RESERVE (pfile, 4);
1364 CPP_PUTC_Q (pfile, c);
1365 CPP_PUTC_Q (pfile, GETC ());
1366 CPP_PUTC_Q (pfile, GETC ());
1367 return token;
1368 }
1369 goto op2;
1370 }
1371 goto randomchar;
1372
1373 case '<':
1374 if (pfile->parsing_include_directive)
1375 {
1376 for (;;)
1377 {
1378 CPP_PUTC (pfile, c);
1379 if (c == '>')
1380 break;
1381 c = GETC ();
1382 if (c == '\n' || c == EOF)
1383 {
1384 cpp_error (pfile,
1385 "missing '>' in `#include <FILENAME>'");
1386 break;
1387 }
1388 else if (c == '\r')
1389 {
1390 if (!CPP_BUFFER (pfile)->has_escapes)
1391 {
1392 /* Backslash newline is replaced by nothing. */
1393 CPP_ADJUST_WRITTEN (pfile, -1);
1394 CPP_BUMP_LINE (pfile);
1395 }
1396 else
1397 {
1398 /* We might conceivably get \r- or \r<space> in
1399 here. Just delete 'em. */
1400 int d = GETC();
1401 if (d != '-' && d != ' ')
1402 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1403 CPP_ADJUST_WRITTEN (pfile, -1);
1404 }
1405 }
1406 }
1407 return CPP_STRING;
1408 }
1409 /* Digraphs: "<%" == "{", "<:" == "[". */
1410 c2 = PEEKC ();
1411 if (c2 == '%')
1412 {
1413 FORWARD (1);
1414 CPP_RESERVE (pfile, 2);
1415 CPP_PUTC_Q (pfile, c);
1416 CPP_PUTC_Q (pfile, c2);
1417 return CPP_CLOSE_BRACE;
1418 }
1419 else if (c2 == ':')
1420 goto op2;
1421 /* else fall through */
1422 case '>':
1423 c2 = PEEKC ();
1424 if (c2 == '=')
1425 goto op2;
1426 /* GNU C++ supports MIN and MAX operators <? and >?. */
1427 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1428 goto randomchar;
1429 FORWARD(1);
1430 CPP_RESERVE (pfile, 3);
1431 CPP_PUTC_Q (pfile, c);
1432 CPP_PUTC_Q (pfile, c2);
1433 if (PEEKC () == '=')
1434 CPP_PUTC_Q (pfile, GETC ());
1435 return CPP_OTHER;
1436
1437 case '.':
1438 c2 = PEEKC ();
1439 if (ISDIGIT (c2))
1440 {
1441 CPP_PUTC (pfile, c);
1442 c = GETC ();
1443 goto number;
1444 }
1445
1446 /* In C++ there's a .* operator. */
1447 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1448 goto op2;
1449
1450 if (c2 == '.' && PEEKN(1) == '.')
1451 {
1452 CPP_RESERVE (pfile, 3);
1453 CPP_PUTC_Q (pfile, '.');
1454 CPP_PUTC_Q (pfile, '.');
1455 CPP_PUTC_Q (pfile, '.');
1456 FORWARD (2);
1457 return CPP_ELLIPSIS;
1458 }
1459 goto randomchar;
1460
1461 op2:
1462 CPP_RESERVE (pfile, 2);
1463 CPP_PUTC_Q (pfile, c);
1464 CPP_PUTC_Q (pfile, GETC ());
1465 return CPP_OTHER;
1466
1467 case 'L':
1468 c2 = PEEKC ();
1469 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1470 {
1471 CPP_PUTC (pfile, c);
1472 c = GETC ();
1473 parse_string (pfile, c);
1474 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1475 }
1476 goto letter;
1477
1478 case '0': case '1': case '2': case '3': case '4':
1479 case '5': case '6': case '7': case '8': case '9':
1480 number:
1481 c2 = '.';
1482 for (;;)
1483 {
1484 CPP_RESERVE (pfile, 2);
1485 CPP_PUTC_Q (pfile, c);
1486 c = PEEKC ();
1487 if (c == EOF)
1488 break;
1489 if (!is_numchar(c) && c != '.'
1490 && ((c2 != 'e' && c2 != 'E'
1491 && ((c2 != 'p' && c2 != 'P')
1492 || CPP_OPTION (pfile, c89)))
1493 || (c != '+' && c != '-')))
1494 break;
1495 FORWARD(1);
1496 c2= c;
1497 }
1498 return CPP_NUMBER;
1499 case 'b': case 'c': case 'd': case 'h': case 'o':
1500 case 'B': case 'C': case 'D': case 'H': case 'O':
1501 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1502 {
1503 CPP_RESERVE (pfile, 2);
1504 CPP_PUTC_Q (pfile, c);
1505 CPP_PUTC_Q (pfile, '\'');
1506 FORWARD(1);
1507 for (;;)
1508 {
1509 c = GETC();
1510 if (c == EOF)
1511 goto chill_number_eof;
1512 if (!is_numchar(c))
1513 break;
1514 CPP_PUTC (pfile, c);
1515 }
1516 if (c == '\'')
1517 {
1518 CPP_RESERVE (pfile, 2);
1519 CPP_PUTC_Q (pfile, c);
1520 return CPP_STRING;
1521 }
1522 else
1523 {
1524 FORWARD(-1);
1525 chill_number_eof:
1526 return CPP_NUMBER;
1527 }
1528 }
1529 else
1530 goto letter;
1531 case '_':
1532 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1533 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1534 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1535 case 'x': case 'y': case 'z':
1536 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1537 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1538 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1539 case 'Y': case 'Z':
1540 letter:
1541 _cpp_parse_name (pfile, c);
1542 return CPP_MACRO;
1543
1544 case ' ': case '\t': case '\v': case '\f': case '\0':
1545 {
1546 int null_count = 0;
1547
1548 for (;;)
1549 {
1550 if (c == '\0')
1551 null_count++;
1552 else
1553 CPP_PUTC (pfile, c);
1554 c = PEEKC ();
1555 if (c == EOF || !is_hspace(c))
1556 break;
1557 FORWARD(1);
1558 }
1559 if (null_count)
1560 null_warning (pfile, null_count);
1561 return CPP_HSPACE;
1562 }
1563
1564 case '\r':
1565 if (CPP_BUFFER (pfile)->has_escapes)
1566 {
1567 c = GETC ();
1568 if (c == '-')
1569 {
1570 if (pfile->output_escapes)
1571 CPP_PUTS (pfile, "\r-", 2);
1572 _cpp_parse_name (pfile, GETC ());
1573 return CPP_NAME;
1574 }
1575 else if (c == ' ')
1576 {
1577 /* "\r " means a space, but only if necessary to prevent
1578 accidental token concatenation. */
1579 CPP_RESERVE (pfile, 2);
1580 if (pfile->output_escapes)
1581 CPP_PUTC_Q (pfile, '\r');
1582 CPP_PUTC_Q (pfile, c);
1583 return CPP_HSPACE;
1584 }
1585 else
1586 {
1587 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1588 goto get_next;
1589 }
1590 }
1591 else
1592 {
1593 /* Backslash newline is ignored. */
1594 if (!ACTIVE_MARK_P (pfile))
1595 CPP_BUMP_LINE (pfile);
1596 goto get_next;
1597 }
1598
1599 case '\n':
1600 CPP_PUTC (pfile, c);
1601 return CPP_VSPACE;
1602
1603 case '(': token = CPP_OPEN_PAREN; goto char1;
1604 case ')': token = CPP_CLOSE_PAREN; goto char1;
1605 case '{': token = CPP_OPEN_BRACE; goto char1;
1606 case '}': token = CPP_CLOSE_BRACE; goto char1;
1607 case ',': token = CPP_COMMA; goto char1;
1608 case ';': token = CPP_SEMICOLON; goto char1;
1609
1610 randomchar:
1611 default:
1612 token = CPP_OTHER;
1613 char1:
1614 CPP_PUTC (pfile, c);
1615 return token;
1616 }
1617 }
1618
1619 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1620 Caller is expected to have checked no_macro_expand. */
1621 static int
1622 maybe_macroexpand (pfile, written)
1623 cpp_reader *pfile;
1624 long written;
1625 {
1626 U_CHAR *macro = pfile->token_buffer + written;
1627 size_t len = CPP_WRITTEN (pfile) - written;
1628 cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
1629
1630 /* cpp_lookup never returns null. */
1631 if (hp->type == T_VOID)
1632 return 0;
1633 if (hp->disabled || hp->type == T_IDENTITY)
1634 {
1635 if (pfile->output_escapes)
1636 {
1637 /* Insert a no-reexpand marker before IDENT. */
1638 CPP_RESERVE (pfile, 2);
1639 CPP_ADJUST_WRITTEN (pfile, 2);
1640 macro = pfile->token_buffer + written;
1641
1642 memmove (macro + 2, macro, len);
1643 macro[0] = '\r';
1644 macro[1] = '-';
1645 }
1646 return 0;
1647 }
1648 if (hp->type == T_EMPTY)
1649 {
1650 /* Special case optimization: macro expands to nothing. */
1651 CPP_SET_WRITTEN (pfile, written);
1652 CPP_PUTC_Q (pfile, ' ');
1653 return 1;
1654 }
1655
1656 /* If macro wants an arglist, verify that a '(' follows. */
1657 if (hp->type == T_FMACRO)
1658 {
1659 int macbuf_whitespace = 0;
1660 int c;
1661
1662 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1663 {
1664 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1665 for (;;)
1666 {
1667 _cpp_skip_hspace (pfile);
1668 c = PEEKC ();
1669 if (c == '\n')
1670 FORWARD(1);
1671 else
1672 break;
1673 }
1674 if (point != CPP_BUFFER (pfile)->cur)
1675 macbuf_whitespace = 1;
1676 if (c == '(')
1677 goto is_macro_call;
1678 else if (c != EOF)
1679 goto not_macro_call;
1680 cpp_pop_buffer (pfile);
1681 }
1682
1683 CPP_SET_MARK (pfile);
1684 for (;;)
1685 {
1686 _cpp_skip_hspace (pfile);
1687 c = PEEKC ();
1688 if (c == '\n')
1689 FORWARD(1);
1690 else
1691 break;
1692 }
1693 CPP_GOTO_MARK (pfile);
1694
1695 if (c != '(')
1696 {
1697 not_macro_call:
1698 if (macbuf_whitespace)
1699 CPP_PUTC (pfile, ' ');
1700
1701 /* K+R treated this as a hard error. */
1702 if (CPP_WTRADITIONAL (pfile))
1703 cpp_warning (pfile,
1704 "function macro %s must be used with arguments in traditional C",
1705 hp->name);
1706 return 0;
1707 }
1708 }
1709
1710 is_macro_call:
1711 /* This is now known to be a macro call.
1712 Expand the macro, reading arguments as needed,
1713 and push the expansion on the input stack. */
1714 _cpp_macroexpand (pfile, hp);
1715 CPP_SET_WRITTEN (pfile, written);
1716 return 1;
1717 }
1718
1719 /* Complain about \v or \f in a preprocessing directive (constraint
1720 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1721 static void
1722 pedantic_whitespace (pfile, p, len)
1723 cpp_reader *pfile;
1724 U_CHAR *p;
1725 unsigned int len;
1726 {
1727 while (len)
1728 {
1729 if (*p == '\v')
1730 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1731 else if (*p == '\f')
1732 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1733 p++;
1734 len--;
1735 }
1736 }
1737
1738
1739 enum cpp_ttype
1740 cpp_get_token (pfile)
1741 cpp_reader *pfile;
1742 {
1743 enum cpp_ttype token;
1744 long written = CPP_WRITTEN (pfile);
1745 int macro_buffer;
1746
1747 get_next:
1748 token = _cpp_lex_token (pfile);
1749
1750 switch (token)
1751 {
1752 default:
1753 if (pfile->skipping)
1754 break;
1755 pfile->potential_control_macro = 0;
1756 pfile->only_seen_white = 0;
1757 break;
1758
1759 case CPP_HSPACE:
1760 case CPP_COMMENT:
1761 break;
1762
1763 case CPP_VSPACE:
1764 if (pfile->only_seen_white == 0)
1765 pfile->only_seen_white = 1;
1766 CPP_BUMP_LINE (pfile);
1767 break;
1768
1769 case CPP_HASH:
1770 pfile->potential_control_macro = 0;
1771 if (!pfile->only_seen_white)
1772 break;
1773 /* XXX shouldn't have to do this - remove the hash or %: from
1774 the token buffer. */
1775 if (CPP_PWRITTEN (pfile)[-1] == '#')
1776 CPP_ADJUST_WRITTEN (pfile, -1);
1777 else
1778 CPP_ADJUST_WRITTEN (pfile, -2);
1779
1780 if (_cpp_handle_directive (pfile))
1781 {
1782 token = CPP_DIRECTIVE;
1783 break;
1784 }
1785 pfile->only_seen_white = 0;
1786 CPP_PUTC (pfile, '#');
1787 break;
1788
1789 case CPP_MACRO:
1790 if (pfile->skipping)
1791 break;
1792 pfile->potential_control_macro = 0;
1793 pfile->only_seen_white = 0;
1794 if (! pfile->no_macro_expand
1795 && maybe_macroexpand (pfile, written))
1796 goto get_next;
1797 token = CPP_NAME;
1798 break;
1799
1800 /* Do not run this case through the 'skipping' logic. */
1801 case CPP_EOF:
1802 if (CPP_BUFFER (pfile) == NULL)
1803 return CPP_EOF;
1804 macro_buffer = CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile));
1805
1806 cpp_pop_buffer (pfile);
1807 if (macro_buffer)
1808 goto get_next;
1809 return CPP_EOF;
1810 }
1811
1812 if (pfile->skipping)
1813 {
1814 CPP_SET_WRITTEN (pfile, written);
1815 goto get_next;
1816 }
1817 return token;
1818 }
1819
1820 /* Like cpp_get_token, but skip spaces and comments. */
1821
1822 enum cpp_ttype
1823 cpp_get_non_space_token (pfile)
1824 cpp_reader *pfile;
1825 {
1826 int old_written = CPP_WRITTEN (pfile);
1827 for (;;)
1828 {
1829 enum cpp_ttype token = cpp_get_token (pfile);
1830 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1831 return token;
1832 CPP_SET_WRITTEN (pfile, old_written);
1833 }
1834 }
1835
1836 /* Like cpp_get_token, except that it does not execute directives,
1837 does not consume vertical space, and discards horizontal space. */
1838 enum cpp_ttype
1839 _cpp_get_directive_token (pfile)
1840 cpp_reader *pfile;
1841 {
1842 long old_written;
1843 enum cpp_ttype token;
1844 int at_bol;
1845
1846 get_next:
1847 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1848 old_written = CPP_WRITTEN (pfile);
1849 token = _cpp_lex_token (pfile);
1850 switch (token)
1851 {
1852 default:
1853 return token;
1854
1855 case CPP_VSPACE:
1856 /* Put it back and return VSPACE. */
1857 FORWARD(-1);
1858 CPP_ADJUST_WRITTEN (pfile, -1);
1859 return CPP_VSPACE;
1860
1861 case CPP_HSPACE:
1862 /* The purpose of this rather strange check is to prevent pedantic
1863 warnings for ^L in an #ifdefed out block. */
1864 if (CPP_PEDANTIC (pfile) && ! at_bol)
1865 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1866 CPP_WRITTEN (pfile) - old_written);
1867 CPP_SET_WRITTEN (pfile, old_written);
1868 goto get_next;
1869 return CPP_HSPACE;
1870
1871 case CPP_MACRO:
1872 if (! pfile->no_macro_expand
1873 && maybe_macroexpand (pfile, old_written))
1874 goto get_next;
1875 return CPP_NAME;
1876
1877 case CPP_EOF:
1878 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1879 {
1880 cpp_pop_buffer (pfile);
1881 goto get_next;
1882 }
1883 else
1884 /* This can happen for files that don't end with a newline,
1885 and for cpp_define and friends. Pretend they do, so
1886 callers don't have to deal. A warning will be issued by
1887 someone else, if necessary. */
1888 return CPP_VSPACE;
1889 }
1890 }
1891
1892 /* Determine the current line and column. Used only by read_and_prescan. */
1893 static U_CHAR *
1894 find_position (start, limit, linep)
1895 U_CHAR *start;
1896 U_CHAR *limit;
1897 unsigned long *linep;
1898 {
1899 unsigned long line = *linep;
1900 U_CHAR *lbase = start;
1901 while (start < limit)
1902 {
1903 U_CHAR ch = *start++;
1904 if (ch == '\n' || ch == '\r')
1905 {
1906 line++;
1907 lbase = start;
1908 }
1909 }
1910 *linep = line;
1911 return lbase;
1912 }
1913
1914 /* The following table is used by _cpp_prescan. If we have
1915 designated initializers, it can be constant data; otherwise, it is
1916 set up at runtime by _cpp_init_input_buffer. */
1917
1918 #if (GCC_VERSION >= 2007)
1919 #define init_chartab() /* nothing */
1920 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1921 #define END };
1922 #define s(p, v) [p] = v,
1923 #else
1924 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1925 static void init_chartab PARAMS ((void)) { \
1926 unsigned char *x = chartab;
1927 #define END }
1928 #define s(p, v) x[p] = v;
1929 #endif
1930
1931 /* Table of characters that can't be handled in the inner loop.
1932 Also contains the mapping between trigraph third characters and their
1933 replacements. */
1934 #define SPECCASE_CR 1
1935 #define SPECCASE_BACKSLASH 2
1936 #define SPECCASE_QUESTION 3
1937
1938 CHARTAB
1939 s('\r', SPECCASE_CR)
1940 s('\\', SPECCASE_BACKSLASH)
1941 s('?', SPECCASE_QUESTION)
1942
1943 s('=', '#') s(')', ']') s('!', '|')
1944 s('(', '[') s('\'', '^') s('>', '}')
1945 s('/', '\\') s('<', '{') s('-', '~')
1946 END
1947
1948 #undef CHARTAB
1949 #undef END
1950 #undef s
1951
1952 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1953 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1954
1955 /* Prescan pass over a file already loaded into BUF. This is
1956 translation phases 1 and 2 (C99 5.1.1.2).
1957
1958 Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1959 canonical form (\n). If enabled, convert and/or warn about
1960 trigraphs. Convert backslash-newline to a one-character escape
1961 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1962 token). If there is no newline at the end of the file, add one and
1963 warn. Returns -1 on failure, or the actual length of the data to
1964 be scanned.
1965
1966 This function does a lot of work, and can be a serious performance
1967 bottleneck. It has been tuned heavily; make sure you understand it
1968 before hacking. The common case - no trigraphs, Unix style line
1969 breaks, backslash-newline set off by whitespace, newline at EOF -
1970 has been optimized at the expense of the others. The performance
1971 penalty for DOS style line breaks (\r\n) is about 15%.
1972
1973 Warnings lose particularly heavily since we have to determine the
1974 line number, which involves scanning from the beginning of the file
1975 or from the last warning. The penalty for the absence of a newline
1976 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1977
1978 If your file has more than one kind of end-of-line marker, you
1979 will get messed-up line numbering. */
1980
1981 ssize_t
1982 _cpp_prescan (pfile, fp, len)
1983 cpp_reader *pfile;
1984 cpp_buffer *fp;
1985 ssize_t len;
1986 {
1987 U_CHAR *buf, *op;
1988 const U_CHAR *ibase, *ip, *ilimit;
1989 U_CHAR *line_base;
1990 unsigned long line;
1991 unsigned int deferred_newlines;
1992
1993 /* Allocate an extra byte in case we must add a trailing \n. */
1994 buf = (U_CHAR *) xmalloc (len + 1);
1995 line_base = op = buf;
1996 ip = ibase = fp->buf;
1997 ilimit = ibase + len;
1998 line = 1;
1999 deferred_newlines = 0;
2000
2001 for (;;)
2002 {
2003 const U_CHAR *iq;
2004
2005 /* Deal with \-newline, potentially in the middle of a token. */
2006 if (deferred_newlines)
2007 {
2008 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
2009 {
2010 /* Previous was not white space. Skip to white
2011 space, if we can, before outputting the \r's */
2012 iq = ip;
2013 while (iq < ilimit
2014 && *iq != ' '
2015 && *iq != '\t'
2016 && *iq != '\n'
2017 && NORMAL(*iq))
2018 iq++;
2019 memcpy (op, ip, iq - ip);
2020 op += iq - ip;
2021 ip += iq - ip;
2022 if (! NORMAL(*ip))
2023 goto do_speccase;
2024 }
2025 while (deferred_newlines)
2026 deferred_newlines--, *op++ = '\r';
2027 }
2028
2029 /* Copy as much as we can without special treatment. */
2030 iq = ip;
2031 while (iq < ilimit && NORMAL (*iq)) iq++;
2032 memcpy (op, ip, iq - ip);
2033 op += iq - ip;
2034 ip += iq - ip;
2035
2036 do_speccase:
2037 if (ip >= ilimit)
2038 break;
2039
2040 switch (chartab[*ip++])
2041 {
2042 case SPECCASE_CR: /* \r */
2043 if (ip[-2] != '\n')
2044 {
2045 if (ip < ilimit && *ip == '\n')
2046 ip++;
2047 *op++ = '\n';
2048 }
2049 break;
2050
2051 case SPECCASE_BACKSLASH: /* \ */
2052 backslash:
2053 if (ip < ilimit)
2054 {
2055 if (*ip == '\n')
2056 {
2057 deferred_newlines++;
2058 ip++;
2059 if (*ip == '\r') ip++;
2060 break;
2061 }
2062 else if (*ip == '\r')
2063 {
2064 deferred_newlines++;
2065 ip++;
2066 if (*ip == '\n') ip++;
2067 break;
2068 }
2069 }
2070
2071 *op++ = '\\';
2072 break;
2073
2074 case SPECCASE_QUESTION: /* ? */
2075 {
2076 unsigned int d, t;
2077
2078 *op++ = '?'; /* Normal non-trigraph case */
2079 if (ip > ilimit - 2 || ip[0] != '?')
2080 break;
2081
2082 d = ip[1];
2083 t = chartab[d];
2084 if (NONTRI (t))
2085 break;
2086
2087 if (CPP_OPTION (pfile, warn_trigraphs))
2088 {
2089 unsigned long col;
2090 line_base = find_position (line_base, op, &line);
2091 col = op - line_base + 1;
2092 if (CPP_OPTION (pfile, trigraphs))
2093 cpp_warning_with_line (pfile, line, col,
2094 "trigraph ??%c converted to %c", d, t);
2095 else
2096 cpp_warning_with_line (pfile, line, col,
2097 "trigraph ??%c ignored", d);
2098 }
2099
2100 ip += 2;
2101 if (CPP_OPTION (pfile, trigraphs))
2102 {
2103 op[-1] = t; /* Overwrite '?' */
2104 if (t == '\\')
2105 {
2106 op--;
2107 goto backslash;
2108 }
2109 }
2110 else
2111 {
2112 *op++ = '?';
2113 *op++ = d;
2114 }
2115 }
2116 break;
2117 }
2118 }
2119
2120 #ifdef HAVE_MMAP_FILE
2121 if (fp->mapped)
2122 munmap ((caddr_t) fp->buf, len);
2123 else
2124 #endif
2125 free ((PTR) fp->buf);
2126
2127 if (op[-1] != '\n')
2128 {
2129 unsigned long col;
2130 line_base = find_position (line_base, op, &line);
2131 col = op - line_base + 1;
2132 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2133 *op++ = '\n';
2134 }
2135
2136 fp->buf = buf;
2137 return op - buf;
2138 }
2139
2140 /* Allocate pfile->input_buffer, and initialize chartab[]
2141 if it hasn't happened already. */
2142
2143 void
2144 _cpp_init_input_buffer (pfile)
2145 cpp_reader *pfile;
2146 {
2147 U_CHAR *tmp;
2148
2149 init_chartab ();
2150 _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2151
2152 /* Determine the appropriate size for the input buffer. Normal C
2153 source files are smaller than eight K. */
2154 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2155 address arithmetic all the time, and 3 for pushback during buffer
2156 refill, in case there's a potential trigraph or end-of-line
2157 digraph at the end of a block. */
2158
2159 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2160 pfile->input_buffer = tmp;
2161 pfile->input_buffer_len = 8192;
2162 }
2163
2164 /* Utility routine:
2165 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2166 and extending for LEN characters to the NUL-terminated string
2167 STRING. Typical usage:
2168
2169 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2170 "inline"))
2171 { ... }
2172 */
2173
2174 int
2175 cpp_idcmp (token, len, string)
2176 const U_CHAR *token;
2177 size_t len;
2178 const char *string;
2179 {
2180 size_t len2 = strlen (string);
2181 int r;
2182
2183 if ((r = memcmp (token, string, MIN (len, len2))))
2184 return r;
2185
2186 /* The longer of the two strings sorts after the shorter. */
2187 if (len == len2)
2188 return 0;
2189 else if (len < len2)
2190 return -1;
2191 else
2192 return 1;
2193 }
2194
2195 #ifdef NEW_LEXER
2196
2197 /* Lexing algorithm.
2198
2199 The original lexer in cpplib was made up of two passes: a first pass
2200 that replaced trigraphs and deleted escaped newlines, and a second
2201 pass that tokenized the result of the first pass. Tokenisation was
2202 performed by peeking at the next character in the input stream. For
2203 example, if the input stream contained "!=", the handler for the !
2204 character would peek at the next character, and if it were a '='
2205 would skip over it, and return a "!=" token, otherwise it would
2206 return just the "!" token.
2207
2208 To implement a single-pass lexer, this peeking ahead is unworkable.
2209 An arbitrary number of escaped newlines, and trigraphs (in particular
2210 ??/ which translates to the escape \), could separate the '!' and '='
2211 in the input stream, yet the next token is still a "!=".
2212
2213 Suppose instead that we lex by one logical line at a time, producing
2214 a token list or stack for each logical line, and when seeing the '!'
2215 push a CPP_NOT token on the list. Then if the '!' is part of a
2216 longer token ("!=") we know we must see the remainder of the token by
2217 the time we reach the end of the logical line. Thus we can have the
2218 '=' handler look at the previous token (at the end of the list / top
2219 of the stack) and see if it is a "!" token, and if so, instead of
2220 pushing a "=" token revise the existing token to be a "!=" token.
2221
2222 This works in the presence of escaped newlines, because the '\' would
2223 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2224 newline ('\n' or '\r') handler looks at the token at the top of the
2225 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2226 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2227 the '=' handler would never see any intervening escaped newlines.
2228
2229 To make trigraphs work in this context, as in precedence trigraphs
2230 are highest and converted before anything else, the '?' handler does
2231 lookahead to see if it is a trigraph, and if so skips the trigraph
2232 and pushes the token it represents onto the top of the stack. This
2233 also works in the particular case of a CPP_BACKSLASH trigraph.
2234
2235 To the preprocessor, whitespace is only significant to the point of
2236 knowing whether whitespace precedes a particular token. For example,
2237 the '=' handler needs to know whether there was whitespace between it
2238 and a "!" token on the top of the stack, to make the token conversion
2239 decision correctly. So each token has a PREV_WHITESPACE flag to
2240 indicate this - the standard permits consecutive whitespace to be
2241 regarded as a single space. The compiler front ends are not
2242 interested in whitespace at all; they just require a token stream.
2243 Another place where whitespace is significant to the preprocessor is
2244 a #define statement - if there is whitespace between the macro name
2245 and an initial "(" token the macro is "object-like", otherwise it is
2246 a function-like macro that takes arguments.
2247
2248 However, all is not rosy. Parsing of identifiers, numbers, comments
2249 and strings becomes trickier because of the possibility of raw
2250 trigraphs and escaped newlines in the input stream.
2251
2252 The trigraphs are three consecutive characters beginning with two
2253 question marks. A question mark is not valid as part of a number or
2254 identifier, so parsing of a number or identifier terminates normally
2255 upon reaching it, returning to the mainloop which handles the
2256 trigraph just like it would in any other position. Similarly for the
2257 backslash of a backslash-newline combination. So we just need the
2258 escaped-newline dropper in the mainloop to check if the token on the
2259 top of the stack after dropping the escaped newline is a number or
2260 identifier, and if so to continue the processing it as if nothing had
2261 happened.
2262
2263 For strings, we replace trigraphs whenever we reach a quote or
2264 newline, because there might be a backslash trigraph escaping them.
2265 We need to be careful that we start trigraph replacing from where we
2266 left off previously, because it is possible for a first scan to leave
2267 "fake" trigraphs that a second scan would pick up as real (e.g. the
2268 sequence "????/\n=" would find a fake ??= trigraph after removing the
2269 escaped newline.)
2270
2271 For line comments, on reaching a newline we scan the previous
2272 character(s) to see if it is escaped, and continue if it is. Block
2273 comments ignore everything and just focus on finding the comment
2274 termination mark. The only difficult thing, and it is surprisingly
2275 tricky, is checking if an asterisk precedes the final slash since
2276 they could be separated by escaped newlines. If the preprocessor is
2277 invoked with the output comments option, we don't bother removing
2278 escaped newlines and replacing trigraphs for output.
2279
2280 Finally, numbers can begin with a period, which is pushed initially
2281 as a CPP_DOT token in its own right. The digit handler checks if the
2282 previous token was a CPP_DOT not separated by whitespace, and if so
2283 pops it off the stack and pushes a period into the number's buffer
2284 before calling the number parser.
2285
2286 */
2287
/* Spellings of the digraphs, each annotated with the punctuator it
   stands for.  */
static const unsigned char *digraph_spellings [] =
{
  U"%:",	/* #  */
  U"%:%:",	/* ## */
  U"<:",	/* [  */
  U":>",	/* ]  */
  U"<%",	/* {  */
  U"%>"		/* }  */
};
/* Indexed by the third character of a "??x" sequence: the character
   the trigraph stands for, or zero if "??x" is not a trigraph.
   Filled in by init_trigraph_map.  */
static unsigned char trigraph_map[256];

/* Enter the nine trigraphs into trigraph_map.  */
void
init_trigraph_map ()
{
  /* { third character, replacement } */
  static const unsigned char pairs[][2] =
  {
    { '=',  '#'  },
    { '(',  '['  },
    { ')',  ']'  },
    { '/',  '\\' },
    { '\'', '^'  },
    { '<',  '{'  },
    { '>',  '}'  },
    { '!',  '|'  },
    { '-',  '~'  }
  };
  unsigned int i;

  for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    trigraph_map[pairs[i][0]] = pairs[i][1];
}
2305
2306 /* Call when a trigraph is encountered. It warns if necessary, and
2307 returns true if the trigraph should be honoured. END is the third
2308 character of a trigraph in the input stream. */
2309 static int
2310 trigraph_ok (pfile, end)
2311 cpp_reader *pfile;
2312 const unsigned char *end;
2313 {
2314 int accept = CPP_OPTION (pfile, trigraphs);
2315
2316 if (CPP_OPTION (pfile, warn_trigraphs))
2317 {
2318 unsigned int col = end - 1 - pfile->buffer->line_base;
2319 if (accept)
2320 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2321 "trigraph ??%c converted to %c",
2322 (int) *end, (int) trigraph_map[*end]);
2323 else
2324 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2325 "trigraph ??%c ignored", (int) *end);
2326 }
2327 return accept;
2328 }
2329
2330 /* Scan a string for trigraphs, warning or replacing them inline as
2331 appropriate. When parsing a string, we must call this routine
2332 before processing a newline character (if trigraphs are enabled),
2333 since the newline might be escaped by a preceding backslash
2334 trigraph sequence. Returns a pointer to the end of the name after
2335 replacement. */
2336
2337 static unsigned char*
2338 trigraph_replace (pfile, src, limit)
2339 cpp_reader *pfile;
2340 unsigned char *src;
2341 unsigned char* limit;
2342 {
2343 unsigned char *dest;
2344
2345 /* Starting with src[1], find two consecutive '?'. The case of no
2346 trigraphs is streamlined. */
2347
2348 for (; src + 1 < limit; src += 2)
2349 {
2350 if (src[0] != '?')
2351 continue;
2352
2353 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2354 if (src[-1] == '?')
2355 src--;
2356 else if (src + 2 == limit || src[1] != '?')
2357 continue;
2358
2359 /* Check if it really is a trigraph. */
2360 if (trigraph_map[src[2]] == 0)
2361 continue;
2362
2363 dest = src;
2364 goto trigraph_found;
2365 }
2366 return limit;
2367
2368 /* Now we have a trigraph, we need to scan the remaining buffer, and
2369 copy-shifting its contents left if replacement is enabled. */
2370 for (; src + 2 < limit; dest++, src++)
2371 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2372 {
2373 trigraph_found:
2374 src += 2;
2375 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2376 *dest = trigraph_map[*src];
2377 }
2378
2379 /* Copy remaining (at most 2) characters. */
2380 while (src < limit)
2381 *dest++ = *src++;
2382 return dest;
2383 }
2384
2385 /* If CUR is a backslash or the end of a trigraphed backslash, return
2386 a pointer to its beginning, otherwise NULL. We don't read beyond
2387 the buffer start, because there is the start of the comment in the
2388 buffer. */
2389 static const unsigned char *
2390 backslash_start (pfile, cur)
2391 cpp_reader *pfile;
2392 const unsigned char *cur;
2393 {
2394 if (cur[0] == '\\')
2395 return cur;
2396 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2397 && trigraph_ok (pfile, cur))
2398 return cur - 2;
2399 return 0;
2400 }
2401
2402 /* Skip a C-style block comment. This is probably the trickiest
2403 handler. We find the end of the comment by seeing if an asterisk
2404 is before every '/' we encounter. The nasty complication is that a
2405 previous asterisk may be separated by one or more escaped newlines.
2406 Returns non-zero if comment terminated by EOF, zero otherwise.

   Scanning starts at pfile->buffer->cur.  On return that pointer has
   been advanced to just past the closing '/', or to the buffer's
   rlimit if the comment was not terminated.  */
2407 static int
2408 skip_block_comment2 (pfile)
2409 cpp_reader *pfile;
2410 {
2411 cpp_buffer *buffer = pfile->buffer;
/* Non-null only directly after a run of escaped newlines that was
   itself preceded by '*'; it then points at the first character
   following that run, so a '/' found exactly there ends the comment.  */
2412 const unsigned char *char_after_star = 0;
2413 register const unsigned char *cur = buffer->cur;
2414 int seen_eof = 0;
2415
2416 /* Inner loop would think the comment has ended if the first comment
2417 character is a '/'. Avoid this and keep the inner loop clean by
2418 skipping such a character. */
2419 if (cur < buffer->rlimit && cur[0] == '/')
2420 cur++;
2421
2422 for (; cur < buffer->rlimit; )
2423 {
2424 unsigned char c = *cur++;
2425
2426 /* People like decorating comments with '*', so check for
2427 '/' instead for efficiency. */
2428 if (c == '/')
2429 {
/* cur[-2] is the character immediately before this '/'.  */
2430 if (cur[-2] == '*' || cur - 1 == char_after_star)
2431 goto out;
2432
2433 /* Warn about potential nested comments, but not when
2434 the final character inside the comment is a '/'.
2435 Don't bother to get it right across escaped newlines. */
2436 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2437 && cur[0] == '*' && cur[1] != '/')
2438 {
/* Publish the position before warning; presumably the diagnostic
   routine reads buffer->cur to locate the message -- confirm.  */
2439 buffer->cur = cur;
2440 cpp_warning (pfile, "'/*' within comment");
2441 }
2442 }
2443 else if (IS_NEWLINE(c))
2444 {
/* cur - 2 is the character just before the newline; bslash is
   non-null if that character ends a (possibly trigraphed) backslash.  */
2445 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2446
2447 handle_newline (cur, buffer->rlimit, c);
2448 /* Work correctly if there is an asterisk before an
2449 arbitrarily long sequence of escaped newlines. */
2450 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2451 char_after_star = cur;
2452 else
2453 char_after_star = 0;
2454 }
2455 }
/* Fell off the end of the buffer without finding the terminator.  */
2456 seen_eof = 1;
2457
2458 out:
2459 buffer->cur = cur;
2460 return seen_eof;
2461 }
2462
2463 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2464 Returns non-zero if a multiline comment. */
2465 static int
2466 skip_line_comment2 (pfile)
2467 cpp_reader *pfile;
2468 {
2469 cpp_buffer *buffer = pfile->buffer;
2470 register const unsigned char *cur = buffer->cur;
2471 int multiline = 0;
2472
2473 for (; cur < buffer->rlimit; )
2474 {
2475 unsigned char c = *cur++;
2476
2477 if (IS_NEWLINE (c))
2478 {
2479 /* Check for a (trigaph?) backslash escaping the newline. */
2480 if (!backslash_start (pfile, cur - 2))
2481 goto out;
2482 multiline = 1;
2483 handle_newline (cur, buffer->rlimit, c);
2484 }
2485 }
2486 cur++;
2487
2488 out:
2489 buffer->cur = cur - 1; /* Leave newline for caller. */
2490 return multiline;
2491 }
2492
2493 /* Skips whitespace, stopping at next non-whitespace character.
2494 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2495 to be assigned the correct column. */
2496 static void
2497 skip_whitespace (pfile, in_directive)
2498 cpp_reader *pfile;
2499 int in_directive;
2500 {
2501 cpp_buffer *buffer = pfile->buffer;
2502 register const unsigned char *cur = buffer->cur;
2503 unsigned short null_count = 0;
2504
2505 for (; cur < buffer->rlimit; )
2506 {
2507 unsigned char c = *cur++;
2508
2509 if (c == '\t')
2510 {
2511 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2512 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2513 - col % CPP_OPTION(pfile, tabstop));
2514 }
2515 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2516 continue;
2517 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2518 goto out;
2519 if (c == '\0')
2520 null_count++;
2521 /* Mut be '\f' or '\v' */
2522 else if (in_directive && CPP_PEDANTIC (pfile))
2523 cpp_pedwarn (pfile, "%s in preprocessing directive",
2524 c == '\f' ? "formfeed" : "vertical tab");
2525 }
2526 cur++;
2527
2528 out:
2529 buffer->cur = cur - 1;
2530 if (null_count)
2531 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2532 : "embedded null character ignored");
2533 }
2534
2535 /* Parse (append) an identifier. */
2536 static void
2537 parse_name (pfile, list, name)
2538 cpp_reader *pfile;
2539 cpp_toklist *list;
2540 cpp_name *name;
2541 {
2542 const unsigned char *name_limit;
2543 unsigned char *namebuf;
2544 cpp_buffer *buffer = pfile->buffer;
2545 register const unsigned char *cur = buffer->cur;
2546
2547 expanded:
2548 name_limit = list->namebuf + list->name_cap;
2549 namebuf = list->namebuf + list->name_used;
2550
2551 for (; cur < buffer->rlimit && namebuf < name_limit; )
2552 {
2553 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2554
2555 if (! is_idchar(c))
2556 goto out;
2557 namebuf++;
2558 cur++;
2559 if (c == '$' && CPP_PEDANTIC (pfile))
2560 {
2561 buffer->cur = cur;
2562 cpp_pedwarn (pfile, "'$' character in identifier");
2563 }
2564 }
2565
2566 /* Run out of name space? */
2567 if (cur < buffer->rlimit)
2568 {
2569 list->name_used = namebuf - list->namebuf;
2570 auto_expand_name_space (list);
2571 goto expanded;
2572 }
2573
2574 out:
2575 buffer->cur = cur;
2576 name->len = namebuf - name->text;
2577 list->name_used = namebuf - list->namebuf;
2578 }
2579
2580 /* Parse (append) a number. */
2581
2582 #define VALID_SIGN(c, prevc) \
2583 (((c) == '+' || (c) == '-') && \
2584 ((prevc) == 'e' || (prevc) == 'E' \
2585 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2586
2587 static void
2588 parse_number (pfile, list, name)
2589 cpp_reader *pfile;
2590 cpp_toklist *list;
2591 cpp_name *name;
2592 {
2593 const unsigned char *name_limit;
2594 unsigned char *namebuf;
2595 cpp_buffer *buffer = pfile->buffer;
2596 register const unsigned char *cur = buffer->cur;
2597
2598 expanded:
2599 name_limit = list->namebuf + list->name_cap;
2600 namebuf = list->namebuf + list->name_used;
2601
2602 for (; cur < buffer->rlimit && namebuf < name_limit; )
2603 {
2604 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2605
2606 /* Perhaps we should accept '$' here if we accept it for
2607 identifiers. We know namebuf[-1] is safe, because for c to
2608 be a sign we must have pushed at least one character. */
2609 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2610 goto out;
2611
2612 namebuf++;
2613 cur++;
2614 }
2615
2616 /* Run out of name space? */
2617 if (cur < buffer->rlimit)
2618 {
2619 list->name_used = namebuf - list->namebuf;
2620 auto_expand_name_space (list);
2621 goto expanded;
2622 }
2623
2624 out:
2625 buffer->cur = cur;
2626 name->len = namebuf - name->text;
2627 list->name_used = namebuf - list->namebuf;
2628 }
2629
2630 /* Places a string terminated by an unescaped TERMINATOR into a
2631 cpp_name, which should be expandable and thus at the top of the
2632 list's stack. Handles embedded trigraphs, if necessary, and
2633 escaped newlines.
2634
2635 Can be used for character constants (terminator = '\''), string
2636 constants ('"') and angled headers ('>'). Multi-line strings are
2637 allowed, except for within directives. */
2638
2639 static void
2640 parse_string2 (pfile, list, name, terminator, multiline_ok)
2641 cpp_reader *pfile;
2642 cpp_toklist *list;
2643 cpp_name *name;
2644 unsigned int terminator;
2645 int multiline_ok;
2646 {
2647 cpp_buffer *buffer = pfile->buffer;
2648 register const unsigned char *cur = buffer->cur;
2649 const unsigned char *name_limit;
2650 unsigned char *namebuf;
2651 unsigned int null_count = 0;
2652 int trigraphed_len = 0;
2653
2654 expanded:
2655 name_limit = list->namebuf + list->name_cap;
2656 namebuf = list->namebuf + list->name_used;
2657
2658 for (; cur < buffer->rlimit && namebuf < name_limit; )
2659 {
2660 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2661
2662 if (c == '\0')
2663 null_count++;
2664 else if (c == terminator || IS_NEWLINE (c))
2665 {
2666 /* Needed for trigraph_replace and multiline string warning. */
2667 buffer->cur = cur;
2668
2669 /* Scan for trigraphs before checking if backslash-escaped. */
2670 if (CPP_OPTION (pfile, trigraphs)
2671 || CPP_OPTION (pfile, warn_trigraphs))
2672 {
2673 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2674 namebuf);
2675 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2676 if (trigraphed_len < 0)
2677 trigraphed_len = 0;
2678 }
2679
2680 namebuf--; /* Drop the newline / terminator from the name. */
2681 if (IS_NEWLINE (c))
2682 {
2683 /* Drop a backslash newline, and continue. */
2684 if (namebuf[-1] == '\\')
2685 {
2686 handle_newline (cur, buffer->rlimit, c);
2687 namebuf--;
2688 continue;
2689 }
2690
2691 cur--;
2692
2693 /* In Fortran and assembly language, silently terminate
2694 strings of either variety at end of line. This is a
2695 kludge around not knowing where comments are in these
2696 languages. */
2697 if (CPP_OPTION (pfile, lang_fortran)
2698 || CPP_OPTION (pfile, lang_asm))
2699 goto out;
2700
2701 /* Character constants, headers and asserts may not
2702 extend over multiple lines. In Standard C, neither
2703 may strings. We accept multiline strings as an
2704 extension, but not in directives. */
2705 if (!multiline_ok)
2706 goto unterminated;
2707
2708 cur++; /* Move forwards again. */
2709
2710 if (pfile->multiline_string_line == 0)
2711 {
2712 pfile->multiline_string_line = list->line;
2713 if (CPP_PEDANTIC (pfile))
2714 cpp_pedwarn (pfile, "multi-line string constant");
2715 }
2716
2717 *namebuf++ = '\n';
2718 handle_newline (cur, buffer->rlimit, c);
2719 }
2720 else
2721 {
2722 unsigned char *temp;
2723
2724 /* An odd number of consecutive backslashes represents
2725 an escaped terminator. */
2726 temp = namebuf - 1;
2727 while (temp >= name->text && *temp == '\\')
2728 temp--;
2729
2730 if ((namebuf - temp) & 1)
2731 goto out;
2732 namebuf++;
2733 }
2734 }
2735 }
2736
2737 /* Run out of name space? */
2738 if (cur < buffer->rlimit)
2739 {
2740 list->name_used = namebuf - list->namebuf;
2741 auto_expand_name_space (list);
2742 goto expanded;
2743 }
2744
2745 /* We may not have trigraph-replaced the input for this code path,
2746 but as the input is in error by being unterminated we don't
2747 bother. Prevent warnings about no newlines at EOF. */
2748 if (IS_NEWLINE(cur[-1]))
2749 cur--;
2750
2751 unterminated:
2752 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2753
2754 if (terminator == '\"' && pfile->multiline_string_line != list->line
2755 && pfile->multiline_string_line != 0)
2756 {
2757 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2758 "possible start of unterminated string literal");
2759 pfile->multiline_string_line = 0;
2760 }
2761
2762 out:
2763 buffer->cur = cur;
2764 name->len = namebuf - name->text;
2765 list->name_used = namebuf - list->namebuf;
2766
2767 if (null_count > 0)
2768 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2769 : "null character preserved"));
2770 }
2771
2772 /* The character TYPE helps us distinguish comment types: '*' = C
2773 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2774 the stored comment includes the comment start and any terminator. */
2775
2776 #define COMMENT_START_LEN 2
2777 static void
2778 save_comment (list, token, from, len, type)
2779 cpp_toklist *list;
2780 cpp_token *token;
2781 const unsigned char *from;
2782 unsigned int len;
2783 unsigned int type;
2784 {
2785 unsigned char *buffer;
2786
2787 len += COMMENT_START_LEN;
2788
2789 if (list->name_used + len > list->name_cap)
2790 expand_name_space (list, len);
2791
2792 INIT_TOKEN_NAME (list, token);
2793 token->type = CPP_COMMENT;
2794 token->val.name.len = len;
2795
2796 buffer = list->namebuf + list->name_used;
2797 list->name_used += len;
2798
2799 /* Copy the comment. */
2800 if (type == '*')
2801 {
2802 *buffer++ = '/';
2803 *buffer++ = '*';
2804 }
2805 else
2806 {
2807 *buffer++ = type;
2808 *buffer++ = type;
2809 }
2810 memcpy (buffer, from, len - COMMENT_START_LEN);
2811 }
2812
2813 /*
2814 * The tokenizer's main loop. Returns a token list, representing a
2815 * logical line in the input file. On EOF after some tokens have
2816 * been processed, we return immediately. Then in next call, or if
2817 * EOF occurred at the beginning of a logical line, a single CPP_EOF
2818 * token is placed in the list.
2819 *
2820 * Implementation relies almost entirely on lookback, rather than
2821 * looking forwards. This means that tokenization requires just
2822 * a single pass of the file, even in the presence of trigraphs and
2823 * escaped newlines, providing significant performance benefits.
2824 * Trigraph overhead is negligible if they are disabled, and low
2825 * even when enabled.
 *
 * PFILE supplies the input through pfile->buffer; its cur pointer is
 * advanced past everything consumed. LIST receives the tokens, which
 * are appended starting at index list->tokens_used; list->tokens_used
 * is updated on return, and the first token lexed by this call has
 * the BOL flag set.
 *
 * NOTE(review): PUSH_TOKEN, REVISE_TOKEN, BACKUP_TOKEN, BACKUP_DIGRAPH,
 * IMMED_TOKEN and PREV_TOKEN_TYPE are macros defined elsewhere in the
 * file. They evidently operate on the local variables of this function
 * (cur_token at least), so those locals must keep their names.
2826 */
2827
/* True if the first token lexed by the current call is a '#', i.e. the
   line being lexed is (so far) a directive. Uses the locals LIST and
   FIRST_TOKEN of _cpp_lex_line.  */
2828 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2829
2830 void
2831 _cpp_lex_line (pfile, list)
2832 cpp_reader *pfile;
2833 cpp_toklist *list;
2834 {
2835 cpp_token *cur_token, *token_limit;
2836 cpp_buffer *buffer = pfile->buffer;
2837 register const unsigned char *cur = buffer->cur;
/* Flags to give the next token; carries PREV_WHITESPACE across
   whitespace, comments and escaped newlines.  */
2838 unsigned char flags = 0;
/* Index of the first token this call will produce.  */
2839 unsigned int first_token = list->tokens_used;
2840
2841 list->line = CPP_BUF_LINE (buffer);
2842 pfile->col_adjust = 0;
/* Re-entered after the token array has been grown (see the end of the
   function): the pointers into it must be recomputed.  */
2843 expanded:
2844 token_limit = list->tokens + list->tokens_cap;
2845 cur_token = list->tokens + list->tokens_used;
2846
2847 for (; cur < buffer->rlimit && cur_token < token_limit;)
2848 {
2849 unsigned char c = *cur++;
2850
2851 /* Optimize whitespace skipping, as most tokens are probably
2852 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2853
2854 if (is_hspace ((unsigned int) c))
2855 {
2856 /* Step back to get the null warning and tab correction. */
2857 buffer->cur = cur - 1;
2858 skip_whitespace (pfile, IS_DIRECTIVE ());
2859 cur = buffer->cur;
2860
2861 flags = PREV_WHITESPACE;
2862 if (cur == buffer->rlimit)
2863 break;
2864 c = *cur++;
2865 }
2866
2867 /* Initialize current token. Its type is set in the switch. */
2868 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2869 cur_token->flags = flags;
2870 flags = 0;
2871
2872 switch (c)
2873 {
2874 case '0': case '1': case '2': case '3': case '4':
2875 case '5': case '6': case '7': case '8': case '9':
2876 {
2877 int prev_dot;
2878
2879 cur--; /* Backup character. */
2880 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2881 if (prev_dot)
2882 cur_token--;
2883 INIT_TOKEN_NAME (list, cur_token);
2884 /* Prepend an immediately previous CPP_DOT token. */
2885 if (prev_dot)
2886 {
2887 if (list->name_cap == list->name_used)
2888 auto_expand_name_space (list);
2889
2890 cur_token->val.name.len = 1;
2891 list->namebuf[list->name_used++] = '.';
2892 }
2893
/* Also entered from the newline case, to resume a number that was
   interrupted by an escaped newline.  */
2894 continue_number:
2895 cur_token->type = CPP_NUMBER; /* Before parse_number. */
2896 buffer->cur = cur;
2897 parse_number (pfile, list, &cur_token->val.name);
2898 cur = buffer->cur;
2899 cur_token++;
2900 }
2901 break;
2902
/* Entered from the '$' case when dollars are allowed in identifiers.  */
2903 letter:
2904 case '_':
2905 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2906 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2907 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2908 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2909 case 'y': case 'z':
2910 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2911 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2912 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2913 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2914 case 'Y': case 'Z':
2915 cur--; /* Backup character. */
2916 INIT_TOKEN_NAME (list, cur_token);
2917 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2918
/* Also entered from the newline case, to resume an identifier that was
   interrupted by an escaped newline.  */
2919 continue_name:
2920 buffer->cur = cur;
2921 parse_name (pfile, list, &cur_token->val.name);
2922 cur = buffer->cur;
2923
2924 /* Find handler for newly created / extended directive. */
2925 if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2926 _cpp_check_directive (list, cur_token);
2927 cur_token++;
2928 break;
2929
2930 case '\'':
2931 /* Fall through. */
2932 case '\"':
2933 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2934 /* Do we have a wide string? */
2935 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2936 && cur_token[-1].val.name.len == 1
2937 && cur_token[-1].val.name.text[0] == 'L'
2938 && !CPP_TRADITIONAL (pfile))
2939 {
2940 /* No need for 'L' any more. */
2941 list->name_used--;
2942 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2943 }
2944
/* Also entered from the '<' case for header names, with c changed to
   the '>' terminator and the token type already set.  */
2945 do_parse_string:
2946 /* Here c is one of ' " or >. */
2947 INIT_TOKEN_NAME (list, cur_token);
2948 buffer->cur = cur;
/* Only double-quoted strings outside directives may span lines.  */
2949 parse_string2 (pfile, list, &cur_token->val.name, c,
2950 c == '"' && !IS_DIRECTIVE());
2951 cur = buffer->cur;
2952 cur_token++;
2953 break;
2954
2955 case '/':
2956 cur_token->type = CPP_DIV;
2957 if (IMMED_TOKEN ())
2958 {
2959 if (PREV_TOKEN_TYPE == CPP_DIV)
2960 {
2961 /* We silently allow C++ comments in system headers,
2962 irrespective of conformance mode, because lots of
2963 broken systems do that and trying to clean it up
2964 in fixincludes is a nightmare. */
2965 if (buffer->system_header_p)
2966 goto do_line_comment;
2967 else if (CPP_OPTION (pfile, cplusplus_comments))
2968 {
2969 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2970 && ! buffer->warned_cplusplus_comments)
2971 {
2972 buffer->cur = cur;
2973 cpp_pedwarn (pfile,
2974 "C++ style comments are not allowed in ISO C89");
2975 cpp_pedwarn (pfile,
2976 "(this will be reported only once per input file)");
2977 buffer->warned_cplusplus_comments = 1;
2978 }
/* Also entered from the '-' case for Chill "--" comments; c is then
   '-' rather than '/'.  */
2979 do_line_comment:
2980 buffer->cur = cur;
/* The two opener characters were not adjacent in the input.  */
2981 if (cur[-2] != c)
2982 cpp_warning (pfile,
2983 "comment start split across lines");
2984 if (skip_line_comment2 (pfile))
2985 cpp_error_with_line (pfile, list->line,
2986 cur_token[-1].col,
2987 "multi-line comment");
2988
2989 /* Back-up to first '-' or '/'. */
2990 cur_token--;
/* Keep the comment as a token unless comments are discarded or we are
   in a directive whose dirno is non-zero.  */
2991 if (!CPP_OPTION (pfile, discard_comments)
2992 && (!IS_DIRECTIVE() || list->dirno == 0))
2993 save_comment (list, cur_token++, cur,
2994 buffer->cur - cur, c);
2995 cur = buffer->cur;
2996
2997 if (!CPP_OPTION (pfile, traditional))
2998 flags = PREV_WHITESPACE;
2999 break;
3000 }
3001 }
3002 }
3003 cur_token++;
3004 break;
3005
3006 case '*':
3007 cur_token->type = CPP_MULT;
3008 if (IMMED_TOKEN ())
3009 {
3010 if (PREV_TOKEN_TYPE == CPP_DIV)
3011 {
3012 buffer->cur = cur;
3013 if (cur[-2] != '/')
3014 cpp_warning (pfile,
3015 "comment start '/*' split across lines");
3016 if (skip_block_comment2 (pfile))
3017 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3018 "unterminated comment");
3019 else if (buffer->cur[-2] != '*')
3020 cpp_warning (pfile,
3021 "comment end '*/' split across lines");
3022
3023 /* Back up to opening '/'. */
3024 cur_token--;
3025 if (!CPP_OPTION (pfile, discard_comments)
3026 && (!IS_DIRECTIVE() || list->dirno == 0))
3027 save_comment (list, cur_token++, cur,
3028 buffer->cur - cur, c);
3029 cur = buffer->cur;
3030
3031 if (!CPP_OPTION (pfile, traditional))
3032 flags = PREV_WHITESPACE;
3033 break;
3034 }
3035 else if (CPP_OPTION (pfile, cplusplus))
3036 {
3037 /* In C++, there are .* and ->* operators. */
3038 if (PREV_TOKEN_TYPE == CPP_DEREF)
3039 BACKUP_TOKEN (CPP_DEREF_STAR);
3040 else if (PREV_TOKEN_TYPE == CPP_DOT)
3041 BACKUP_TOKEN (CPP_DOT_STAR);
3042 }
3043 }
3044 cur_token++;
3045 break;
3046
3047 case '\n':
3048 case '\r':
3049 handle_newline (cur, buffer->rlimit, c);
3050 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3051 {
3052 /* Remove the escaped newline. Then continue to process
3053 any interrupted name or number. */
3054 cur_token--;
3055 if (IMMED_TOKEN ())
3056 {
3057 cur_token--;
3058 if (cur_token->type == CPP_NAME)
3059 goto continue_name;
3060 else if (cur_token->type == CPP_NUMBER)
3061 goto continue_number;
3062 cur_token++;
3063 }
3064 /* Remember whitespace setting. */
3065 flags = cur_token->flags;
3066 break;
3067 }
3068 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3069 {
3070 buffer->cur = cur;
3071 cpp_warning (pfile, "backslash and newline separated by space");
3072 }
3073 /* Skip vertical space until we have at least one token to
3074 return. */
3075 if (cur_token != &list->tokens[first_token])
3076 goto out;
3077 list->line = CPP_BUF_LINE (buffer);
3078 break;
3079
3080 case '-':
3081 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3082 {
3083 if (CPP_OPTION (pfile, chill))
3084 goto do_line_comment;
3085 REVISE_TOKEN (CPP_MINUS_MINUS);
3086 }
3087 else
3088 PUSH_TOKEN (CPP_MINUS);
3089 break;
3090
3091 /* The digraph flag checking ensures that ## and %:%:
3092 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3093 make_hash:
3094 case '#':
3095 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3096 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3097 REVISE_TOKEN (CPP_PASTE);
3098 else
3099 PUSH_TOKEN (CPP_HASH);
3100 break;
3101
3102 case ':':
3103 cur_token->type = CPP_COLON;
3104 if (IMMED_TOKEN ())
3105 {
3106 if (PREV_TOKEN_TYPE == CPP_COLON
3107 && CPP_OPTION (pfile, cplusplus))
3108 BACKUP_TOKEN (CPP_SCOPE);
3109 /* Digraph: "<:" is a '[' */
3110 else if (PREV_TOKEN_TYPE == CPP_LESS)
3111 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3112 /* Digraph: "%:" is a '#' */
3113 else if (PREV_TOKEN_TYPE == CPP_MOD)
3114 {
3115 (--cur_token)->flags |= DIGRAPH;
3116 goto make_hash;
3117 }
3118 }
3119 cur_token++;
3120 break;
3121
3122 case '&':
3123 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3124 REVISE_TOKEN (CPP_AND_AND);
3125 else
3126 PUSH_TOKEN (CPP_AND);
3127 break;
3128
/* Also entered from the '?' case for the "??!" trigraph.  */
3129 make_or:
3130 case '|':
3131 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3132 REVISE_TOKEN (CPP_OR_OR);
3133 else
3134 PUSH_TOKEN (CPP_OR);
3135 break;
3136
3137 case '+':
3138 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3139 REVISE_TOKEN (CPP_PLUS_PLUS);
3140 else
3141 PUSH_TOKEN (CPP_PLUS);
3142 break;
3143
3144 case '=':
3145 /* This relies on equidistance of "?=" and "?" tokens. */
3146 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3147 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3148 else
3149 PUSH_TOKEN (CPP_EQ);
3150 break;
3151
3152 case '>':
3153 cur_token->type = CPP_GREATER;
3154 if (IMMED_TOKEN ())
3155 {
3156 if (PREV_TOKEN_TYPE == CPP_GREATER)
3157 BACKUP_TOKEN (CPP_RSHIFT);
3158 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3159 BACKUP_TOKEN (CPP_DEREF);
3160 /* Digraph: ":>" is a ']' */
3161 else if (PREV_TOKEN_TYPE == CPP_COLON)
3162 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3163 /* Digraph: "%>" is a '}' */
3164 else if (PREV_TOKEN_TYPE == CPP_MOD)
3165 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3166 }
3167 cur_token++;
3168 break;
3169
3170 case '<':
3171 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3172 {
3173 REVISE_TOKEN (CPP_LSHIFT);
3174 break;
3175 }
3176 /* Is this the beginning of a header name? */
3177 if (list->flags & SYNTAX_INCLUDE)
3178 {
3179 c = '>'; /* Terminator. */
3180 cur_token->type = CPP_HEADER_NAME;
3181 goto do_parse_string;
3182 }
3183 PUSH_TOKEN (CPP_LESS);
3184 break;
3185
3186 case '%':
3187 /* Digraph: "<%" is a '{' */
3188 cur_token->type = CPP_MOD;
3189 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3190 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3191 cur_token++;
3192 break;
3193
3194 case '?':
/* Look ahead for "?X" where X completes a trigraph; trigraph_ok
   decides (and possibly warns) whether it is honoured.  */
3195 if (cur + 1 < buffer->rlimit && *cur == '?'
3196 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3197 {
3198 /* Handle trigraph. */
3199 cur++;
3200 switch (*cur++)
3201 {
3202 case '(': goto make_open_square;
3203 case ')': goto make_close_square;
3204 case '<': goto make_open_brace;
3205 case '>': goto make_close_brace;
3206 case '=': goto make_hash;
3207 case '!': goto make_or;
3208 case '-': goto make_complement;
3209 case '/': goto make_backslash;
3210 case '\'': goto make_xor;
3211 }
3212 }
3213 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3214 {
3215 /* GNU C++ defines <? and >? operators. */
3216 if (PREV_TOKEN_TYPE == CPP_LESS)
3217 {
3218 REVISE_TOKEN (CPP_MIN);
3219 break;
3220 }
3221 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3222 {
3223 REVISE_TOKEN (CPP_MAX);
3224 break;
3225 }
3226 }
3227 PUSH_TOKEN (CPP_QUERY);
3228 break;
3229
3230 case '.':
/* Three adjacent dots form an ellipsis.  */
3231 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3232 && IMMED_TOKEN ()
3233 && !(cur_token[-1].flags & PREV_WHITESPACE))
3234 {
3235 cur_token -= 2;
3236 PUSH_TOKEN (CPP_ELLIPSIS);
3237 }
3238 else
3239 PUSH_TOKEN (CPP_DOT);
3240 break;
3241
/* The make_* labels below are also the targets of the trigraph
   dispatch in the '?' case.  */
3242 make_complement:
3243 case '~': PUSH_TOKEN (CPP_COMPL); break;
3244 make_xor:
3245 case '^': PUSH_TOKEN (CPP_XOR); break;
3246 make_open_brace:
3247 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3248 make_close_brace:
3249 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3250 make_open_square:
3251 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3252 make_close_square:
3253 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3254 make_backslash:
3255 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3256 case '!': PUSH_TOKEN (CPP_NOT); break;
3257 case ',': PUSH_TOKEN (CPP_COMMA); break;
3258 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3259 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3260 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3261
3262 case '$':
3263 if (CPP_OPTION (pfile, dollars_in_ident))
3264 goto letter;
3265 /* Fall through */
3266 default:
/* Any other character becomes a CPP_OTHER token that remembers the
   character itself in its aux field.  */
3267 cur_token->aux = c;
3268 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3269 PUSH_TOKEN (CPP_OTHER);
3270 break;
3271 }
3272 }
3273
3274 /* Run out of token space? */
3275 if (cur_token == token_limit)
3276 {
3277 list->tokens_used = cur_token - list->tokens;
3278 _cpp_expand_token_space (list, 256);
3279 goto expanded;
3280 }
3281
/* We get here at the end of the buffer.  Store the pending flags in
   the next, not yet used, token slot.  */
3282 cur_token->flags = flags;
/* If this call produced no tokens at all, return a lone CPP_EOF.  */
3283 if (cur_token == &list->tokens[first_token])
3284 {
3285 /* FIXME: move this warning to callers who care. */
3286 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3287 cpp_warning (pfile, "no newline at end of file");
3288 cur_token++->type = CPP_EOF;
3289 }
3290
/* Common exit, also reached directly from the newline case once a
   complete line has been lexed.  */
3291 out:
3292 list->tokens[first_token].flags |= BOL;
3293 buffer->cur = cur;
3294 list->tokens_used = cur_token - list->tokens;
3295 }
3296
3297 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3298 already contain the enough space to hold the token's spelling. If
3299 WHITESPACE is true, and the token was preceded by whitespace,
3300 output a single space before the token proper. Returns a pointer
3301 to the character after the last character written. */
3302
3303 static unsigned char *
3304 spell_token (pfile, token, buffer, whitespace)
3305 cpp_reader *pfile; /* Would be nice to be rid of this... */
3306 const cpp_token *token;
3307 unsigned char *buffer;
3308 int whitespace;
3309 {
3310 /* Whitespace will not be wanted by handlers of the # and ##
3311 operators calling this function, but will be wanted by the
3312 function that writes out the preprocessed file. */
3313 if (whitespace && token->flags & PREV_WHITESPACE)
3314 *buffer++ = ' ';
3315
3316 switch (token_spellings[token->type].type)
3317 {
3318 case SPELL_OPERATOR:
3319 {
3320 const unsigned char *spelling;
3321 unsigned char c;
3322
3323 if (token->flags & DIGRAPH)
3324 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3325 else
3326 spelling = token_spellings[token->type].spelling;
3327
3328 while ((c = *spelling++) != '\0')
3329 *buffer++ = c;
3330 }
3331 break;
3332
3333 case SPELL_IDENT:
3334 memcpy (buffer, token->val.name.text, token->val.name.len);
3335 buffer += token->val.name.len;
3336 break;
3337
3338 case SPELL_STRING:
3339 {
3340 unsigned char c;
3341
3342 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3343 *buffer++ = 'L';
3344 c = '\'';
3345 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3346 c = '"';
3347 *buffer++ = c;
3348 memcpy (buffer, token->val.name.text, token->val.name.len);
3349 buffer += token->val.name.len;
3350 *buffer++ = c;
3351 }
3352 break;
3353
3354 case SPELL_CHAR:
3355 *buffer++ = token->aux;
3356 break;
3357
3358 case SPELL_NONE:
3359 cpp_ice (pfile, "Unspellable token");
3360 break;
3361 }
3362
3363 return buffer;
3364 }
3365
3366 /* Temporary function for illustrative purposes. */
3367 void
3368 _cpp_lex_file (pfile)
3369 cpp_reader* pfile;
3370 {
3371 cpp_toklist* list;
3372
3373 init_trigraph_map ();
3374 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3375 _cpp_init_toklist (list, DUMMY_TOKEN);
3376
3377 for (;;)
3378 {
3379 _cpp_lex_line (pfile, list);
3380 if (list->tokens[0].type == CPP_EOF)
3381 break;
3382
3383 #if 0
3384 if (list->dirno)
3385 _cpp_handle_directive (pfile, list);
3386 else
3387 #endif
3388 _cpp_output_list (pfile, list);
3389 _cpp_clear_toklist (list);
3390 }
3391 }
3392
3393 /* Temporary function for illustrative purposes. */
3394 static void
3395 _cpp_output_list (pfile, list)
3396 cpp_reader *pfile;
3397 cpp_toklist *list;
3398 {
3399 unsigned int i;
3400
3401 for (i = 0; i < list->tokens_used; i++)
3402 {
3403 CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
3404 pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);
3405 }
3406 }
3407
3408 #endif