cpp.texi: Update.
[gcc.git] / gcc / cpplex.c
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
8
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
36
37 #include "config.h"
38 #include "system.h"
39 #include "cpplib.h"
40 #include "cpphash.h"
41 #include "symcat.h"
42
43 /* Tokens with SPELL_STRING store their spelling in the token list,
44 and its length in the token->val.name.len.
   Each token type is assigned one of these categories through the
   token_spellings table; SPELL_OPERATOR is the category given to every
   entry produced by the OP() table macro. */
45 enum spell_type
46 {
47 SPELL_OPERATOR = 0,
48 SPELL_CHAR,
49 SPELL_IDENT,
50 SPELL_STRING,
51 SPELL_NONE
52 };
53
/* One row of the token_spellings table: the spelling category of a
   token type together with a fixed string for it (the operator text
   for OP() rows, the stringified enumerator name for TK() rows). */
54 struct token_spelling
55 {
56 enum spell_type category;
57 const unsigned char *name;
58 };
59
/* Spellings of the digraph forms. */
60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
61 U":>", U"<%", U"%>"};
62
/* Expand TTYPE_TABLE into one token_spelling initializer per token
   type.  OP rows are always SPELL_OPERATOR and carry their spelling S;
   TK rows take their category from S and use the enumerator's own name
   as the string.  Both helper macros are undefined again afterwards. */
63 #define OP(e, s) { SPELL_OPERATOR, U s },
64 #define TK(e, s) { s, U STRINGX (e) },
65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66 #undef OP
67 #undef TK
68
/* Look up the spelling category / fixed name for TOKEN, indexing the
   table by token->type. */
69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
71
/* Character-level helpers that work directly on a cpp_buffer and
   return the next character to examine. */
72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
75
/* Helpers for skipping comments and whitespace and for lexing
   individual token kinds. */
76 static int skip_block_comment PARAMS ((cpp_reader *));
77 static int skip_line_comment PARAMS ((cpp_reader *));
78 static void adjust_column PARAMS ((cpp_reader *));
79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
84 static void unterminated PARAMS ((cpp_reader *, int));
85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
90
/* Memory pool chunk helpers (their definitions are not in this part
   of the file). */
91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
93
94 /* Utility routine:
95
96 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
98
99 int
100 cpp_ideq (token, string)
101 const cpp_token *token;
102 const char *string;
103 {
104 if (token->type != CPP_NAME)
105 return 0;
106
107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
108 }
109
110 /* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112 static cppchar_t
113 handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
116 {
117 cppchar_t next = EOF;
118
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
122
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
125 {
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
128 {
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
134 }
135 }
136
137 buffer->read_ahead = next;
138 return next;
139 }
140
141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
146 static int
147 trigraph_ok (pfile, from_char)
148 cpp_reader *pfile;
149 cppchar_t from_char;
150 {
151 int accept = CPP_OPTION (pfile, trigraphs);
152
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
155 {
156 cpp_buffer *buffer = pfile->buffer;
157 if (accept)
158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
159 "trigraph ??%c converted to %c",
160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
162 else if (buffer->cur != buffer->last_Wtrigraphs)
163 {
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, buffer->lineno,
166 CPP_BUF_COL (buffer) - 2,
167 "trigraph ??%c ignored", (int) from_char);
168 }
169 }
170
171 return accept;
172 }
173
174 /* Sets the type of the token being built to T and clears the pending
   lookahead character.  Assumes local variables buffer and result. */
175 #define ACCEPT_CHAR(t) \
176 do { result->type = t; buffer->read_ahead = EOF; } while (0)
177
178 /* When we move to multibyte character sets, add to these something
179 that saves and restores the state of the multibyte conversion
180 library. This probably involves saving and restoring a "cookie".
181 In the case of glibc it is an 8-byte structure, so is not a high
182 overhead operation. In any case, it's out of the fast path. */
/* Record / rewind the buffer read position.  Both macros assume local
   variables named buffer and saved_cur at the point of use. */
183 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
184 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
185
186 /* Skips any escaped newlines introduced by NEXT, which is either a
187 '?' or a '\\'. Returns the next character, which will also have
188 been placed in buffer->read_ahead. This routine performs
189 preprocessing stages 1 and 2 of the ISO C standard.
   If NEXT turns out not to start a trigraph or an escaped newline,
   the read position is rewound and NEXT itself is returned.  An
   accepted trigraph that does not lead to an escaped newline is
   returned as its replacement character. */
190 static cppchar_t
191 skip_escaped_newlines (buffer, next)
192 cpp_buffer *buffer;
193 cppchar_t next;
194 {
195 /* Only do this if we apply stages 1 and 2. */
196 if (!buffer->from_stage3)
197 {
198 cppchar_t next1;
199 const unsigned char *saved_cur;
200 int space;
201
202 do
203 {
204 if (buffer->cur == buffer->rlimit)
205 break;
206
/* Remember the position just after NEXT so that a failed match can
   rewind to it. */
207 SAVE_STATE ();
208 if (next == '?')
209 {
/* A trigraph needs a second '?' and then a third character. */
210 next1 = *buffer->cur++;
211 if (next1 != '?' || buffer->cur == buffer->rlimit)
212 {
213 RESTORE_STATE ();
214 break;
215 }
216
/* Give up if the third character has no replacement in the trigraph
   map, or if trigraph_ok says the trigraph is not to be honoured. */
217 next1 = *buffer->cur++;
218 if (!_cpp_trigraph_map[next1]
219 || !trigraph_ok (buffer->pfile, next1))
220 {
221 RESTORE_STATE ();
222 break;
223 }
224
225 /* We have a full trigraph here. */
226 next = _cpp_trigraph_map[next1];
227 if (next != '\\' || buffer->cur == buffer->rlimit)
228 break;
/* The trigraph produced a backslash; from here on a failed newline
   match rewinds only to just after the trigraph. */
229 SAVE_STATE ();
230 }
231
232 /* We have a backslash, and room for at least one more character. */
233 space = 0;
/* Step over horizontal whitespace after the backslash, noting in
   SPACE whether any was seen. */
234 do
235 {
236 next1 = *buffer->cur++;
237 if (!is_nvspace (next1))
238 break;
239 space = 1;
240 }
241 while (buffer->cur < buffer->rlimit);
242
/* No newline follows, so this is not an escaped newline: rewind and
   hand back NEXT. */
243 if (!is_vspace (next1))
244 {
245 RESTORE_STATE ();
246 break;
247 }
248
249 if (space && !buffer->pfile->state.lexing_comment)
250 cpp_warning (buffer->pfile,
251 "backslash and newline separated by space");
252
253 next = handle_newline (buffer, next1);
254 if (next == EOF)
255 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
256 }
/* The character after an escaped newline may start another one. */
257 while (next == '\\' || next == '?');
258 }
259
260 buffer->read_ahead = next;
261 return next;
262 }
263
264 /* Obtain the next character, after trigraph conversion and skipping
265 an arbitrary string of escaped newlines. The common case of no
266 trigraphs or escaped newlines falls through quickly. */
267 static cppchar_t
268 get_effective_char (buffer)
269 cpp_buffer *buffer;
270 {
271 cppchar_t next = EOF;
272
273 if (buffer->cur < buffer->rlimit)
274 {
275 next = *buffer->cur++;
276
277 /* '?' can introduce trigraphs (and therefore backslash); '\\'
278 can introduce escaped newlines, which we want to skip, or
279 UCNs, which, depending upon lexer state, we will handle in
280 the future. */
281 if (next == '?' || next == '\\')
282 next = skip_escaped_newlines (buffer, next);
283 }
284
285 buffer->read_ahead = next;
286 return next;
287 }
288
289 /* Skip a C-style block comment. We find the end of the comment by
290 seeing if an asterisk is before every '/' we encounter. Returns
291 non-zero if comment terminated by EOF, zero otherwise.
   pfile->state.lexing_comment is set for the duration of the scan and
   cleared on exit, and buffer->read_ahead is reset to EOF.  The return
   value is derived from the last two characters examined: anything
   other than '*' followed by '/' means the input ran out first. */
292 static int
293 skip_block_comment (pfile)
294 cpp_reader *pfile;
295 {
296 cpp_buffer *buffer = pfile->buffer;
297 cppchar_t c = EOF, prevc = EOF;
298
299 pfile->state.lexing_comment = 1;
300 while (buffer->cur != buffer->rlimit)
301 {
/* PREVC always trails C by one character so that a closing '/' can be
   checked against what came before it. */
302 prevc = c, c = *buffer->cur++;
303
/* Re-entry point for a character that has already been fetched (by
   the nested-comment lookahead or by handle_newline), so that it is
   examined without reading another one. */
304 next_char:
305 /* FIXME: For speed, create a new character class of characters
306 of interest inside block comments. */
307 if (c == '?' || c == '\\')
308 c = skip_escaped_newlines (buffer, c);
309
310 /* People like decorating comments with '*', so check for '/'
311 instead for efficiency. */
312 if (c == '/')
313 {
/* '*' immediately before this '/' closes the comment. */
314 if (prevc == '*')
315 break;
316
317 /* Warn about potential nested comments, but not if the '/'
318 comes immediately before the true comment delimiter.
319 Don't bother to get it right across escaped newlines. */
320 if (CPP_OPTION (pfile, warn_comments)
321 && buffer->cur != buffer->rlimit)
322 {
/* Peek at up to two more characters; whichever was read last is then
   processed through next_char. */
323 prevc = c, c = *buffer->cur++;
324 if (c == '*' && buffer->cur != buffer->rlimit)
325 {
326 prevc = c, c = *buffer->cur++;
327 if (c != '/')
328 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
329 CPP_BUF_COL (buffer),
330 "\"/*\" within comment");
331 }
332 goto next_char;
333 }
334 }
335 else if (is_vspace (c))
336 {
337 prevc = c, c = handle_newline (buffer, c);
338 goto next_char;
339 }
340 else if (c == '\t')
341 adjust_column (pfile);
342 }
343
344 pfile->state.lexing_comment = 0;
345 buffer->read_ahead = EOF;
346 return c != '/' || prevc != '*';
347 }
348
349 /* Skip a C++ line comment. Handles escaped newlines. Returns
350 non-zero if a multiline comment. The following new line, if any,
351 is left in buffer->read_ahead. */
352 static int
353 skip_line_comment (pfile)
354 cpp_reader *pfile;
355 {
356 cpp_buffer *buffer = pfile->buffer;
357 unsigned int orig_lineno = buffer->lineno;
358 cppchar_t c;
359
360 pfile->state.lexing_comment = 1;
361 do
362 {
363 c = EOF;
364 if (buffer->cur == buffer->rlimit)
365 break;
366
367 c = *buffer->cur++;
368 if (c == '?' || c == '\\')
369 c = skip_escaped_newlines (buffer, c);
370 }
371 while (!is_vspace (c));
372
373 pfile->state.lexing_comment = 0;
374 buffer->read_ahead = c; /* Leave any newline for caller. */
375 return orig_lineno != buffer->lineno;
376 }
377
378 /* pfile->buffer->cur is one beyond the \t character. Update
379 col_adjust so we track the column correctly. */
380 static void
381 adjust_column (pfile)
382 cpp_reader *pfile;
383 {
384 cpp_buffer *buffer = pfile->buffer;
385 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
386
387 /* Round it up to multiple of the tabstop, but subtract 1 since the
388 tab itself occupies a character position. */
389 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
390 - col % CPP_OPTION (pfile, tabstop)) - 1;
391 }
392
393 /* Skips whitespace, saving the next non-whitespace character.
394 Adjusts pfile->col_adjust to account for tabs. Without this,
395 tokens might be assigned an incorrect column. */
396 static void
397 skip_whitespace (pfile, c)
398 cpp_reader *pfile;
399 cppchar_t c;
400 {
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int warned = 0;
403
404 do
405 {
406 /* Horizontal space always OK. */
407 if (c == ' ')
408 ;
409 else if (c == '\t')
410 adjust_column (pfile);
411 /* Just \f \v or \0 left. */
412 else if (c == '\0')
413 {
414 if (!warned)
415 {
416 cpp_warning (pfile, "null character(s) ignored");
417 warned = 1;
418 }
419 }
420 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
421 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
422 CPP_BUF_COL (buffer),
423 "%s in preprocessing directive",
424 c == '\f' ? "form feed" : "vertical tab");
425
426 c = EOF;
427 if (buffer->cur == buffer->rlimit)
428 break;
429 c = *buffer->cur++;
430 }
431 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
432 while (is_nvspace (c));
433
434 /* Remember the next character. */
435 buffer->read_ahead = c;
436 }
437
438 /* See if the characters of a number token are valid in a name (no
439 '.', '+' or '-'). */
440 static int
441 name_p (pfile, string)
442 cpp_reader *pfile;
443 const cpp_string *string;
444 {
445 unsigned int i;
446
447 for (i = 0; i < string->len; i++)
448 if (!is_idchar (string->text[i]))
449 return 0;
450
451 return 1;
452 }
453
454 /* Parse an identifier, skipping embedded backslash-newlines.
455 Calculate the hash value of the token while parsing, for improved
456 performance. The hashing algorithm *must* match cpp_lookup(). */
457
458 static cpp_hashnode *
459 parse_identifier (pfile, c)
460 cpp_reader *pfile;
461 cppchar_t c;
462 {
463 cpp_hashnode *result;
464 cpp_buffer *buffer = pfile->buffer;
465 unsigned char *dest, *limit;
466 unsigned int r = 0, saw_dollar = 0;
467
468 dest = POOL_FRONT (&pfile->ident_pool);
469 limit = POOL_LIMIT (&pfile->ident_pool);
470
471 do
472 {
473 do
474 {
475 /* Need room for terminating null. */
476 if (dest + 1 >= limit)
477 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
478
479 *dest++ = c;
480 r = HASHSTEP (r, c);
481
482 if (c == '$')
483 saw_dollar++;
484
485 c = EOF;
486 if (buffer->cur == buffer->rlimit)
487 break;
488
489 c = *buffer->cur++;
490 }
491 while (is_idchar (c));
492
493 /* Potential escaped newline? */
494 if (c != '?' && c != '\\')
495 break;
496 c = skip_escaped_newlines (buffer, c);
497 }
498 while (is_idchar (c));
499
500 /* Remember the next character. */
501 buffer->read_ahead = c;
502
503 /* $ is not a identifier character in the standard, but is commonly
504 accepted as an extension. Don't warn about it in skipped
505 conditional blocks. */
506 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
507 cpp_pedwarn (pfile, "'$' character(s) in identifier");
508
509 /* Identifiers are null-terminated. */
510 *dest = '\0';
511
512 /* This routine commits the memory if necessary. */
513 result = _cpp_lookup_with_hash (pfile,
514 dest - POOL_FRONT (&pfile->ident_pool), r);
515
516 /* Some identifiers require diagnostics when lexed. */
517 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
518 {
519 /* It is allowed to poison the same identifier twice. */
520 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
521 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
522
523 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
524 replacement list of a variadic macro. */
525 if (result == pfile->spec_nodes.n__VA_ARGS__
526 && !pfile->state.va_args_ok)
527 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
528 }
529
530 return result;
531 }
532
533 /* Parse a number, skipping embedded backslash-newlines. */
534 static void
535 parse_number (pfile, number, c, leading_period)
536 cpp_reader *pfile;
537 cpp_string *number;
538 cppchar_t c;
539 int leading_period;
540 {
541 cpp_buffer *buffer = pfile->buffer;
542 cpp_pool *pool = &pfile->ident_pool;
543 unsigned char *dest, *limit;
544
545 dest = POOL_FRONT (pool);
546 limit = POOL_LIMIT (pool);
547
548 /* Place a leading period. */
549 if (leading_period)
550 {
551 if (dest >= limit)
552 limit = _cpp_next_chunk (pool, 0, &dest);
553 *dest++ = '.';
554 }
555
556 do
557 {
558 do
559 {
560 /* Need room for terminating null. */
561 if (dest + 1 >= limit)
562 limit = _cpp_next_chunk (pool, 0, &dest);
563 *dest++ = c;
564
565 c = EOF;
566 if (buffer->cur == buffer->rlimit)
567 break;
568
569 c = *buffer->cur++;
570 }
571 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
572
573 /* Potential escaped newline? */
574 if (c != '?' && c != '\\')
575 break;
576 c = skip_escaped_newlines (buffer, c);
577 }
578 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
579
580 /* Remember the next character. */
581 buffer->read_ahead = c;
582
583 /* Null-terminate the number. */
584 *dest = '\0';
585
586 number->text = POOL_FRONT (pool);
587 number->len = dest - number->text;
588 POOL_COMMIT (pool, number->len + 1);
589 }
590
591 /* Subroutine of parse_string. Emits error for unterminated strings. */
592 static void
593 unterminated (pfile, term)
594 cpp_reader *pfile;
595 int term;
596 {
597 cpp_error (pfile, "missing terminating %c character", term);
598
599 if (term == '\"' && pfile->mlstring_pos.line
600 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
601 {
602 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
603 pfile->mlstring_pos.col,
604 "possible start of unterminated string literal");
605 pfile->mlstring_pos.line = 0;
606 }
607 }
608
609 /* Subroutine of parse_string. */
610 static int
611 unescaped_terminator_p (pfile, dest)
612 cpp_reader *pfile;
613 const unsigned char *dest;
614 {
615 const unsigned char *start, *temp;
616
617 /* In #include-style directives, terminators are not escapeable. */
618 if (pfile->state.angled_headers)
619 return 1;
620
621 start = POOL_FRONT (&pfile->ident_pool);
622
623 /* An odd number of consecutive backslashes represents an escaped
624 terminator. */
625 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
626 ;
627
628 return ((dest - temp) & 1) == 0;
629 }
630
631 /* Parses a string, character constant, or angle-bracketed header file
632 name. Handles embedded trigraphs and escaped newlines.
633
634 Multi-line strings are allowed, but they are deprecated within
635 directives.

   TERMINATOR is the closing delimiter to look for.  The text between
   the delimiters is accumulated in the identifier pool and described
   by token->val.str on return; it is not null-terminated.  On return
   buffer->read_ahead holds EOF when the terminator was consumed or the
   input ran out, and otherwise the line-ending character at which
   lexing stopped. */
636 static void
637 parse_string (pfile, token, terminator)
638 cpp_reader *pfile;
639 cpp_token *token;
640 cppchar_t terminator;
641 {
642 cpp_buffer *buffer = pfile->buffer;
643 cpp_pool *pool = &pfile->ident_pool;
644 unsigned char *dest, *limit;
645 cppchar_t c;
646 unsigned int nulls = 0;
647
648 dest = POOL_FRONT (pool);
649 limit = POOL_LIMIT (pool);
650
651 for (;;)
652 {
/* Input exhausted before the terminator was seen. */
653 if (buffer->cur == buffer->rlimit)
654 {
655 c = EOF;
656 unterminated (pfile, terminator);
657 break;
658 }
659 c = *buffer->cur++;
660
/* Also entered from the bottom of the loop with the character that
   handle_newline left in read_ahead after a newline inside a
   multi-line string. */
661 have_char:
662 /* Handle trigraphs, escaped newlines etc. */
663 if (c == '?' || c == '\\')
664 c = skip_escaped_newlines (buffer, c);
665
/* A genuine terminator ends the literal.  It is consumed but not
   stored, and C becomes EOF so that no lookahead is left pending. */
666 if (c == terminator && unescaped_terminator_p (pfile, dest))
667 {
668 c = EOF;
669 break;
670 }
671 else if (is_vspace (c))
672 {
673 /* In assembly language, silently terminate string and
674 character literals at end of line. This is a kludge
675 around not knowing where comments are. */
676 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
677 break;
678
679 /* Character constants and header names may not extend over
680 multiple lines. In Standard C, neither may strings.
681 Unfortunately, we accept multiline strings as an
682 extension, except in #include family directives. */
683 if (terminator != '"' || pfile->state.angled_headers)
684 {
685 unterminated (pfile, terminator);
686 break;
687 }
688
689 if (! cpp_sys_macro_p (pfile))
690 cpp_pedwarn (pfile, "multi-line string constants are deprecated");
/* Record where the first multi-line string began, for use by
   unterminated. */
691 if (pfile->mlstring_pos.line == 0)
692 pfile->mlstring_pos = pfile->lexer_pos;
693
/* Whatever form the line ending took, a single '\n' is stored. */
694 handle_newline (buffer, c); /* Stores to read_ahead. */
695 c = '\n';
696 }
697 else if (c == '\0')
698 {
/* Warn only for the first null in this literal. */
699 if (nulls++ == 0)
700 cpp_warning (pfile, "null character(s) preserved in literal");
701 }
702
703 /* No terminating null for strings - they could contain nulls. */
704 if (dest >= limit)
705 limit = _cpp_next_chunk (pool, 0, &dest);
706 *dest++ = c;
707
708 /* If we had a new line, the next character is in read_ahead. */
709 if (c != '\n')
710 continue;
711 c = buffer->read_ahead;
712 if (c != EOF)
713 goto have_char;
714 }
715
716 /* Remember the next character. */
717 buffer->read_ahead = c;
718
/* Commit exactly the bytes written; no terminator is added. */
719 token->val.str.text = POOL_FRONT (pool);
720 token->val.str.len = dest - token->val.str.text;
721 POOL_COMMIT (pool, token->val.str.len);
722 }
723
724 /* The stored comment includes the comment start and any terminator. */
725 static void
726 save_comment (pfile, token, from)
727 cpp_reader *pfile;
728 cpp_token *token;
729 const unsigned char *from;
730 {
731 unsigned char *buffer;
732 unsigned int len;
733
734 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
735 /* C++ comments probably (not definitely) have moved past a new
736 line, which we don't want to save in the comment. */
737 if (pfile->buffer->read_ahead != EOF)
738 len--;
739 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
740
741 token->type = CPP_COMMENT;
742 token->val.str.len = len;
743 token->val.str.text = buffer;
744
745 buffer[0] = '/';
746 memcpy (buffer + 1, from, len - 1);
747 }
748
749 /* Subroutine of lex_token to handle '%'. A little tricky, since we
750 want to avoid stepping back when lexing %:%X. */
751 static void
752 lex_percent (buffer, result)
753 cpp_buffer *buffer;
754 cpp_token *result;
755 {
756 cppchar_t c;
757
758 result->type = CPP_MOD;
759 /* Parsing %:%X could leave an extra character. */
760 if (buffer->extra_char == EOF)
761 c = get_effective_char (buffer);
762 else
763 {
764 c = buffer->read_ahead = buffer->extra_char;
765 buffer->extra_char = EOF;
766 }
767
768 if (c == '=')
769 ACCEPT_CHAR (CPP_MOD_EQ);
770 else if (CPP_OPTION (buffer->pfile, digraphs))
771 {
772 if (c == ':')
773 {
774 result->flags |= DIGRAPH;
775 ACCEPT_CHAR (CPP_HASH);
776 if (get_effective_char (buffer) == '%')
777 {
778 buffer->extra_char = get_effective_char (buffer);
779 if (buffer->extra_char == ':')
780 {
781 buffer->extra_char = EOF;
782 ACCEPT_CHAR (CPP_PASTE);
783 }
784 else
785 /* We'll catch the extra_char when we're called back. */
786 buffer->read_ahead = '%';
787 }
788 }
789 else if (c == '>')
790 {
791 result->flags |= DIGRAPH;
792 ACCEPT_CHAR (CPP_CLOSE_BRACE);
793 }
794 }
795 }
796
797 /* Subroutine of lex_token to handle '.'. This is tricky, since we
798 want to avoid stepping back when lexing '...' or '.123'. In the
799 latter case we should also set a flag for parse_number. */
800 static void
801 lex_dot (pfile, result)
802 cpp_reader *pfile;
803 cpp_token *result;
804 {
805 cpp_buffer *buffer = pfile->buffer;
806 cppchar_t c;
807
808 /* Parsing ..X could leave an extra character. */
809 if (buffer->extra_char == EOF)
810 c = get_effective_char (buffer);
811 else
812 {
813 c = buffer->read_ahead = buffer->extra_char;
814 buffer->extra_char = EOF;
815 }
816
817 /* All known character sets have 0...9 contiguous. */
818 if (c >= '0' && c <= '9')
819 {
820 result->type = CPP_NUMBER;
821 parse_number (pfile, &result->val.str, c, 1);
822 }
823 else
824 {
825 result->type = CPP_DOT;
826 if (c == '.')
827 {
828 buffer->extra_char = get_effective_char (buffer);
829 if (buffer->extra_char == '.')
830 {
831 buffer->extra_char = EOF;
832 ACCEPT_CHAR (CPP_ELLIPSIS);
833 }
834 else
835 /* We'll catch the extra_char when we're called back. */
836 buffer->read_ahead = '.';
837 }
838 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
839 ACCEPT_CHAR (CPP_DOT_STAR);
840 }
841 }
842
843 void
844 _cpp_lex_token (pfile, result)
845 cpp_reader *pfile;
846 cpp_token *result;
847 {
848 cppchar_t c;
849 cpp_buffer *buffer;
850 const unsigned char *comment_start;
851 unsigned char bol;
852
853 skip:
854 bol = pfile->state.next_bol;
855 done_directive:
856 buffer = pfile->buffer;
857 pfile->state.next_bol = 0;
858 result->flags = buffer->saved_flags;
859 buffer->saved_flags = 0;
860 next_char:
861 pfile->lexer_pos.line = buffer->lineno;
862 next_char2:
863 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
864
865 c = buffer->read_ahead;
866 if (c == EOF && buffer->cur < buffer->rlimit)
867 {
868 c = *buffer->cur++;
869 pfile->lexer_pos.col++;
870 }
871
872 do_switch:
873 buffer->read_ahead = EOF;
874 switch (c)
875 {
876 case EOF:
877 /* Non-empty files should end in a newline. Ignore for command
878 line and _Pragma buffers. */
879 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
880 cpp_pedwarn (pfile, "no newline at end of file");
881 pfile->state.next_bol = 1;
882 pfile->skipping = 0; /* In case missing #endif. */
883 result->type = CPP_EOF;
884 /* Don't do MI optimisation. */
885 return;
886
887 case ' ': case '\t': case '\f': case '\v': case '\0':
888 skip_whitespace (pfile, c);
889 result->flags |= PREV_WHITE;
890 goto next_char2;
891
892 case '\n': case '\r':
893 if (!pfile->state.in_directive)
894 {
895 handle_newline (buffer, c);
896 bol = 1;
897 pfile->lexer_pos.output_line = buffer->lineno;
898 /* This is a new line, so clear any white space flag.
899 Newlines in arguments are white space (6.10.3.10);
900 parse_arg takes care of that. */
901 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
902 goto next_char;
903 }
904
905 /* Don't let directives spill over to the next line. */
906 buffer->read_ahead = c;
907 pfile->state.next_bol = 1;
908 result->type = CPP_EOF;
909 /* Don't break; pfile->skipping might be true. */
910 return;
911
912 case '?':
913 case '\\':
914 /* These could start an escaped newline, or '?' a trigraph. Let
915 skip_escaped_newlines do all the work. */
916 {
917 unsigned int lineno = buffer->lineno;
918
919 c = skip_escaped_newlines (buffer, c);
920 if (lineno != buffer->lineno)
921 /* We had at least one escaped newline of some sort, and the
922 next character is in buffer->read_ahead. Update the
923 token's line and column. */
924 goto next_char;
925
926 /* We are either the original '?' or '\\', or a trigraph. */
927 result->type = CPP_QUERY;
928 buffer->read_ahead = EOF;
929 if (c == '\\')
930 goto random_char;
931 else if (c != '?')
932 goto do_switch;
933 }
934 break;
935
936 case '0': case '1': case '2': case '3': case '4':
937 case '5': case '6': case '7': case '8': case '9':
938 result->type = CPP_NUMBER;
939 parse_number (pfile, &result->val.str, c, 0);
940 break;
941
942 case '$':
943 if (!CPP_OPTION (pfile, dollars_in_ident))
944 goto random_char;
945 /* Fall through... */
946
947 case '_':
948 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
949 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
950 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
951 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
952 case 'y': case 'z':
953 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
954 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
955 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
956 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
957 case 'Y': case 'Z':
958 result->type = CPP_NAME;
959 result->val.node = parse_identifier (pfile, c);
960
961 /* 'L' may introduce wide characters or strings. */
962 if (result->val.node == pfile->spec_nodes.n_L)
963 {
964 c = buffer->read_ahead; /* For make_string. */
965 if (c == '\'' || c == '"')
966 {
967 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
968 goto make_string;
969 }
970 }
971 /* Convert named operators to their proper types. */
972 else if (result->val.node->flags & NODE_OPERATOR)
973 {
974 result->flags |= NAMED_OP;
975 result->type = result->val.node->value.operator;
976 }
977 break;
978
979 case '\'':
980 case '"':
981 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
982 make_string:
983 parse_string (pfile, result, c);
984 break;
985
986 case '/':
987 /* A potential block or line comment. */
988 comment_start = buffer->cur;
989 result->type = CPP_DIV;
990 c = get_effective_char (buffer);
991 if (c == '=')
992 ACCEPT_CHAR (CPP_DIV_EQ);
993 if (c != '/' && c != '*')
994 break;
995
996 if (c == '*')
997 {
998 if (skip_block_comment (pfile))
999 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1000 pfile->lexer_pos.col,
1001 "unterminated comment");
1002 }
1003 else
1004 {
1005 if (!CPP_OPTION (pfile, cplusplus_comments)
1006 && !CPP_IN_SYSTEM_HEADER (pfile))
1007 break;
1008
1009 /* Warn about comments only if pedantically GNUC89, and not
1010 in system headers. */
1011 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1012 && ! buffer->warned_cplusplus_comments)
1013 {
1014 cpp_pedwarn (pfile,
1015 "C++ style comments are not allowed in ISO C89");
1016 cpp_pedwarn (pfile,
1017 "(this will be reported only once per input file)");
1018 buffer->warned_cplusplus_comments = 1;
1019 }
1020
1021 /* Skip_line_comment updates buffer->read_ahead. */
1022 if (skip_line_comment (pfile))
1023 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1024 pfile->lexer_pos.col,
1025 "multi-line comment");
1026 }
1027
1028 /* Skipping the comment has updated buffer->read_ahead. */
1029 if (!pfile->state.save_comments)
1030 {
1031 result->flags |= PREV_WHITE;
1032 goto next_char;
1033 }
1034
1035 /* Save the comment as a token in its own right. */
1036 save_comment (pfile, result, comment_start);
1037 /* Don't do MI optimisation. */
1038 return;
1039
1040 case '<':
1041 if (pfile->state.angled_headers)
1042 {
1043 result->type = CPP_HEADER_NAME;
1044 c = '>'; /* terminator. */
1045 goto make_string;
1046 }
1047
1048 result->type = CPP_LESS;
1049 c = get_effective_char (buffer);
1050 if (c == '=')
1051 ACCEPT_CHAR (CPP_LESS_EQ);
1052 else if (c == '<')
1053 {
1054 ACCEPT_CHAR (CPP_LSHIFT);
1055 if (get_effective_char (buffer) == '=')
1056 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1057 }
1058 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1059 {
1060 ACCEPT_CHAR (CPP_MIN);
1061 if (get_effective_char (buffer) == '=')
1062 ACCEPT_CHAR (CPP_MIN_EQ);
1063 }
1064 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1065 {
1066 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1067 result->flags |= DIGRAPH;
1068 }
1069 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1070 {
1071 ACCEPT_CHAR (CPP_OPEN_BRACE);
1072 result->flags |= DIGRAPH;
1073 }
1074 break;
1075
1076 case '>':
1077 result->type = CPP_GREATER;
1078 c = get_effective_char (buffer);
1079 if (c == '=')
1080 ACCEPT_CHAR (CPP_GREATER_EQ);
1081 else if (c == '>')
1082 {
1083 ACCEPT_CHAR (CPP_RSHIFT);
1084 if (get_effective_char (buffer) == '=')
1085 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1086 }
1087 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1088 {
1089 ACCEPT_CHAR (CPP_MAX);
1090 if (get_effective_char (buffer) == '=')
1091 ACCEPT_CHAR (CPP_MAX_EQ);
1092 }
1093 break;
1094
1095 case '%':
1096 lex_percent (buffer, result);
1097 if (result->type == CPP_HASH)
1098 goto do_hash;
1099 break;
1100
1101 case '.':
1102 lex_dot (pfile, result);
1103 break;
1104
1105 case '+':
1106 result->type = CPP_PLUS;
1107 c = get_effective_char (buffer);
1108 if (c == '=')
1109 ACCEPT_CHAR (CPP_PLUS_EQ);
1110 else if (c == '+')
1111 ACCEPT_CHAR (CPP_PLUS_PLUS);
1112 break;
1113
1114 case '-':
1115 result->type = CPP_MINUS;
1116 c = get_effective_char (buffer);
1117 if (c == '>')
1118 {
1119 ACCEPT_CHAR (CPP_DEREF);
1120 if (CPP_OPTION (pfile, cplusplus)
1121 && get_effective_char (buffer) == '*')
1122 ACCEPT_CHAR (CPP_DEREF_STAR);
1123 }
1124 else if (c == '=')
1125 ACCEPT_CHAR (CPP_MINUS_EQ);
1126 else if (c == '-')
1127 ACCEPT_CHAR (CPP_MINUS_MINUS);
1128 break;
1129
1130 case '*':
1131 result->type = CPP_MULT;
1132 if (get_effective_char (buffer) == '=')
1133 ACCEPT_CHAR (CPP_MULT_EQ);
1134 break;
1135
1136 case '=':
1137 result->type = CPP_EQ;
1138 if (get_effective_char (buffer) == '=')
1139 ACCEPT_CHAR (CPP_EQ_EQ);
1140 break;
1141
1142 case '!':
1143 result->type = CPP_NOT;
1144 if (get_effective_char (buffer) == '=')
1145 ACCEPT_CHAR (CPP_NOT_EQ);
1146 break;
1147
1148 case '&':
1149 result->type = CPP_AND;
1150 c = get_effective_char (buffer);
1151 if (c == '=')
1152 ACCEPT_CHAR (CPP_AND_EQ);
1153 else if (c == '&')
1154 ACCEPT_CHAR (CPP_AND_AND);
1155 break;
1156
1157 case '#':
1158 c = buffer->extra_char; /* Can be set by error condition below. */
1159 if (c != EOF)
1160 {
1161 buffer->read_ahead = c;
1162 buffer->extra_char = EOF;
1163 }
1164 else
1165 c = get_effective_char (buffer);
1166
1167 if (c == '#')
1168 {
1169 ACCEPT_CHAR (CPP_PASTE);
1170 break;
1171 }
1172
1173 result->type = CPP_HASH;
1174 do_hash:
1175 if (bol)
1176 {
1177 if (pfile->state.parsing_args)
1178 {
1179 /* 6.10.3 paragraph 11: If there are sequences of
1180 preprocessing tokens within the list of arguments that
1181 would otherwise act as preprocessing directives, the
1182 behavior is undefined.
1183
1184 This implementation will report a hard error, terminate
1185 the macro invocation, and proceed to process the
1186 directive. */
1187 cpp_error (pfile,
1188 "directives may not be used inside a macro argument");
1189
1190 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1191 buffer->extra_char = buffer->read_ahead;
1192 buffer->read_ahead = '#';
1193 pfile->state.next_bol = 1;
1194 result->type = CPP_EOF;
1195
1196 /* Get whitespace right - newline_in_args sets it. */
1197 if (pfile->lexer_pos.col == 1)
1198 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1199 }
1200 else
1201 {
1202 /* This is the hash introducing a directive. */
1203 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1204 goto done_directive; /* bol still 1. */
1205 /* This is in fact an assembler #. */
1206 }
1207 }
1208 break;
1209
1210 case '|':
1211 result->type = CPP_OR;
1212 c = get_effective_char (buffer);
1213 if (c == '=')
1214 ACCEPT_CHAR (CPP_OR_EQ);
1215 else if (c == '|')
1216 ACCEPT_CHAR (CPP_OR_OR);
1217 break;
1218
1219 case '^':
1220 result->type = CPP_XOR;
1221 if (get_effective_char (buffer) == '=')
1222 ACCEPT_CHAR (CPP_XOR_EQ);
1223 break;
1224
1225 case ':':
1226 result->type = CPP_COLON;
1227 c = get_effective_char (buffer);
1228 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1229 ACCEPT_CHAR (CPP_SCOPE);
1230 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1231 {
1232 result->flags |= DIGRAPH;
1233 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1234 }
1235 break;
1236
1237 case '~': result->type = CPP_COMPL; break;
1238 case ',': result->type = CPP_COMMA; break;
1239 case '(': result->type = CPP_OPEN_PAREN; break;
1240 case ')': result->type = CPP_CLOSE_PAREN; break;
1241 case '[': result->type = CPP_OPEN_SQUARE; break;
1242 case ']': result->type = CPP_CLOSE_SQUARE; break;
1243 case '{': result->type = CPP_OPEN_BRACE; break;
1244 case '}': result->type = CPP_CLOSE_BRACE; break;
1245 case ';': result->type = CPP_SEMICOLON; break;
1246
1247 case '@':
1248 if (CPP_OPTION (pfile, objc))
1249 {
1250 /* In Objective C, '@' may begin keywords or strings, like
1251 @keyword or @"string". It would be nice to call
1252 get_effective_char here and test the result. However, we
1253 would then need to pass 2 characters to parse_identifier,
1254 making it ugly and slowing down its main loop. Instead,
1255 we assume we have an identifier, and recover if not. */
1256 result->type = CPP_NAME;
1257 result->val.node = parse_identifier (pfile, c);
1258 if (result->val.node->length != 1)
1259 break;
1260
1261 /* OK, so it wasn't an identifier. Maybe a string? */
1262 if (buffer->read_ahead == '"')
1263 {
1264 c = '"';
1265 ACCEPT_CHAR (CPP_OSTRING);
1266 goto make_string;
1267 }
1268 }
1269 goto random_char;
1270
1271 random_char:
1272 default:
1273 result->type = CPP_OTHER;
1274 result->val.c = c;
1275 break;
1276 }
1277
1278 if (pfile->skipping)
1279 goto skip;
1280
1281 /* If not in a directive, this token invalidates controlling macros. */
1282 if (!pfile->state.in_directive)
1283 pfile->mi_state = MI_FAILED;
1284 }
1285
1286 /* An upper bound on the number of bytes needed to spell a token,
1287 including preceding whitespace. */
1288 unsigned int
1289 cpp_token_len (token)
1290 const cpp_token *token;
1291 {
1292 unsigned int len;
1293
1294 switch (TOKEN_SPELL (token))
1295 {
1296 default: len = 0; break;
1297 case SPELL_STRING: len = token->val.str.len; break;
1298 case SPELL_IDENT: len = token->val.node->length; break;
1299 }
1300 /* 1 for whitespace, 4 for comment delimeters. */
1301 return len + 5;
1302 }
1303
1304 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1305 already contain the enough space to hold the token's spelling.
1306 Returns a pointer to the character after the last character
1307 written. */
1308 unsigned char *
1309 cpp_spell_token (pfile, token, buffer)
1310 cpp_reader *pfile; /* Would be nice to be rid of this... */
1311 const cpp_token *token;
1312 unsigned char *buffer;
1313 {
1314 switch (TOKEN_SPELL (token))
1315 {
1316 case SPELL_OPERATOR:
1317 {
1318 const unsigned char *spelling;
1319 unsigned char c;
1320
1321 if (token->flags & DIGRAPH)
1322 spelling
1323 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1324 else if (token->flags & NAMED_OP)
1325 goto spell_ident;
1326 else
1327 spelling = TOKEN_NAME (token);
1328
1329 while ((c = *spelling++) != '\0')
1330 *buffer++ = c;
1331 }
1332 break;
1333
1334 case SPELL_IDENT:
1335 spell_ident:
1336 memcpy (buffer, token->val.node->name, token->val.node->length);
1337 buffer += token->val.node->length;
1338 break;
1339
1340 case SPELL_STRING:
1341 {
1342 int left, right, tag;
1343 switch (token->type)
1344 {
1345 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1346 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1347 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1348 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1349 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1350 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1351 default: left = '\0'; right = '\0'; tag = '\0'; break;
1352 }
1353 if (tag) *buffer++ = tag;
1354 if (left) *buffer++ = left;
1355 memcpy (buffer, token->val.str.text, token->val.str.len);
1356 buffer += token->val.str.len;
1357 if (right) *buffer++ = right;
1358 }
1359 break;
1360
1361 case SPELL_CHAR:
1362 *buffer++ = token->val.c;
1363 break;
1364
1365 case SPELL_NONE:
1366 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1367 break;
1368 }
1369
1370 return buffer;
1371 }
1372
1373 /* Returns a token as a null-terminated string. The string is
1374 temporary, and automatically freed later. Useful for diagnostics. */
1375 unsigned char *
1376 cpp_token_as_text (pfile, token)
1377 cpp_reader *pfile;
1378 const cpp_token *token;
1379 {
1380 unsigned int len = cpp_token_len (token);
1381 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1382
1383 end = cpp_spell_token (pfile, token, start);
1384 end[0] = '\0';
1385
1386 return start;
1387 }
1388
1389 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1390 const char *
1391 cpp_type2name (type)
1392 enum cpp_ttype type;
1393 {
1394 return (const char *) token_spellings[type].name;
1395 }
1396
1397 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1398 for efficiency - to avoid double-buffering. Also, outputs a space
1399 if PREV_WHITE is flagged. */
1400 void
1401 cpp_output_token (token, fp)
1402 const cpp_token *token;
1403 FILE *fp;
1404 {
1405 if (token->flags & PREV_WHITE)
1406 putc (' ', fp);
1407
1408 switch (TOKEN_SPELL (token))
1409 {
1410 case SPELL_OPERATOR:
1411 {
1412 const unsigned char *spelling;
1413
1414 if (token->flags & DIGRAPH)
1415 spelling
1416 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1417 else if (token->flags & NAMED_OP)
1418 goto spell_ident;
1419 else
1420 spelling = TOKEN_NAME (token);
1421
1422 ufputs (spelling, fp);
1423 }
1424 break;
1425
1426 spell_ident:
1427 case SPELL_IDENT:
1428 ufputs (token->val.node->name, fp);
1429 break;
1430
1431 case SPELL_STRING:
1432 {
1433 int left, right, tag;
1434 switch (token->type)
1435 {
1436 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1437 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1438 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1439 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1440 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1441 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1442 default: left = '\0'; right = '\0'; tag = '\0'; break;
1443 }
1444 if (tag) putc (tag, fp);
1445 if (left) putc (left, fp);
1446 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1447 if (right) putc (right, fp);
1448 }
1449 break;
1450
1451 case SPELL_CHAR:
1452 putc (token->val.c, fp);
1453 break;
1454
1455 case SPELL_NONE:
1456 /* An error, most probably. */
1457 break;
1458 }
1459 }
1460
1461 /* Compare two tokens. */
1462 int
1463 _cpp_equiv_tokens (a, b)
1464 const cpp_token *a, *b;
1465 {
1466 if (a->type == b->type && a->flags == b->flags)
1467 switch (TOKEN_SPELL (a))
1468 {
1469 default: /* Keep compiler happy. */
1470 case SPELL_OPERATOR:
1471 return 1;
1472 case SPELL_CHAR:
1473 return a->val.c == b->val.c; /* Character. */
1474 case SPELL_NONE:
1475 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1476 case SPELL_IDENT:
1477 return a->val.node == b->val.node;
1478 case SPELL_STRING:
1479 return (a->val.str.len == b->val.str.len
1480 && !memcmp (a->val.str.text, b->val.str.text,
1481 a->val.str.len));
1482 }
1483
1484 return 0;
1485 }
1486
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same
   number of tokens and each pair of corresponding tokens satisfies
   _cpp_equiv_tokens, 0 otherwise.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int n_tokens = a->limit - a->first;
  unsigned int idx = 0;

  if (n_tokens != (b->limit - b->first))
    return 0;

  while (idx < n_tokens)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
	return 0;
      idx++;
    }

  return 1;
}
#endif
1506
1507 /* Determine whether two tokens can be pasted together, and if so,
1508 what the resulting token is. Returns CPP_EOF if the tokens cannot
1509 be pasted, or the appropriate type for the merged token if they
1510 can. */
1511 enum cpp_ttype
1512 cpp_can_paste (pfile, token1, token2, digraph)
1513 cpp_reader * pfile;
1514 const cpp_token *token1, *token2;
1515 int* digraph;
1516 {
1517 enum cpp_ttype a = token1->type, b = token2->type;
1518 int cxx = CPP_OPTION (pfile, cplusplus);
1519
1520 /* Treat named operators as if they were ordinary NAMEs. */
1521 if (token1->flags & NAMED_OP)
1522 a = CPP_NAME;
1523 if (token2->flags & NAMED_OP)
1524 b = CPP_NAME;
1525
1526 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1527 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1528
1529 switch (a)
1530 {
1531 case CPP_GREATER:
1532 if (b == a) return CPP_RSHIFT;
1533 if (b == CPP_QUERY && cxx) return CPP_MAX;
1534 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1535 break;
1536 case CPP_LESS:
1537 if (b == a) return CPP_LSHIFT;
1538 if (b == CPP_QUERY && cxx) return CPP_MIN;
1539 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1540 if (CPP_OPTION (pfile, digraphs))
1541 {
1542 if (b == CPP_COLON)
1543 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1544 if (b == CPP_MOD)
1545 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1546 }
1547 break;
1548
1549 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1550 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1551 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1552
1553 case CPP_MINUS:
1554 if (b == a) return CPP_MINUS_MINUS;
1555 if (b == CPP_GREATER) return CPP_DEREF;
1556 break;
1557 case CPP_COLON:
1558 if (b == a && cxx) return CPP_SCOPE;
1559 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1560 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1561 break;
1562
1563 case CPP_MOD:
1564 if (CPP_OPTION (pfile, digraphs))
1565 {
1566 if (b == CPP_GREATER)
1567 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1568 if (b == CPP_COLON)
1569 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1570 }
1571 break;
1572 case CPP_DEREF:
1573 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1574 break;
1575 case CPP_DOT:
1576 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1577 if (b == CPP_NUMBER) return CPP_NUMBER;
1578 break;
1579
1580 case CPP_HASH:
1581 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1582 /* %:%: digraph */
1583 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1584 break;
1585
1586 case CPP_NAME:
1587 if (b == CPP_NAME) return CPP_NAME;
1588 if (b == CPP_NUMBER
1589 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1590 if (b == CPP_CHAR
1591 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1592 if (b == CPP_STRING
1593 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1594 break;
1595
1596 case CPP_NUMBER:
1597 if (b == CPP_NUMBER) return CPP_NUMBER;
1598 if (b == CPP_NAME) return CPP_NUMBER;
1599 if (b == CPP_DOT) return CPP_NUMBER;
1600 /* Numbers cannot have length zero, so this is safe. */
1601 if ((b == CPP_PLUS || b == CPP_MINUS)
1602 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1603 return CPP_NUMBER;
1604 break;
1605
1606 case CPP_OTHER:
1607 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1608 {
1609 if (b == CPP_NAME) return CPP_NAME;
1610 if (b == CPP_STRING) return CPP_OSTRING;
1611 }
1612
1613 default:
1614 break;
1615 }
1616
1617 return CPP_EOF;
1618 }
1619
1620 /* Returns nonzero if a space should be inserted to avoid an
1621 accidental token paste for output. For simplicity, it is
1622 conservative, and occasionally advises a space where one is not
1623 needed, e.g. "." and ".2". */
1624
1625 int
1626 cpp_avoid_paste (pfile, token1, token2)
1627 cpp_reader *pfile;
1628 const cpp_token *token1, *token2;
1629 {
1630 enum cpp_ttype a = token1->type, b = token2->type;
1631 cppchar_t c;
1632
1633 if (token1->flags & NAMED_OP)
1634 a = CPP_NAME;
1635 if (token2->flags & NAMED_OP)
1636 b = CPP_NAME;
1637
1638 c = EOF;
1639 if (token2->flags & DIGRAPH)
1640 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1641 else if (token_spellings[b].category == SPELL_OPERATOR)
1642 c = token_spellings[b].name[0];
1643
1644 /* Quickly get everything that can paste with an '='. */
1645 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1646 return 1;
1647
1648 switch (a)
1649 {
1650 case CPP_GREATER: return c == '>' || c == '?';
1651 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1652 case CPP_PLUS: return c == '+';
1653 case CPP_MINUS: return c == '-' || c == '>';
1654 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1655 case CPP_MOD: return c == ':' || c == '>';
1656 case CPP_AND: return c == '&';
1657 case CPP_OR: return c == '|';
1658 case CPP_COLON: return c == ':' || c == '>';
1659 case CPP_DEREF: return c == '*';
1660 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1661 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1662 case CPP_NAME: return ((b == CPP_NUMBER
1663 && name_p (pfile, &token2->val.str))
1664 || b == CPP_NAME
1665 || b == CPP_CHAR || b == CPP_STRING); /* L */
1666 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1667 || c == '.' || c == '+' || c == '-');
1668 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1669 && token1->val.c == '@'
1670 && (b == CPP_NAME || b == CPP_STRING));
1671 default: break;
1672 }
1673
1674 return 0;
1675 }
1676
1677 /* Output all the remaining tokens on the current line, and a newline
1678 character, to FP. Leading whitespace is removed. */
1679 void
1680 cpp_output_line (pfile, fp)
1681 cpp_reader *pfile;
1682 FILE *fp;
1683 {
1684 cpp_token token;
1685
1686 cpp_get_token (pfile, &token);
1687 token.flags &= ~PREV_WHITE;
1688 while (token.type != CPP_EOF)
1689 {
1690 cpp_output_token (&token, fp);
1691 cpp_get_token (pfile, &token);
1692 }
1693
1694 putc ('\n', fp);
1695 }
1696
1697 /* Memory pools. */
1698
/* Probe structure: a single char followed by a union of a pointer
   and a double.  The offset of the union within the structure is
   the padding the compiler inserts to align it.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when a pool is created without an explicit one,
   and for rounding chunk sizes.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1710
1711 static int
1712 chunk_suitable (pool, chunk, size)
1713 cpp_pool *pool;
1714 cpp_chunk *chunk;
1715 unsigned int size;
1716 {
1717 /* Being at least twice SIZE means we can use memcpy in
1718 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1719 anyway. */
1720 return (chunk && pool->locked != chunk
1721 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1722 }
1723
1724 /* Returns the end of the new pool. PTR points to a char in the old
1725 pool, and is updated to point to the same char in the new pool. */
1726 unsigned char *
1727 _cpp_next_chunk (pool, len, ptr)
1728 cpp_pool *pool;
1729 unsigned int len;
1730 unsigned char **ptr;
1731 {
1732 cpp_chunk *chunk = pool->cur->next;
1733
1734 /* LEN is the minimum size we want in the new pool. */
1735 len += POOL_ROOM (pool);
1736 if (! chunk_suitable (pool, chunk, len))
1737 {
1738 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1739
1740 chunk->next = pool->cur->next;
1741 pool->cur->next = chunk;
1742 }
1743
1744 /* Update the pointer before changing chunk's front. */
1745 if (ptr)
1746 *ptr += chunk->base - POOL_FRONT (pool);
1747
1748 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1749 chunk->front = chunk->base;
1750
1751 pool->cur = chunk;
1752 return POOL_LIMIT (pool);
1753 }
1754
1755 static cpp_chunk *
1756 new_chunk (size)
1757 unsigned int size;
1758 {
1759 unsigned char *base;
1760 cpp_chunk *result;
1761
1762 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
1763 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1764 /* Put the chunk descriptor at the end. Then chunk overruns will
1765 cause obvious chaos. */
1766 result = (cpp_chunk *) (base + size);
1767 result->base = base;
1768 result->front = base;
1769 result->limit = base + size;
1770 result->next = 0;
1771
1772 return result;
1773 }
1774
1775 void
1776 _cpp_init_pool (pool, size, align, temp)
1777 cpp_pool *pool;
1778 unsigned int size, align, temp;
1779 {
1780 if (align == 0)
1781 align = DEFAULT_ALIGNMENT;
1782 if (align & (align - 1))
1783 abort ();
1784 pool->align = align;
1785 pool->cur = new_chunk (size);
1786 pool->locked = 0;
1787 pool->locks = 0;
1788 if (temp)
1789 pool->cur->next = pool->cur;
1790 }
1791
1792 void
1793 _cpp_lock_pool (pool)
1794 cpp_pool *pool;
1795 {
1796 if (pool->locks++ == 0)
1797 pool->locked = pool->cur;
1798 }
1799
1800 void
1801 _cpp_unlock_pool (pool)
1802 cpp_pool *pool;
1803 {
1804 if (--pool->locks == 0)
1805 pool->locked = 0;
1806 }
1807
1808 void
1809 _cpp_free_pool (pool)
1810 cpp_pool *pool;
1811 {
1812 cpp_chunk *chunk = pool->cur, *next;
1813
1814 do
1815 {
1816 next = chunk->next;
1817 free (chunk->base);
1818 chunk = next;
1819 }
1820 while (chunk && chunk != pool->cur);
1821 }
1822
1823 /* Reserve LEN bytes from a memory pool. */
1824 unsigned char *
1825 _cpp_pool_reserve (pool, len)
1826 cpp_pool *pool;
1827 unsigned int len;
1828 {
1829 len = POOL_ALIGN (len, pool->align);
1830 if (len > (unsigned int) POOL_ROOM (pool))
1831 _cpp_next_chunk (pool, len, 0);
1832
1833 return POOL_FRONT (pool);
1834 }
1835
1836 /* Allocate LEN bytes from a memory pool. */
1837 unsigned char *
1838 _cpp_pool_alloc (pool, len)
1839 cpp_pool *pool;
1840 unsigned int len;
1841 {
1842 unsigned char *result = _cpp_pool_reserve (pool, len);
1843
1844 POOL_COMMIT (pool, len);
1845 return result;
1846 }