configure.ac (GLIBCXX_CHECK_STANDARD_LAYOUT): Remove invocation.
[gcc.git] / libcpp / lex.c
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26
/* Spelling category of a token type, as recorded for each entry of
   the token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR,       /* First enumerator, so its value is 0.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
35 struct token_spelling
36 {
37 enum spell_type category;
38 const unsigned char *name;
39 };
40
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
64
65 static _cpp_buff *new_buff (size_t);
66
67
68 /* Utility routine:
69
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
74 {
75 if (token->type != CPP_NAME)
76 return 0;
77
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
79 }
80
81 /* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 {
86 if (buffer->notes_used == buffer->notes_cap)
87 {
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
91 }
92
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
96 }
97
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
100 void
101 _cpp_clean_line (cpp_reader *pfile)
102 {
103 cpp_buffer *buffer;
104 const uchar *s;
105 uchar c, *d, *p;
106
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
112
113 if (!buffer->from_stage3)
114 {
115 const uchar *pbackslash = NULL;
116
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
121 {
122 c = *++s;
123 if (__builtin_expect (c == '\n', false)
124 || __builtin_expect (c == '\r', false))
125 {
126 d = (uchar *) s;
127
128 if (__builtin_expect (s == buffer->rlimit, false))
129 goto done;
130
131 /* DOS line ending? */
132 if (__builtin_expect (c == '\r', false)
133 && s[1] == '\n')
134 {
135 s++;
136 if (s == buffer->rlimit)
137 goto done;
138 }
139
140 if (__builtin_expect (pbackslash == NULL, true))
141 goto done;
142
143 /* Check for escaped newline. */
144 p = d;
145 while (is_nvspace (p[-1]))
146 p--;
147 if (p - 1 != pbackslash)
148 goto done;
149
150 /* Have an escaped newline; process it and proceed to
151 the slow path. */
152 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
153 d = p - 2;
154 buffer->next_line = p - 1;
155 break;
156 }
157 if (__builtin_expect (c == '\\', false))
158 pbackslash = s;
159 else if (__builtin_expect (c == '?', false)
160 && __builtin_expect (s[1] == '?', false)
161 && _cpp_trigraph_map[s[2]])
162 {
163 /* Have a trigraph. We may or may not have to convert
164 it. Add a line note regardless, for -Wtrigraphs. */
165 add_line_note (buffer, s, s[2]);
166 if (CPP_OPTION (pfile, trigraphs))
167 {
168 /* We do, and that means we have to switch to the
169 slow path. */
170 d = (uchar *) s;
171 *d = _cpp_trigraph_map[s[2]];
172 s += 2;
173 break;
174 }
175 }
176 }
177
178
179 for (;;)
180 {
181 c = *++s;
182 *++d = c;
183
184 if (c == '\n' || c == '\r')
185 {
186 /* Handle DOS line endings. */
187 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
188 s++;
189 if (s == buffer->rlimit)
190 break;
191
192 /* Escaped? */
193 p = d;
194 while (p != buffer->next_line && is_nvspace (p[-1]))
195 p--;
196 if (p == buffer->next_line || p[-1] != '\\')
197 break;
198
199 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
200 d = p - 2;
201 buffer->next_line = p - 1;
202 }
203 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
204 {
205 /* Add a note regardless, for the benefit of -Wtrigraphs. */
206 add_line_note (buffer, d, s[2]);
207 if (CPP_OPTION (pfile, trigraphs))
208 {
209 *d = _cpp_trigraph_map[s[2]];
210 s += 2;
211 }
212 }
213 }
214 }
215 else
216 {
217 do
218 s++;
219 while (*s != '\n' && *s != '\r');
220 d = (uchar *) s;
221
222 /* Handle DOS line endings. */
223 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
224 s++;
225 }
226
227 done:
228 *d = '\n';
229 /* A sentinel note that should never be processed. */
230 add_line_note (buffer, d + 1, '\n');
231 buffer->next_line = s + 1;
232 }
233
234 /* Return true if the trigraph indicated by NOTE should be warned
235 about in a comment. */
236 static bool
237 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
238 {
239 const uchar *p;
240
241 /* Within comments we don't warn about trigraphs, unless the
242 trigraph forms an escaped newline, as that may change
243 behavior. */
244 if (note->type != '/')
245 return false;
246
247 /* If -trigraphs, then this was an escaped newline iff the next note
248 is coincident. */
249 if (CPP_OPTION (pfile, trigraphs))
250 return note[1].pos == note->pos;
251
252 /* Otherwise, see if this forms an escaped newline. */
253 p = note->pos + 3;
254 while (is_nvspace (*p))
255 p++;
256
257 /* There might have been escaped newlines between the trigraph and the
258 newline we found. Hence the position test. */
259 return (*p == '\n' && p < note[1].pos);
260 }
261
262 /* Process the notes created by add_line_note as far as the current
263 location. */
264 void
265 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
266 {
267 cpp_buffer *buffer = pfile->buffer;
268
269 for (;;)
270 {
271 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
272 unsigned int col;
273
274 if (note->pos > buffer->cur)
275 break;
276
277 buffer->cur_note++;
278 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
279
280 if (note->type == '\\' || note->type == ' ')
281 {
282 if (note->type == ' ' && !in_comment)
283 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
284 "backslash and newline separated by space");
285
286 if (buffer->next_line > buffer->rlimit)
287 {
288 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
289 "backslash-newline at end of file");
290 /* Prevent "no newline at end of file" warning. */
291 buffer->next_line = buffer->rlimit;
292 }
293
294 buffer->line_base = note->pos;
295 CPP_INCREMENT_LINE (pfile, 0);
296 }
297 else if (_cpp_trigraph_map[note->type])
298 {
299 if (CPP_OPTION (pfile, warn_trigraphs)
300 && (!in_comment || warn_in_comment (pfile, note)))
301 {
302 if (CPP_OPTION (pfile, trigraphs))
303 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
304 "trigraph ??%c converted to %c",
305 note->type,
306 (int) _cpp_trigraph_map[note->type]);
307 else
308 {
309 cpp_error_with_line
310 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
311 "trigraph ??%c ignored, use -trigraphs to enable",
312 note->type);
313 }
314 }
315 }
316 else
317 abort ();
318 }
319 }
320
321 /* Skip a C-style block comment. We find the end of the comment by
322 seeing if an asterisk is before every '/' we encounter. Returns
323 nonzero if comment terminated by EOF, zero otherwise.
324
325 Buffer->cur points to the initial asterisk of the comment. */
326 bool
327 _cpp_skip_block_comment (cpp_reader *pfile)
328 {
329 cpp_buffer *buffer = pfile->buffer;
330 const uchar *cur = buffer->cur;
331 uchar c;
332
333 cur++;
334 if (*cur == '/')
335 cur++;
336
337 for (;;)
338 {
339 /* People like decorating comments with '*', so check for '/'
340 instead for efficiency. */
341 c = *cur++;
342
343 if (c == '/')
344 {
345 if (cur[-2] == '*')
346 break;
347
348 /* Warn about potential nested comments, but not if the '/'
349 comes immediately before the true comment delimiter.
350 Don't bother to get it right across escaped newlines. */
351 if (CPP_OPTION (pfile, warn_comments)
352 && cur[0] == '*' && cur[1] != '/')
353 {
354 buffer->cur = cur;
355 cpp_error_with_line (pfile, CPP_DL_WARNING,
356 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
357 "\"/*\" within comment");
358 }
359 }
360 else if (c == '\n')
361 {
362 unsigned int cols;
363 buffer->cur = cur - 1;
364 _cpp_process_line_notes (pfile, true);
365 if (buffer->next_line >= buffer->rlimit)
366 return true;
367 _cpp_clean_line (pfile);
368
369 cols = buffer->next_line - buffer->line_base;
370 CPP_INCREMENT_LINE (pfile, cols);
371
372 cur = buffer->cur;
373 }
374 }
375
376 buffer->cur = cur;
377 _cpp_process_line_notes (pfile, true);
378 return false;
379 }
380
381 /* Skip a C++ line comment, leaving buffer->cur pointing to the
382 terminating newline. Handles escaped newlines. Returns nonzero
383 if a multiline comment. */
384 static int
385 skip_line_comment (cpp_reader *pfile)
386 {
387 cpp_buffer *buffer = pfile->buffer;
388 source_location orig_line = pfile->line_table->highest_line;
389
390 while (*buffer->cur != '\n')
391 buffer->cur++;
392
393 _cpp_process_line_notes (pfile, true);
394 return orig_line != pfile->line_table->highest_line;
395 }
396
397 /* Skips whitespace, saving the next non-whitespace character. */
398 static void
399 skip_whitespace (cpp_reader *pfile, cppchar_t c)
400 {
401 cpp_buffer *buffer = pfile->buffer;
402 bool saw_NUL = false;
403
404 do
405 {
406 /* Horizontal space always OK. */
407 if (c == ' ' || c == '\t')
408 ;
409 /* Just \f \v or \0 left. */
410 else if (c == '\0')
411 saw_NUL = true;
412 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
413 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
414 CPP_BUF_COL (buffer),
415 "%s in preprocessing directive",
416 c == '\f' ? "form feed" : "vertical tab");
417
418 c = *buffer->cur++;
419 }
420 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
421 while (is_nvspace (c));
422
423 if (saw_NUL)
424 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
425
426 buffer->cur--;
427 }
428
429 /* See if the characters of a number token are valid in a name (no
430 '.', '+' or '-'). */
431 static int
432 name_p (cpp_reader *pfile, const cpp_string *string)
433 {
434 unsigned int i;
435
436 for (i = 0; i < string->len; i++)
437 if (!is_idchar (string->text[i]))
438 return 0;
439
440 return 1;
441 }
442
443 /* After parsing an identifier or other sequence, produce a warning about
444 sequences not in NFC/NFKC. */
445 static void
446 warn_about_normalization (cpp_reader *pfile,
447 const cpp_token *token,
448 const struct normalize_state *s)
449 {
450 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
451 && !pfile->state.skipping)
452 {
453 /* Make sure that the token is printed using UCNs, even
454 if we'd otherwise happily print UTF-8. */
455 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
456 size_t sz;
457
458 sz = cpp_spell_token (pfile, token, buf, false) - buf;
459 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
460 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
461 "`%.*s' is not in NFKC", (int) sz, buf);
462 else
463 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
464 "`%.*s' is not in NFC", (int) sz, buf);
465 }
466 }
467
468 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
469 an identifier. FIRST is TRUE if this starts an identifier. */
470 static bool
471 forms_identifier_p (cpp_reader *pfile, int first,
472 struct normalize_state *state)
473 {
474 cpp_buffer *buffer = pfile->buffer;
475
476 if (*buffer->cur == '$')
477 {
478 if (!CPP_OPTION (pfile, dollars_in_ident))
479 return false;
480
481 buffer->cur++;
482 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
483 {
484 CPP_OPTION (pfile, warn_dollars) = 0;
485 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
486 }
487
488 return true;
489 }
490
491 /* Is this a syntactically valid UCN? */
492 if (CPP_OPTION (pfile, extended_identifiers)
493 && *buffer->cur == '\\'
494 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
495 {
496 buffer->cur += 2;
497 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
498 state))
499 return true;
500 buffer->cur -= 2;
501 }
502
503 return false;
504 }
505
506 /* Lex an identifier starting at BUFFER->CUR - 1. */
507 static cpp_hashnode *
508 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
509 struct normalize_state *nst)
510 {
511 cpp_hashnode *result;
512 const uchar *cur;
513 unsigned int len;
514 unsigned int hash = HT_HASHSTEP (0, *base);
515
516 cur = pfile->buffer->cur;
517 if (! starts_ucn)
518 while (ISIDNUM (*cur))
519 {
520 hash = HT_HASHSTEP (hash, *cur);
521 cur++;
522 }
523 pfile->buffer->cur = cur;
524 if (starts_ucn || forms_identifier_p (pfile, false, nst))
525 {
526 /* Slower version for identifiers containing UCNs (or $). */
527 do {
528 while (ISIDNUM (*pfile->buffer->cur))
529 {
530 pfile->buffer->cur++;
531 NORMALIZE_STATE_UPDATE_IDNUM (nst);
532 }
533 } while (forms_identifier_p (pfile, false, nst));
534 result = _cpp_interpret_identifier (pfile, base,
535 pfile->buffer->cur - base);
536 }
537 else
538 {
539 len = cur - base;
540 hash = HT_HASHFINISH (hash, len);
541
542 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
543 base, len, hash, HT_ALLOC));
544 }
545
546 /* Rarely, identifiers require diagnostics when lexed. */
547 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
548 && !pfile->state.skipping, 0))
549 {
550 /* It is allowed to poison the same identifier twice. */
551 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
552 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
553 NODE_NAME (result));
554
555 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
556 replacement list of a variadic macro. */
557 if (result == pfile->spec_nodes.n__VA_ARGS__
558 && !pfile->state.va_args_ok)
559 cpp_error (pfile, CPP_DL_PEDWARN,
560 "__VA_ARGS__ can only appear in the expansion"
561 " of a C99 variadic macro");
562 }
563
564 return result;
565 }
566
567 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
568 static void
569 lex_number (cpp_reader *pfile, cpp_string *number,
570 struct normalize_state *nst)
571 {
572 const uchar *cur;
573 const uchar *base;
574 uchar *dest;
575
576 base = pfile->buffer->cur - 1;
577 do
578 {
579 cur = pfile->buffer->cur;
580
581 /* N.B. ISIDNUM does not include $. */
582 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
583 {
584 cur++;
585 NORMALIZE_STATE_UPDATE_IDNUM (nst);
586 }
587
588 pfile->buffer->cur = cur;
589 }
590 while (forms_identifier_p (pfile, false, nst));
591
592 number->len = cur - base;
593 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
594 memcpy (dest, base, number->len);
595 dest[number->len] = '\0';
596 number->text = dest;
597 }
598
599 /* Create a token of type TYPE with a literal spelling. */
600 static void
601 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
602 unsigned int len, enum cpp_ttype type)
603 {
604 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
605
606 memcpy (dest, base, len);
607 dest[len] = '\0';
608 token->type = type;
609 token->val.str.len = len;
610 token->val.str.text = dest;
611 }
612
613 /* Lexes a string, character constant, or angle-bracketed header file
614 name. The stored string contains the spelling, including opening
615 quote and leading any leading 'L', 'u' or 'U'. It returns the type
616 of the literal, or CPP_OTHER if it was not properly terminated.
617
618 The spelling is NUL-terminated, but it is not guaranteed that this
619 is the first NUL since embedded NULs are preserved. */
620 static void
621 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
622 {
623 bool saw_NUL = false;
624 const uchar *cur;
625 cppchar_t terminator;
626 enum cpp_ttype type;
627
628 cur = base;
629 terminator = *cur++;
630 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
631 terminator = *cur++;
632 if (terminator == '\"')
633 type = (*base == 'L' ? CPP_WSTRING :
634 *base == 'U' ? CPP_STRING32 :
635 *base == 'u' ? CPP_STRING16 : CPP_STRING);
636 else if (terminator == '\'')
637 type = (*base == 'L' ? CPP_WCHAR :
638 *base == 'U' ? CPP_CHAR32 :
639 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
640 else
641 terminator = '>', type = CPP_HEADER_NAME;
642
643 for (;;)
644 {
645 cppchar_t c = *cur++;
646
647 /* In #include-style directives, terminators are not escapable. */
648 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
649 cur++;
650 else if (c == terminator)
651 break;
652 else if (c == '\n')
653 {
654 cur--;
655 type = CPP_OTHER;
656 break;
657 }
658 else if (c == '\0')
659 saw_NUL = true;
660 }
661
662 if (saw_NUL && !pfile->state.skipping)
663 cpp_error (pfile, CPP_DL_WARNING,
664 "null character(s) preserved in literal");
665
666 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
667 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
668 (int) terminator);
669
670 pfile->buffer->cur = cur;
671 create_literal (pfile, token, base, cur - base, type);
672 }
673
674 /* Return the comment table. The client may not make any assumption
675 about the ordering of the table. */
676 cpp_comment_table *
677 cpp_get_comments (cpp_reader *pfile)
678 {
679 return &pfile->comments;
680 }
681
682 /* Append a comment to the end of the comment table. */
683 static void
684 store_comment (cpp_reader *pfile, cpp_token *token)
685 {
686 int len;
687
688 if (pfile->comments.allocated == 0)
689 {
690 pfile->comments.allocated = 256;
691 pfile->comments.entries = (cpp_comment *) xmalloc
692 (pfile->comments.allocated * sizeof (cpp_comment));
693 }
694
695 if (pfile->comments.count == pfile->comments.allocated)
696 {
697 pfile->comments.allocated *= 2;
698 pfile->comments.entries = (cpp_comment *) xrealloc
699 (pfile->comments.entries,
700 pfile->comments.allocated * sizeof (cpp_comment));
701 }
702
703 len = token->val.str.len;
704
705 /* Copy comment. Note, token may not be NULL terminated. */
706 pfile->comments.entries[pfile->comments.count].comment =
707 (char *) xmalloc (sizeof (char) * (len + 1));
708 memcpy (pfile->comments.entries[pfile->comments.count].comment,
709 token->val.str.text, len);
710 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
711
712 /* Set source location. */
713 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
714
715 /* Increment the count of entries in the comment table. */
716 pfile->comments.count++;
717 }
718
719 /* The stored comment includes the comment start and any terminator. */
720 static void
721 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
722 cppchar_t type)
723 {
724 unsigned char *buffer;
725 unsigned int len, clen;
726
727 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
728
729 /* C++ comments probably (not definitely) have moved past a new
730 line, which we don't want to save in the comment. */
731 if (is_vspace (pfile->buffer->cur[-1]))
732 len--;
733
734 /* If we are currently in a directive, then we need to store all
735 C++ comments as C comments internally, and so we need to
736 allocate a little extra space in that case.
737
738 Note that the only time we encounter a directive here is
739 when we are saving comments in a "#define". */
740 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
741
742 buffer = _cpp_unaligned_alloc (pfile, clen);
743
744 token->type = CPP_COMMENT;
745 token->val.str.len = clen;
746 token->val.str.text = buffer;
747
748 buffer[0] = '/';
749 memcpy (buffer + 1, from, len - 1);
750
751 /* Finish conversion to a C comment, if necessary. */
752 if (pfile->state.in_directive && type == '/')
753 {
754 buffer[1] = '*';
755 buffer[clen - 2] = '*';
756 buffer[clen - 1] = '/';
757 }
758
759 /* Finally store this comment for use by clients of libcpp. */
760 store_comment (pfile, token);
761 }
762
763 /* Allocate COUNT tokens for RUN. */
764 void
765 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
766 {
767 run->base = XNEWVEC (cpp_token, count);
768 run->limit = run->base + count;
769 run->next = NULL;
770 }
771
772 /* Returns the next tokenrun, or creates one if there is none. */
773 static tokenrun *
774 next_tokenrun (tokenrun *run)
775 {
776 if (run->next == NULL)
777 {
778 run->next = XNEW (tokenrun);
779 run->next->prev = run;
780 _cpp_init_tokenrun (run->next, 250);
781 }
782
783 return run->next;
784 }
785
786 /* Look ahead in the input stream. */
787 const cpp_token *
788 cpp_peek_token (cpp_reader *pfile, int index)
789 {
790 cpp_context *context = pfile->context;
791 const cpp_token *peektok;
792 int count;
793
794 /* First, scan through any pending cpp_context objects. */
795 while (context->prev)
796 {
797 ptrdiff_t sz = (context->direct_p
798 ? LAST (context).token - FIRST (context).token
799 : LAST (context).ptoken - FIRST (context).ptoken);
800
801 if (index < (int) sz)
802 return (context->direct_p
803 ? FIRST (context).token + index
804 : *(FIRST (context).ptoken + index));
805
806 index -= (int) sz;
807 context = context->prev;
808 }
809
810 /* We will have to read some new tokens after all (and do so
811 without invalidating preceding tokens). */
812 count = index;
813 pfile->keep_tokens++;
814
815 do
816 {
817 peektok = _cpp_lex_token (pfile);
818 if (peektok->type == CPP_EOF)
819 return peektok;
820 }
821 while (index--);
822
823 _cpp_backup_tokens_direct (pfile, count + 1);
824 pfile->keep_tokens--;
825
826 return peektok;
827 }
828
829 /* Allocate a single token that is invalidated at the same time as the
830 rest of the tokens on the line. Has its line and col set to the
831 same as the last lexed token, so that diagnostics appear in the
832 right place. */
833 cpp_token *
834 _cpp_temp_token (cpp_reader *pfile)
835 {
836 cpp_token *old, *result;
837 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
838 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
839
840 old = pfile->cur_token - 1;
841 /* Any pre-existing lookaheads must not be clobbered. */
842 if (la)
843 {
844 if (sz <= la)
845 {
846 tokenrun *next = next_tokenrun (pfile->cur_run);
847
848 if (sz < la)
849 memmove (next->base + 1, next->base,
850 (la - sz) * sizeof (cpp_token));
851
852 next->base[0] = pfile->cur_run->limit[-1];
853 }
854
855 if (sz > 1)
856 memmove (pfile->cur_token + 1, pfile->cur_token,
857 MIN (la, sz - 1) * sizeof (cpp_token));
858 }
859
860 if (!sz && pfile->cur_token == pfile->cur_run->limit)
861 {
862 pfile->cur_run = next_tokenrun (pfile->cur_run);
863 pfile->cur_token = pfile->cur_run->base;
864 }
865
866 result = pfile->cur_token++;
867 result->src_loc = old->src_loc;
868 return result;
869 }
870
871 /* Lex a token into RESULT (external interface). Takes care of issues
872 like directive handling, token lookahead, multiple include
873 optimization and skipping. */
874 const cpp_token *
875 _cpp_lex_token (cpp_reader *pfile)
876 {
877 cpp_token *result;
878
879 for (;;)
880 {
881 if (pfile->cur_token == pfile->cur_run->limit)
882 {
883 pfile->cur_run = next_tokenrun (pfile->cur_run);
884 pfile->cur_token = pfile->cur_run->base;
885 }
886 /* We assume that the current token is somewhere in the current
887 run. */
888 if (pfile->cur_token < pfile->cur_run->base
889 || pfile->cur_token >= pfile->cur_run->limit)
890 abort ();
891
892 if (pfile->lookaheads)
893 {
894 pfile->lookaheads--;
895 result = pfile->cur_token++;
896 }
897 else
898 result = _cpp_lex_direct (pfile);
899
900 if (result->flags & BOL)
901 {
902 /* Is this a directive. If _cpp_handle_directive returns
903 false, it is an assembler #. */
904 if (result->type == CPP_HASH
905 /* 6.10.3 p 11: Directives in a list of macro arguments
906 gives undefined behavior. This implementation
907 handles the directive as normal. */
908 && pfile->state.parsing_args != 1)
909 {
910 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
911 {
912 if (pfile->directive_result.type == CPP_PADDING)
913 continue;
914 result = &pfile->directive_result;
915 }
916 }
917 else if (pfile->state.in_deferred_pragma)
918 result = &pfile->directive_result;
919
920 if (pfile->cb.line_change && !pfile->state.skipping)
921 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
922 }
923
924 /* We don't skip tokens in directives. */
925 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
926 break;
927
928 /* Outside a directive, invalidate controlling macros. At file
929 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
930 get here and MI optimization works. */
931 pfile->mi_valid = false;
932
933 if (!pfile->state.skipping || result->type == CPP_EOF)
934 break;
935 }
936
937 return result;
938 }
939
940 /* Returns true if a fresh line has been loaded. */
941 bool
942 _cpp_get_fresh_line (cpp_reader *pfile)
943 {
944 int return_at_eof;
945
946 /* We can't get a new line until we leave the current directive. */
947 if (pfile->state.in_directive)
948 return false;
949
950 for (;;)
951 {
952 cpp_buffer *buffer = pfile->buffer;
953
954 if (!buffer->need_line)
955 return true;
956
957 if (buffer->next_line < buffer->rlimit)
958 {
959 _cpp_clean_line (pfile);
960 return true;
961 }
962
963 /* First, get out of parsing arguments state. */
964 if (pfile->state.parsing_args)
965 return false;
966
967 /* End of buffer. Non-empty files should end in a newline. */
968 if (buffer->buf != buffer->rlimit
969 && buffer->next_line > buffer->rlimit
970 && !buffer->from_stage3)
971 {
972 /* Clip to buffer size. */
973 buffer->next_line = buffer->rlimit;
974 }
975
976 return_at_eof = buffer->return_at_eof;
977 _cpp_pop_buffer (pfile);
978 if (pfile->buffer == NULL || return_at_eof)
979 return false;
980 }
981 }
982
/* Set result->type to THEN_TYPE and consume the next character if it
   is CHAR; otherwise set result->type to ELSE_TYPE.  Relies on the
   variables `buffer' and `result' of the expanding function.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
991
992 /* Lex a token into pfile->cur_token, which is also incremented, to
993 get diagnostics pointing to the correct location.
994
995 Does not handle issues such as token lookahead, multiple-include
996 optimization, directives, skipping etc. This function is only
997 suitable for use by _cpp_lex_token, and in special cases like
998 lex_expansion_token which doesn't care for any of these issues.
999
1000 When meeting a newline, returns CPP_EOF if parsing a directive,
1001 otherwise returns to the start of the token buffer if permissible.
1002 Returns the location of the lexed token. */
1003 cpp_token *
1004 _cpp_lex_direct (cpp_reader *pfile)
1005 {
1006 cppchar_t c;
1007 cpp_buffer *buffer;
1008 const unsigned char *comment_start;
1009 cpp_token *result = pfile->cur_token++;
1010
1011 fresh_line:
1012 result->flags = 0;
1013 buffer = pfile->buffer;
1014 if (buffer->need_line)
1015 {
1016 if (pfile->state.in_deferred_pragma)
1017 {
1018 result->type = CPP_PRAGMA_EOL;
1019 pfile->state.in_deferred_pragma = false;
1020 if (!pfile->state.pragma_allow_expansion)
1021 pfile->state.prevent_expansion--;
1022 return result;
1023 }
1024 if (!_cpp_get_fresh_line (pfile))
1025 {
1026 result->type = CPP_EOF;
1027 if (!pfile->state.in_directive)
1028 {
1029 /* Tell the compiler the line number of the EOF token. */
1030 result->src_loc = pfile->line_table->highest_line;
1031 result->flags = BOL;
1032 }
1033 return result;
1034 }
1035 if (!pfile->keep_tokens)
1036 {
1037 pfile->cur_run = &pfile->base_run;
1038 result = pfile->base_run.base;
1039 pfile->cur_token = result + 1;
1040 }
1041 result->flags = BOL;
1042 if (pfile->state.parsing_args == 2)
1043 result->flags |= PREV_WHITE;
1044 }
1045 buffer = pfile->buffer;
1046 update_tokens_line:
1047 result->src_loc = pfile->line_table->highest_line;
1048
1049 skipped_white:
1050 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1051 && !pfile->overlaid_buffer)
1052 {
1053 _cpp_process_line_notes (pfile, false);
1054 result->src_loc = pfile->line_table->highest_line;
1055 }
1056 c = *buffer->cur++;
1057
1058 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1059 CPP_BUF_COLUMN (buffer, buffer->cur));
1060
1061 switch (c)
1062 {
1063 case ' ': case '\t': case '\f': case '\v': case '\0':
1064 result->flags |= PREV_WHITE;
1065 skip_whitespace (pfile, c);
1066 goto skipped_white;
1067
1068 case '\n':
1069 if (buffer->cur < buffer->rlimit)
1070 CPP_INCREMENT_LINE (pfile, 0);
1071 buffer->need_line = true;
1072 goto fresh_line;
1073
1074 case '0': case '1': case '2': case '3': case '4':
1075 case '5': case '6': case '7': case '8': case '9':
1076 {
1077 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1078 result->type = CPP_NUMBER;
1079 lex_number (pfile, &result->val.str, &nst);
1080 warn_about_normalization (pfile, result, &nst);
1081 break;
1082 }
1083
1084 case 'L':
1085 case 'u':
1086 case 'U':
1087 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1088 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1089 {
1090 if (*buffer->cur == '\'' || *buffer->cur == '"')
1091 {
1092 lex_string (pfile, result, buffer->cur - 1);
1093 break;
1094 }
1095 }
1096 /* Fall through. */
1097
1098 case '_':
1099 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1100 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1101 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1102 case 's': case 't': case 'v': case 'w': case 'x':
1103 case 'y': case 'z':
1104 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1105 case 'G': case 'H': case 'I': case 'J': case 'K':
1106 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1107 case 'S': case 'T': case 'V': case 'W': case 'X':
1108 case 'Y': case 'Z':
1109 result->type = CPP_NAME;
1110 {
1111 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1112 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1113 &nst);
1114 warn_about_normalization (pfile, result, &nst);
1115 }
1116
1117 /* Convert named operators to their proper types. */
1118 if (result->val.node->flags & NODE_OPERATOR)
1119 {
1120 result->flags |= NAMED_OP;
1121 result->type = (enum cpp_ttype) result->val.node->directive_index;
1122 }
1123 break;
1124
1125 case '\'':
1126 case '"':
1127 lex_string (pfile, result, buffer->cur - 1);
1128 break;
1129
1130 case '/':
1131 /* A potential block or line comment. */
1132 comment_start = buffer->cur;
1133 c = *buffer->cur;
1134
1135 if (c == '*')
1136 {
1137 if (_cpp_skip_block_comment (pfile))
1138 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1139 }
1140 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1141 || cpp_in_system_header (pfile)))
1142 {
1143 /* Warn about comments only if pedantically GNUC89, and not
1144 in system headers. */
1145 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1146 && ! buffer->warned_cplusplus_comments)
1147 {
1148 cpp_error (pfile, CPP_DL_PEDWARN,
1149 "C++ style comments are not allowed in ISO C90");
1150 cpp_error (pfile, CPP_DL_PEDWARN,
1151 "(this will be reported only once per input file)");
1152 buffer->warned_cplusplus_comments = 1;
1153 }
1154
1155 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1156 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1157 }
1158 else if (c == '=')
1159 {
1160 buffer->cur++;
1161 result->type = CPP_DIV_EQ;
1162 break;
1163 }
1164 else
1165 {
1166 result->type = CPP_DIV;
1167 break;
1168 }
1169
1170 if (!pfile->state.save_comments)
1171 {
1172 result->flags |= PREV_WHITE;
1173 goto update_tokens_line;
1174 }
1175
1176 /* Save the comment as a token in its own right. */
1177 save_comment (pfile, result, comment_start, c);
1178 break;
1179
1180 case '<':
1181 if (pfile->state.angled_headers)
1182 {
1183 lex_string (pfile, result, buffer->cur - 1);
1184 break;
1185 }
1186
1187 result->type = CPP_LESS;
1188 if (*buffer->cur == '=')
1189 buffer->cur++, result->type = CPP_LESS_EQ;
1190 else if (*buffer->cur == '<')
1191 {
1192 buffer->cur++;
1193 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1194 }
1195 else if (CPP_OPTION (pfile, digraphs))
1196 {
1197 if (*buffer->cur == ':')
1198 {
1199 buffer->cur++;
1200 result->flags |= DIGRAPH;
1201 result->type = CPP_OPEN_SQUARE;
1202 }
1203 else if (*buffer->cur == '%')
1204 {
1205 buffer->cur++;
1206 result->flags |= DIGRAPH;
1207 result->type = CPP_OPEN_BRACE;
1208 }
1209 }
1210 break;
1211
1212 case '>':
1213 result->type = CPP_GREATER;
1214 if (*buffer->cur == '=')
1215 buffer->cur++, result->type = CPP_GREATER_EQ;
1216 else if (*buffer->cur == '>')
1217 {
1218 buffer->cur++;
1219 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1220 }
1221 break;
1222
1223 case '%':
1224 result->type = CPP_MOD;
1225 if (*buffer->cur == '=')
1226 buffer->cur++, result->type = CPP_MOD_EQ;
1227 else if (CPP_OPTION (pfile, digraphs))
1228 {
1229 if (*buffer->cur == ':')
1230 {
1231 buffer->cur++;
1232 result->flags |= DIGRAPH;
1233 result->type = CPP_HASH;
1234 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1235 buffer->cur += 2, result->type = CPP_PASTE;
1236 }
1237 else if (*buffer->cur == '>')
1238 {
1239 buffer->cur++;
1240 result->flags |= DIGRAPH;
1241 result->type = CPP_CLOSE_BRACE;
1242 }
1243 }
1244 break;
1245
1246 case '.':
1247 result->type = CPP_DOT;
1248 if (ISDIGIT (*buffer->cur))
1249 {
1250 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1251 result->type = CPP_NUMBER;
1252 lex_number (pfile, &result->val.str, &nst);
1253 warn_about_normalization (pfile, result, &nst);
1254 }
1255 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1256 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1257 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1258 buffer->cur++, result->type = CPP_DOT_STAR;
1259 break;
1260
1261 case '+':
1262 result->type = CPP_PLUS;
1263 if (*buffer->cur == '+')
1264 buffer->cur++, result->type = CPP_PLUS_PLUS;
1265 else if (*buffer->cur == '=')
1266 buffer->cur++, result->type = CPP_PLUS_EQ;
1267 break;
1268
1269 case '-':
1270 result->type = CPP_MINUS;
1271 if (*buffer->cur == '>')
1272 {
1273 buffer->cur++;
1274 result->type = CPP_DEREF;
1275 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1276 buffer->cur++, result->type = CPP_DEREF_STAR;
1277 }
1278 else if (*buffer->cur == '-')
1279 buffer->cur++, result->type = CPP_MINUS_MINUS;
1280 else if (*buffer->cur == '=')
1281 buffer->cur++, result->type = CPP_MINUS_EQ;
1282 break;
1283
1284 case '&':
1285 result->type = CPP_AND;
1286 if (*buffer->cur == '&')
1287 buffer->cur++, result->type = CPP_AND_AND;
1288 else if (*buffer->cur == '=')
1289 buffer->cur++, result->type = CPP_AND_EQ;
1290 break;
1291
1292 case '|':
1293 result->type = CPP_OR;
1294 if (*buffer->cur == '|')
1295 buffer->cur++, result->type = CPP_OR_OR;
1296 else if (*buffer->cur == '=')
1297 buffer->cur++, result->type = CPP_OR_EQ;
1298 break;
1299
1300 case ':':
1301 result->type = CPP_COLON;
1302 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1303 buffer->cur++, result->type = CPP_SCOPE;
1304 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1305 {
1306 buffer->cur++;
1307 result->flags |= DIGRAPH;
1308 result->type = CPP_CLOSE_SQUARE;
1309 }
1310 break;
1311
1312 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1313 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1314 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1315 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1316 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1317
1318 case '?': result->type = CPP_QUERY; break;
1319 case '~': result->type = CPP_COMPL; break;
1320 case ',': result->type = CPP_COMMA; break;
1321 case '(': result->type = CPP_OPEN_PAREN; break;
1322 case ')': result->type = CPP_CLOSE_PAREN; break;
1323 case '[': result->type = CPP_OPEN_SQUARE; break;
1324 case ']': result->type = CPP_CLOSE_SQUARE; break;
1325 case '{': result->type = CPP_OPEN_BRACE; break;
1326 case '}': result->type = CPP_CLOSE_BRACE; break;
1327 case ';': result->type = CPP_SEMICOLON; break;
1328
1329 /* @ is a punctuator in Objective-C. */
1330 case '@': result->type = CPP_ATSIGN; break;
1331
1332 case '$':
1333 case '\\':
1334 {
1335 const uchar *base = --buffer->cur;
1336 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1337
1338 if (forms_identifier_p (pfile, true, &nst))
1339 {
1340 result->type = CPP_NAME;
1341 result->val.node = lex_identifier (pfile, base, true, &nst);
1342 warn_about_normalization (pfile, result, &nst);
1343 break;
1344 }
1345 buffer->cur++;
1346 }
1347
1348 default:
1349 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1350 break;
1351 }
1352
1353 return result;
1354 }
1355
1356 /* An upper bound on the number of bytes needed to spell TOKEN.
1357 Does not include preceding whitespace. */
1358 unsigned int
1359 cpp_token_len (const cpp_token *token)
1360 {
1361 unsigned int len;
1362
1363 switch (TOKEN_SPELL (token))
1364 {
1365 default: len = 6; break;
1366 case SPELL_LITERAL: len = token->val.str.len; break;
1367 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1368 }
1369
1370 return len;
1371 }
1372
/* Decode the UTF-8 sequence starting at NAME and store the equivalent
   \U escape (backslash, 'U', then eight lower-case hex digits) in
   BUFFER.  Exactly 10 bytes are written to BUFFER and no terminating
   NUL is added.  Returns the number of bytes of NAME that made up the
   sequence, which is the count of leading one bits in NAME[0].

   Aborts if any byte after the first is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead_bits = *name;
  int seq_len = 0;
  int idx;
  int shift;

  /* The number of leading one bits in the first byte gives the
     length of the whole sequence.  */
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* Whatever remains of the first byte below the length marker is
     the most significant part of the code point.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each trailing byte contributes its low six bits.  */
  for (idx = 1; idx < seq_len; idx++)
    {
      const unsigned char trail = name[idx];

      /* Ill-formed UTF-8.  */
      if ((trail & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';

  /* Emit the eight hex digits, most significant nibble first.  */
  for (idx = 2, shift = 28; idx < 10; idx++, shift -= 4)
    buffer[idx] = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1406
1407
1408 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1409 already contain the enough space to hold the token's spelling.
1410 Returns a pointer to the character after the last character written.
1411 FORSTRING is true if this is to be the spelling after translation
1412 phase 1 (this is different for UCNs).
1413 FIXME: Would be nice if we didn't need the PFILE argument. */
1414 unsigned char *
1415 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1416 unsigned char *buffer, bool forstring)
1417 {
1418 switch (TOKEN_SPELL (token))
1419 {
1420 case SPELL_OPERATOR:
1421 {
1422 const unsigned char *spelling;
1423 unsigned char c;
1424
1425 if (token->flags & DIGRAPH)
1426 spelling
1427 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1428 else if (token->flags & NAMED_OP)
1429 goto spell_ident;
1430 else
1431 spelling = TOKEN_NAME (token);
1432
1433 while ((c = *spelling++) != '\0')
1434 *buffer++ = c;
1435 }
1436 break;
1437
1438 spell_ident:
1439 case SPELL_IDENT:
1440 if (forstring)
1441 {
1442 memcpy (buffer, NODE_NAME (token->val.node),
1443 NODE_LEN (token->val.node));
1444 buffer += NODE_LEN (token->val.node);
1445 }
1446 else
1447 {
1448 size_t i;
1449 const unsigned char * name = NODE_NAME (token->val.node);
1450
1451 for (i = 0; i < NODE_LEN (token->val.node); i++)
1452 if (name[i] & ~0x7F)
1453 {
1454 i += utf8_to_ucn (buffer, name + i) - 1;
1455 buffer += 10;
1456 }
1457 else
1458 *buffer++ = NODE_NAME (token->val.node)[i];
1459 }
1460 break;
1461
1462 case SPELL_LITERAL:
1463 memcpy (buffer, token->val.str.text, token->val.str.len);
1464 buffer += token->val.str.len;
1465 break;
1466
1467 case SPELL_NONE:
1468 cpp_error (pfile, CPP_DL_ICE,
1469 "unspellable token %s", TOKEN_NAME (token));
1470 break;
1471 }
1472
1473 return buffer;
1474 }
1475
1476 /* Returns TOKEN spelt as a null-terminated string. The string is
1477 freed when the reader is destroyed. Useful for diagnostics. */
1478 unsigned char *
1479 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1480 {
1481 unsigned int len = cpp_token_len (token) + 1;
1482 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1483
1484 end = cpp_spell_token (pfile, token, start, false);
1485 end[0] = '\0';
1486
1487 return start;
1488 }
1489
1490 /* Used by C front ends, which really should move to using
1491 cpp_token_as_text. */
1492 const char *
1493 cpp_type2name (enum cpp_ttype type)
1494 {
1495 return (const char *) token_spellings[type].name;
1496 }
1497
1498 /* Writes the spelling of token to FP, without any preceding space.
1499 Separated from cpp_spell_token for efficiency - to avoid stdio
1500 double-buffering. */
1501 void
1502 cpp_output_token (const cpp_token *token, FILE *fp)
1503 {
1504 switch (TOKEN_SPELL (token))
1505 {
1506 case SPELL_OPERATOR:
1507 {
1508 const unsigned char *spelling;
1509 int c;
1510
1511 if (token->flags & DIGRAPH)
1512 spelling
1513 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1514 else if (token->flags & NAMED_OP)
1515 goto spell_ident;
1516 else
1517 spelling = TOKEN_NAME (token);
1518
1519 c = *spelling;
1520 do
1521 putc (c, fp);
1522 while ((c = *++spelling) != '\0');
1523 }
1524 break;
1525
1526 spell_ident:
1527 case SPELL_IDENT:
1528 {
1529 size_t i;
1530 const unsigned char * name = NODE_NAME (token->val.node);
1531
1532 for (i = 0; i < NODE_LEN (token->val.node); i++)
1533 if (name[i] & ~0x7F)
1534 {
1535 unsigned char buffer[10];
1536 i += utf8_to_ucn (buffer, name + i) - 1;
1537 fwrite (buffer, 1, 10, fp);
1538 }
1539 else
1540 fputc (NODE_NAME (token->val.node)[i], fp);
1541 }
1542 break;
1543
1544 case SPELL_LITERAL:
1545 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1546 break;
1547
1548 case SPELL_NONE:
1549 /* An error, most probably. */
1550 break;
1551 }
1552 }
1553
1554 /* Compare two tokens. */
1555 int
1556 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1557 {
1558 if (a->type == b->type && a->flags == b->flags)
1559 switch (TOKEN_SPELL (a))
1560 {
1561 default: /* Keep compiler happy. */
1562 case SPELL_OPERATOR:
1563 return 1;
1564 case SPELL_NONE:
1565 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1566 case SPELL_IDENT:
1567 return a->val.node == b->val.node;
1568 case SPELL_LITERAL:
1569 return (a->val.str.len == b->val.str.len
1570 && !memcmp (a->val.str.text, b->val.str.text,
1571 a->val.str.len));
1572 }
1573
1574 return 0;
1575 }
1576
1577 /* Returns nonzero if a space should be inserted to avoid an
1578 accidental token paste for output. For simplicity, it is
1579 conservative, and occasionally advises a space where one is not
1580 needed, e.g. "." and ".2". */
1581 int
1582 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1583 const cpp_token *token2)
1584 {
1585 enum cpp_ttype a = token1->type, b = token2->type;
1586 cppchar_t c;
1587
1588 if (token1->flags & NAMED_OP)
1589 a = CPP_NAME;
1590 if (token2->flags & NAMED_OP)
1591 b = CPP_NAME;
1592
1593 c = EOF;
1594 if (token2->flags & DIGRAPH)
1595 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1596 else if (token_spellings[b].category == SPELL_OPERATOR)
1597 c = token_spellings[b].name[0];
1598
1599 /* Quickly get everything that can paste with an '='. */
1600 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1601 return 1;
1602
1603 switch (a)
1604 {
1605 case CPP_GREATER: return c == '>';
1606 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1607 case CPP_PLUS: return c == '+';
1608 case CPP_MINUS: return c == '-' || c == '>';
1609 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1610 case CPP_MOD: return c == ':' || c == '>';
1611 case CPP_AND: return c == '&';
1612 case CPP_OR: return c == '|';
1613 case CPP_COLON: return c == ':' || c == '>';
1614 case CPP_DEREF: return c == '*';
1615 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1616 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1617 case CPP_NAME: return ((b == CPP_NUMBER
1618 && name_p (pfile, &token2->val.str))
1619 || b == CPP_NAME
1620 || b == CPP_CHAR || b == CPP_STRING); /* L */
1621 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1622 || c == '.' || c == '+' || c == '-');
1623 /* UCNs */
1624 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1625 && b == CPP_NAME)
1626 || (CPP_OPTION (pfile, objc)
1627 && token1->val.str.text[0] == '@'
1628 && (b == CPP_NAME || b == CPP_STRING)));
1629 default: break;
1630 }
1631
1632 return 0;
1633 }
1634
1635 /* Output all the remaining tokens on the current line, and a newline
1636 character, to FP. Leading whitespace is removed. If there are
1637 macros, special token padding is not performed. */
1638 void
1639 cpp_output_line (cpp_reader *pfile, FILE *fp)
1640 {
1641 const cpp_token *token;
1642
1643 token = cpp_get_token (pfile);
1644 while (token->type != CPP_EOF)
1645 {
1646 cpp_output_token (token, fp);
1647 token = cpp_get_token (pfile);
1648 if (token->flags & PREV_WHITE)
1649 putc (' ', fp);
1650 }
1651
1652 putc ('\n', fp);
1653 }
1654
1655 /* Return a string representation of all the remaining tokens on the
1656 current line. The result is allocated using xmalloc and must be
1657 freed by the caller. */
1658 unsigned char *
1659 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1660 {
1661 const cpp_token *token;
1662 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1663 unsigned int alloced = 120 + out;
1664 unsigned char *result = (unsigned char *) xmalloc (alloced);
1665
1666 /* If DIR_NAME is empty, there are no initial contents. */
1667 if (dir_name)
1668 {
1669 sprintf ((char *) result, "#%s ", dir_name);
1670 out += 2;
1671 }
1672
1673 token = cpp_get_token (pfile);
1674 while (token->type != CPP_EOF)
1675 {
1676 unsigned char *last;
1677 /* Include room for a possible space and the terminating nul. */
1678 unsigned int len = cpp_token_len (token) + 2;
1679
1680 if (out + len > alloced)
1681 {
1682 alloced *= 2;
1683 if (out + len > alloced)
1684 alloced = out + len;
1685 result = (unsigned char *) xrealloc (result, alloced);
1686 }
1687
1688 last = cpp_spell_token (pfile, token, &result[out], 0);
1689 out = last - result;
1690
1691 token = cpp_get_token (pfile);
1692 if (token->flags & PREV_WHITE)
1693 result[out++] = ' ';
1694 }
1695
1696 result[out] = '\0';
1697 return result;
1698 }
1699
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a newly created buffer gets.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.

   Every macro parameter is parenthesized so that an argument which is
   itself an expression cannot regroup with the surrounding
   operators.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1714
1715 /* Create a new allocation buffer. Place the control block at the end
1716 of the buffer, so that buffer overflows will cause immediate chaos. */
1717 static _cpp_buff *
1718 new_buff (size_t len)
1719 {
1720 _cpp_buff *result;
1721 unsigned char *base;
1722
1723 if (len < MIN_BUFF_SIZE)
1724 len = MIN_BUFF_SIZE;
1725 len = CPP_ALIGN (len);
1726
1727 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1728 result = (_cpp_buff *) (base + len);
1729 result->base = base;
1730 result->cur = base;
1731 result->limit = base + len;
1732 result->next = NULL;
1733 return result;
1734 }
1735
1736 /* Place a chain of unwanted allocation buffers on the free list. */
1737 void
1738 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1739 {
1740 _cpp_buff *end = buff;
1741
1742 while (end->next)
1743 end = end->next;
1744 end->next = pfile->free_buffs;
1745 pfile->free_buffs = buff;
1746 }
1747
1748 /* Return a free buffer of size at least MIN_SIZE. */
1749 _cpp_buff *
1750 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1751 {
1752 _cpp_buff *result, **p;
1753
1754 for (p = &pfile->free_buffs;; p = &(*p)->next)
1755 {
1756 size_t size;
1757
1758 if (*p == NULL)
1759 return new_buff (min_size);
1760 result = *p;
1761 size = result->limit - result->base;
1762 /* Return a buffer that's big enough, but don't waste one that's
1763 way too big. */
1764 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1765 break;
1766 }
1767
1768 *p = result->next;
1769 result->next = NULL;
1770 result->cur = result->base;
1771 return result;
1772 }
1773
1774 /* Creates a new buffer with enough space to hold the uncommitted
1775 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1776 the excess bytes to the new buffer. Chains the new buffer after
1777 BUFF, and returns the new buffer. */
1778 _cpp_buff *
1779 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1780 {
1781 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1782 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1783
1784 buff->next = new_buff;
1785 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1786 return new_buff;
1787 }
1788
1789 /* Creates a new buffer with enough space to hold the uncommitted
1790 remaining bytes of the buffer pointed to by BUFF, and at least
1791 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1792 Chains the new buffer before the buffer pointed to by BUFF, and
1793 updates the pointer to point to the new buffer. */
1794 void
1795 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1796 {
1797 _cpp_buff *new_buff, *old_buff = *pbuff;
1798 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1799
1800 new_buff = _cpp_get_buff (pfile, size);
1801 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1802 new_buff->next = old_buff;
1803 *pbuff = new_buff;
1804 }
1805
1806 /* Free a chain of buffers starting at BUFF. */
1807 void
1808 _cpp_free_buff (_cpp_buff *buff)
1809 {
1810 _cpp_buff *next;
1811
1812 for (; buff; buff = next)
1813 {
1814 next = buff->next;
1815 free (buff->base);
1816 }
1817 }
1818
1819 /* Allocate permanent, unaligned storage of length LEN. */
1820 unsigned char *
1821 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1822 {
1823 _cpp_buff *buff = pfile->u_buff;
1824 unsigned char *result = buff->cur;
1825
1826 if (len > (size_t) (buff->limit - result))
1827 {
1828 buff = _cpp_get_buff (pfile, len);
1829 buff->next = pfile->u_buff;
1830 pfile->u_buff = buff;
1831 result = buff->cur;
1832 }
1833
1834 buff->cur = result + len;
1835 return result;
1836 }
1837
1838 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1839 That buffer is used for growing allocations when saving macro
1840 replacement lists in a #define, and when parsing an answer to an
1841 assertion in #assert, #unassert or #if (and therefore possibly
1842 whilst expanding macros). It therefore must not be used by any
1843 code that they might call: specifically the lexer and the guts of
1844 the macro expander.
1845
1846 All existing other uses clearly fit this restriction: storing
1847 registered pragmas during initialization. */
1848 unsigned char *
1849 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1850 {
1851 _cpp_buff *buff = pfile->a_buff;
1852 unsigned char *result = buff->cur;
1853
1854 if (len > (size_t) (buff->limit - result))
1855 {
1856 buff = _cpp_get_buff (pfile, len);
1857 buff->next = pfile->a_buff;
1858 pfile->a_buff = buff;
1859 result = buff->cur;
1860 }
1861
1862 buff->cur = result + len;
1863 return result;
1864 }
1865
1866 /* Say which field of TOK is in use. */
1867
1868 enum cpp_token_fld_kind
1869 cpp_token_val_index (cpp_token *tok)
1870 {
1871 switch (TOKEN_SPELL (tok))
1872 {
1873 case SPELL_IDENT:
1874 return CPP_TOKEN_FLD_NODE;
1875 case SPELL_LITERAL:
1876 return CPP_TOKEN_FLD_STR;
1877 case SPELL_NONE:
1878 if (tok->type == CPP_MACRO_ARG)
1879 return CPP_TOKEN_FLD_ARG_NO;
1880 else if (tok->type == CPP_PADDING)
1881 return CPP_TOKEN_FLD_SOURCE;
1882 else if (tok->type == CPP_PRAGMA)
1883 return CPP_TOKEN_FLD_PRAGMA;
1884 /* else fall through */
1885 default:
1886 return CPP_TOKEN_FLD_NONE;
1887 }
1888 }