lra: fix spill_hard_reg_in_range clobber check
[gcc.git] / gcc / input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
27
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
31
/* A cache entry holding the content of one source file.  It is filled
   incrementally by get_next_line and searched when a particular line
   of the file has to be printed.  */
struct fcache
{
  /* The boundaries of one line that has already been read.  */
  struct line_info
  {
    /* The line number.  It starts from 1.  */
    size_t line_num;

    /* The position (byte count) of the beginning of the line,
       relative to the file data pointer.  This starts at zero.  */
    size_t start_pos;

    /* The position (byte count) of the last byte of the line.  This
       normally points to the '\n' character, or to one byte after the
       last byte of the file, if the file doesn't contain a '\n'
       character.  */
    size_t end_pos;

    /* Build a record for line L spanning bytes [S, E).  */
    line_info (size_t l, size_t s, size_t e)
      : line_num (l), start_pos (s), end_pos (e)
    {}

    /* Build an all-zero record.  */
    line_info ()
      :line_num (0), start_pos (0), end_pos (0)
    {}
  };

  /* The number of times this file has been accessed.  This is used
     to designate which file cache to evict from the cache
     array.  */
  unsigned use_count;

  /* The file_path is the key for identifying a particular file in
     the cache.  A NULL file_path marks an unused entry.
     For libcpp-using code, the underlying buffer for this field is
     owned by the corresponding _cpp_file within the cpp_reader.  */
  const char *file_path;

  /* The stream the file content is read from, or NULL.  */
  FILE *fp;

  /* This points to the content of the file that we've read so
     far.  */
  char *data;

  /* The size of the DATA array above.  */
  size_t size;

  /* The number of bytes read from the underlying file so far.  This
     must be less than (or equal to) SIZE above.  */
  size_t nb_read;

  /* The index of the beginning of the current line.  */
  size_t line_start_idx;

  /* The number of the previous line read.  This starts at 1.  Zero
     means we've read no line so far.  */
  size_t line_num;

  /* This is the total number of lines of the current file.  At the
     moment, we try to get this information from the line map
     subsystem.  Note that this is just a hint.  When using the C++
     front-end, this hint is correct because the input file is then
     completely tokenized before parsing starts; so the line map knows
     the number of lines before compilation really starts.  For e.g,
     the C front-end, it can happen that we start emitting diagnostics
     before the line map has seen the end of the file.  */
  size_t total_lines;

  /* Could this file be missing a trailing newline on its final line?
     Initially true (to cope with empty files), set to true/false
     as each line is read.  */
  bool missing_trailing_newline;

  /* This is a record of the beginning and end of the lines we've seen
     while reading the file.  This is useful to avoid walking the data
     from the beginning when we are asked to read a line that is
     before LINE_START_IDX above.  Note that the maximum size of this
     record is fcache_line_record_size, so that the memory consumption
     doesn't explode.  We thus scale total_lines down to
     fcache_line_record_size.  */
  vec<line_info, va_heap> line_record;

  fcache ();
  ~fcache ();
};
119
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps consulted by the location-expansion routines
   in this file.  */
struct line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

struct line_maps *saved_line_table;

/* The array of file caches; allocated lazily by
   diagnostic_file_cache_init.  */
static fcache *fcache_tab;
/* The number of entries in FCACHE_TAB.  */
static const size_t fcache_tab_size = 16;
/* The initial size, in bytes, of the data buffer of a file cache; the
   buffer is doubled each time it fills up.  */
static const size_t fcache_buffer_size = 4 * 1024;
/* The maximum number of line boundaries recorded per file cache.  */
static const size_t fcache_line_record_size = 100;
137
138 /* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
143 spelling location of the token.
144
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
150 code.
151
152 ASPECT controls which part of the location to use. */
153
154 static expanded_location
155 expand_location_1 (source_location loc,
156 bool expansion_point_p,
157 enum location_aspect aspect)
158 {
159 expanded_location xloc;
160 const line_map_ordinary *map;
161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
162 tree block = NULL;
163
164 if (IS_ADHOC_LOC (loc))
165 {
166 block = LOCATION_BLOCK (loc);
167 loc = LOCATION_LOCUS (loc);
168 }
169
170 memset (&xloc, 0, sizeof (xloc));
171
172 if (loc >= RESERVED_LOCATION_COUNT)
173 {
174 if (!expansion_point_p)
175 {
176 /* We want to resolve LOC to its spelling location.
177
178 But if that spelling location is a reserved location that
179 appears in the context of a macro expansion (like for a
180 location for a built-in token), let's consider the first
181 location (toward the expansion point) that is not reserved;
182 that is, the first location that is in real source code. */
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
184 loc, NULL);
185 lrk = LRK_SPELLING_LOCATION;
186 }
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
188
189 /* loc is now either in an ordinary map, or is a reserved location.
190 If it is a compound location, the caret is in a spelling location,
191 but the start/finish might still be a virtual location.
192 Depending of what the caller asked for, we may need to recurse
193 one level in order to resolve any virtual locations in the
194 end-points. */
195 switch (aspect)
196 {
197 default:
198 gcc_unreachable ();
199 /* Fall through. */
200 case LOCATION_ASPECT_CARET:
201 break;
202 case LOCATION_ASPECT_START:
203 {
204 source_location start = get_start (loc);
205 if (start != loc)
206 return expand_location_1 (start, expansion_point_p, aspect);
207 }
208 break;
209 case LOCATION_ASPECT_FINISH:
210 {
211 source_location finish = get_finish (loc);
212 if (finish != loc)
213 return expand_location_1 (finish, expansion_point_p, aspect);
214 }
215 break;
216 }
217 xloc = linemap_expand_location (line_table, map, loc);
218 }
219
220 xloc.data = block;
221 if (loc <= BUILTINS_LOCATION)
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
223
224 return xloc;
225 }
226
227 /* Initialize the set of cache used for files accessed by caret
228 diagnostic. */
229
230 static void
231 diagnostic_file_cache_init (void)
232 {
233 if (fcache_tab == NULL)
234 fcache_tab = new fcache[fcache_tab_size];
235 }
236
237 /* Free the resources used by the set of cache used for files accessed
238 by caret diagnostic. */
239
240 void
241 diagnostic_file_cache_fini (void)
242 {
243 if (fcache_tab)
244 {
245 delete [] (fcache_tab);
246 fcache_tab = NULL;
247 }
248 }
249
250 /* Return the total lines number that have been read so far by the
251 line map (in the preprocessor) so far. For languages like C++ that
252 entirely preprocess the input file before starting to parse, this
253 equals the actual number of lines of the file. */
254
255 static size_t
256 total_lines_num (const char *file_path)
257 {
258 size_t r = 0;
259 source_location l = 0;
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
261 {
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
263 expanded_location xloc = expand_location (l);
264 r = xloc.line;
265 }
266 return r;
267 }
268
269 /* Lookup the cache used for the content of a given file accessed by
270 caret diagnostic. Return the found cached file, or NULL if no
271 cached file was found. */
272
273 static fcache*
274 lookup_file_in_cache_tab (const char *file_path)
275 {
276 if (file_path == NULL)
277 return NULL;
278
279 diagnostic_file_cache_init ();
280
281 /* This will contain the found cached file. */
282 fcache *r = NULL;
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
284 {
285 fcache *c = &fcache_tab[i];
286 if (c->file_path && !strcmp (c->file_path, file_path))
287 {
288 ++c->use_count;
289 r = c;
290 }
291 }
292
293 if (r)
294 ++r->use_count;
295
296 return r;
297 }
298
299 /* Purge any mention of FILENAME from the cache of files used for
300 printing source code. For use in selftests when working
301 with tempfiles. */
302
303 void
304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
305 {
306 gcc_assert (file_path);
307
308 fcache *r = lookup_file_in_cache_tab (file_path);
309 if (!r)
310 /* Not found. */
311 return;
312
313 r->file_path = NULL;
314 if (r->fp)
315 fclose (r->fp);
316 r->fp = NULL;
317 r->nb_read = 0;
318 r->line_start_idx = 0;
319 r->line_num = 0;
320 r->line_record.truncate (0);
321 r->use_count = 0;
322 r->total_lines = 0;
323 r->missing_trailing_newline = true;
324 }
325
326 /* Return the file cache that has been less used, recently, or the
327 first empty one. If HIGHEST_USE_COUNT is non-null,
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
329 in the cache table. */
330
331 static fcache*
332 evicted_cache_tab_entry (unsigned *highest_use_count)
333 {
334 diagnostic_file_cache_init ();
335
336 fcache *to_evict = &fcache_tab[0];
337 unsigned huc = to_evict->use_count;
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
339 {
340 fcache *c = &fcache_tab[i];
341 bool c_is_empty = (c->file_path == NULL);
342
343 if (c->use_count < to_evict->use_count
344 || (to_evict->file_path && c_is_empty))
345 /* We evict C because it's either an entry with a lower use
346 count or one that is empty. */
347 to_evict = c;
348
349 if (huc < c->use_count)
350 huc = c->use_count;
351
352 if (c_is_empty)
353 /* We've reached the end of the cache; subsequent elements are
354 all empty. */
355 break;
356 }
357
358 if (highest_use_count)
359 *highest_use_count = huc;
360
361 return to_evict;
362 }
363
364 /* Create the cache used for the content of a given file to be
365 accessed by caret diagnostic. This cache is added to an array of
366 cache and can be retrieved by lookup_file_in_cache_tab. This
367 function returns the created cache. Note that only the last
368 fcache_tab_size files are cached. */
369
370 static fcache*
371 add_file_to_cache_tab (const char *file_path)
372 {
373
374 FILE *fp = fopen (file_path, "r");
375 if (fp == NULL)
376 return NULL;
377
378 unsigned highest_use_count = 0;
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
380 r->file_path = file_path;
381 if (r->fp)
382 fclose (r->fp);
383 r->fp = fp;
384 r->nb_read = 0;
385 r->line_start_idx = 0;
386 r->line_num = 0;
387 r->line_record.truncate (0);
388 /* Ensure that this cache entry doesn't get evicted next time
389 add_file_to_cache_tab is called. */
390 r->use_count = ++highest_use_count;
391 r->total_lines = total_lines_num (file_path);
392 r->missing_trailing_newline = true;
393
394 return r;
395 }
396
397 /* Lookup the cache used for the content of a given file accessed by
398 caret diagnostic. If no cached file was found, create a new cache
399 for this file, add it to the array of cached file and return
400 it. */
401
402 static fcache*
403 lookup_or_add_file_to_cache_tab (const char *file_path)
404 {
405 fcache *r = lookup_file_in_cache_tab (file_path);
406 if (r == NULL)
407 r = add_file_to_cache_tab (file_path);
408 return r;
409 }
410
411 /* Default constructor for a cache of file used by caret
412 diagnostic. */
413
414 fcache::fcache ()
415 : use_count (0), file_path (NULL), fp (NULL), data (0),
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
417 total_lines (0), missing_trailing_newline (true)
418 {
419 line_record.create (0);
420 }
421
422 /* Destructor for a cache of file used by caret diagnostic. */
423
424 fcache::~fcache ()
425 {
426 if (fp)
427 {
428 fclose (fp);
429 fp = NULL;
430 }
431 if (data)
432 {
433 XDELETEVEC (data);
434 data = 0;
435 }
436 line_record.release ();
437 }
438
439 /* Returns TRUE iff the cache would need to be filled with data coming
440 from the file. That is, either the cache is empty or full or the
441 current line is empty. Note that if the cache is full, it would
442 need to be extended and filled again. */
443
444 static bool
445 needs_read (fcache *c)
446 {
447 return (c->nb_read == 0
448 || c->nb_read == c->size
449 || (c->line_start_idx >= c->nb_read - 1));
450 }
451
452 /* Return TRUE iff the cache is full and thus needs to be
453 extended. */
454
455 static bool
456 needs_grow (fcache *c)
457 {
458 return c->nb_read == c->size;
459 }
460
461 /* Grow the cache if it needs to be extended. */
462
463 static void
464 maybe_grow (fcache *c)
465 {
466 if (!needs_grow (c))
467 return;
468
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
470 c->data = XRESIZEVEC (char, c->data, size);
471 c->size = size;
472 }
473
474 /* Read more data into the cache. Extends the cache if need be.
475 Returns TRUE iff new data could be read. */
476
477 static bool
478 read_data (fcache *c)
479 {
480 if (feof (c->fp) || ferror (c->fp))
481 return false;
482
483 maybe_grow (c);
484
485 char * from = c->data + c->nb_read;
486 size_t to_read = c->size - c->nb_read;
487 size_t nb_read = fread (from, 1, to_read, c->fp);
488
489 if (ferror (c->fp))
490 return false;
491
492 c->nb_read += nb_read;
493 return !!nb_read;
494 }
495
496 /* Read new data iff the cache needs to be filled with more data
497 coming from the file FP. Return TRUE iff the cache was filled with
498 mode data. */
499
500 static bool
501 maybe_read_data (fcache *c)
502 {
503 if (!needs_read (c))
504 return false;
505 return read_data (c);
506 }
507
/* Read the next line of the file cached in C, pulling more data from
   the underlying file as needed.  Upon successful completion, *LINE
   is set to the beginning of the line found.  *LINE points directly
   into the data buffer of the cache and is only valid until the next
   call of get_next_line, which may reallocate that buffer.
   *LINE_LEN is set to the length of the line.  Note that the line
   does not contain any terminal delimiter.  This function returns
   true if some data was read or processed from the cache, false
   otherwise (no data left, or a read error on the stream).

   As side effects, the line number and current line start of C are
   advanced, C->missing_trailing_newline is updated and the line
   record of C may receive a new entry.  */

static bool
get_next_line (fcache *c, char **line, ssize_t *line_len)
{
  /* Fill the cache with data to process.  */
  maybe_read_data (c);

  size_t remaining_size = c->nb_read - c->line_start_idx;
  if (remaining_size == 0)
    /* There is no more data to process.  */
    return false;

  char *line_start = c->data + c->line_start_idx;

  char *next_line_start = NULL;
  size_t len = 0;
  char *line_end = (char *) memchr (line_start, '\n', remaining_size);
  if (line_end == NULL)
    {
      /* We haven't found the end-of-line delimiter in the cache.
         Fill the cache with more data from the file and look for the
         '\n'.  */
      while (maybe_read_data (c))
        {
          /* Reading may have reallocated C->data, so recompute the
             start of the line from its index.  */
          line_start = c->data + c->line_start_idx;
          remaining_size = c->nb_read - c->line_start_idx;
          line_end = (char *) memchr (line_start, '\n', remaining_size);
          if (line_end != NULL)
            {
              next_line_start = line_end + 1;
              break;
            }
        }
      if (line_end == NULL)
        {
          /* We've loaded all the file into the cache and still no
             '\n'.  Let's say the line ends up at one byte past the
             end of the file.  This is to stay consistent with the case
             of when the line ends up with a '\n' and line_end points to
             that terminal '\n'.  That consistency is useful below in
             the len calculation.  */
          line_end = c->data + c->nb_read ;
          c->missing_trailing_newline = true;
        }
      else
        c->missing_trailing_newline = false;
    }
  else
    {
      next_line_start = line_end + 1;
      c->missing_trailing_newline = false;
    }

  if (ferror (c->fp))
    return false;

  /* At this point, we've found the end of the line.  It either
     points to the '\n' or to one byte after the last byte of the
     file.  */
  gcc_assert (line_end != NULL);

  len = line_end - line_start;

  if (c->line_start_idx < c->nb_read)
    *line = line_start;

  ++c->line_num;

  /* Before we update our line record, make sure the hint about the
     total number of lines of the file is correct.  If it's not, then
     we give up recording line boundaries from now on.  */
  bool update_line_record = true;
  if (c->line_num > c->total_lines)
    update_line_record = false;

  /* Now update our line record so that re-reading lines from
     before c->line_start_idx is faster.  */
  if (update_line_record
      && c->line_record.length () < fcache_line_record_size)
    {
      /* If the file lines fit in the line record, we just record all
         its lines ...  */
      if (c->total_lines <= fcache_line_record_size
          && c->line_num > c->line_record.length ())
        c->line_record.safe_push (fcache::line_info (c->line_num,
                                                     c->line_start_idx,
                                                     line_end - c->data));
      else if (c->total_lines > fcache_line_record_size)
        {
          /* ... otherwise, we just scale total_lines down to
             fcache_line_record_size lines.  */
          size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
          if (c->line_record.length () == 0
              || n >= c->line_record.length ())
            c->line_record.safe_push (fcache::line_info (c->line_num,
                                                         c->line_start_idx,
                                                         line_end - c->data));
        }
    }

  /* Update c->line_start_idx so that it points to the next line to be
     read.  */
  if (next_line_start)
    c->line_start_idx = next_line_start - c->data;
  else
    /* We didn't find any terminal '\n'.  Let's consider that the end
       of line is the end of the data in the cache.  The next
       invocation of get_next_line will either read more data from the
       underlying file or return false early because we've reached the
       end of the file.  */
    c->line_start_idx = c->nb_read;

  *line_len = len;

  return true;
}
633
634 /* Consume the next bytes coming from the cache (or from its
635 underlying file if there are remaining unread bytes in the file)
636 until we reach the next end-of-line (or end-of-file). There is no
637 copying from the cache involved. Return TRUE upon successful
638 completion. */
639
640 static bool
641 goto_next_line (fcache *cache)
642 {
643 char *l;
644 ssize_t len;
645
646 return get_next_line (cache, &l, &len);
647 }
648
/* Read an arbitrary line number LINE_NUM from the file cached in C.
   LINE_NUM must be greater than zero.
   If the line was read successfully, *LINE points to the beginning
   of the line in the file cache and *LINE_LEN is the length of the
   line.  *LINE is not nul-terminated, but may contain zero bytes.
   *LINE is only valid until the next call of read_line_num.
   This function returns true if a line was read, false otherwise.  */

static bool
read_line_num (fcache *c, size_t line_num,
               char **line, ssize_t *line_len)
{
  gcc_assert (line_num > 0);

  if (line_num <= c->line_num)
    {
      /* We've been asked to read lines that are before c->line_num.
         So lets use our line record (if it's not empty) to try to
         avoid re-reading the file from the beginning again.  */

      if (c->line_record.is_empty ())
        {
          /* Nothing recorded: restart from the top of the data.  */
          c->line_start_idx = 0;
          c->line_num = 0;
        }
      else
        {
          fcache::line_info *i = NULL;
          if (c->total_lines <= fcache_line_record_size)
            {
              /* In languages where the input file is not totally
                 preprocessed up front, the c->total_lines hint
                 can be smaller than the number of lines of the
                 file.  In that case, only the first
                 c->total_lines have been recorded.

                 Otherwise, the first c->total_lines we've read have
                 their start/end recorded here.  */
              i = (line_num <= c->total_lines)
                ? &c->line_record[line_num - 1]
                : &c->line_record[c->total_lines - 1];
              gcc_assert (i->line_num <= line_num);
            }
          else
            {
              /* So the file had more lines than our line record
                 size.  Thus the number of lines we've recorded has
                 been scaled down to fcache_line_record_size.  Let's
                 pick the start/end of the recorded line that is
                 closest to line_num.  */
              size_t n = (line_num <= c->total_lines)
                ? line_num * fcache_line_record_size / c->total_lines
                : c ->line_record.length () - 1;
              if (n < c->line_record.length ())
                {
                  i = &c->line_record[n];
                  gcc_assert (i->line_num <= line_num);
                }
            }

          if (i && i->line_num == line_num)
            {
              /* We have the start/end of the line.  */
              *line = c->data + i->start_pos;
              *line_len = i->end_pos - i->start_pos;
              return true;
            }

          if (i)
            {
              /* Resume reading from the recorded line, which is at
                 or before the one requested.  */
              c->line_start_idx = i->start_pos;
              c->line_num = i->line_num - 1;
            }
          else
            {
              /* No usable record: restart from the top of the
                 data.  */
              c->line_start_idx = 0;
              c->line_num = 0;
            }
        }
    }

  /* Let's walk from line c->line_num up to line_num - 1, without
     copying any line.  */
  while (c->line_num < line_num - 1)
    if (!goto_next_line (c))
      return false;

  /* The line we want is the next one.  Let's read it and hand it back
     to the caller.  */
  return get_next_line (c, line, line_len);
}
739
740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
741 The line is not nul-terminated. The returned pointer is only
742 valid until the next call of location_get_source_line.
743 Note that the line can contain several null characters,
744 so the returned value's length has the actual length of the line.
745 If the function fails, a NULL char_span is returned. */
746
747 char_span
748 location_get_source_line (const char *file_path, int line)
749 {
750 char *buffer = NULL;
751 ssize_t len;
752
753 if (line == 0)
754 return char_span (NULL, 0);
755
756 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
757 if (c == NULL)
758 return char_span (NULL, 0);
759
760 bool read = read_line_num (c, line, &buffer, &len);
761 if (!read)
762 return char_span (NULL, 0);
763
764 return char_span (buffer, len);
765 }
766
767 /* Determine if FILE_PATH missing a trailing newline on its final line.
768 Only valid to call once all of the file has been loaded, by
769 requesting a line number beyond the end of the file. */
770
771 bool
772 location_missing_trailing_newline (const char *file_path)
773 {
774 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
775 if (c == NULL)
776 return false;
777
778 return c->missing_trailing_newline;
779 }
780
781 /* Test if the location originates from the spelling location of a
782 builtin-tokens. That is, return TRUE if LOC is a (possibly
783 virtual) location of a built-in token that appears in the expansion
784 list of a macro. Please note that this function also works on
785 tokens that result from built-in tokens. For instance, the
786 function would return true if passed a token "4" that is the result
787 of the expansion of the built-in __LINE__ macro. */
788 bool
789 is_location_from_builtin_token (source_location loc)
790 {
791 const line_map_ordinary *map = NULL;
792 loc = linemap_resolve_location (line_table, loc,
793 LRK_SPELLING_LOCATION, &map);
794 return loc == BUILTINS_LOCATION;
795 }
796
797 /* Expand the source location LOC into a human readable location. If
798 LOC is virtual, it resolves to the expansion point of the involved
799 macro. If LOC resolves to a builtin location, the file name of the
800 readable location is set to the string "<built-in>". */
801
802 expanded_location
803 expand_location (source_location loc)
804 {
805 return expand_location_1 (loc, /*expansion_point_p=*/true,
806 LOCATION_ASPECT_CARET);
807 }
808
809 /* Expand the source location LOC into a human readable location. If
810 LOC is virtual, it resolves to the expansion location of the
811 relevant macro. If LOC resolves to a builtin location, the file
812 name of the readable location is set to the string
813 "<built-in>". */
814
815 expanded_location
816 expand_location_to_spelling_point (source_location loc,
817 enum location_aspect aspect)
818 {
819 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
820 }
821
822 /* The rich_location class within libcpp requires a way to expand
823 source_location instances, and relies on the client code
824 providing a symbol named
825 linemap_client_expand_location_to_spelling_point
826 to do this.
827
828 This is the implementation for libcommon.a (all host binaries),
829 which simply calls into expand_location_1. */
830
831 expanded_location
832 linemap_client_expand_location_to_spelling_point (source_location loc,
833 enum location_aspect aspect)
834 {
835 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
836 }
837
838
839 /* If LOCATION is in a system header and if it is a virtual location for
840 a token coming from the expansion of a macro, unwind it to the
841 location of the expansion point of the macro. Otherwise, just return
842 LOCATION.
843
844 This is used for instance when we want to emit diagnostics about a
845 token that may be located in a macro that is itself defined in a
846 system header, for example, for the NULL macro. In such a case, if
847 LOCATION were passed directly to diagnostic functions such as
848 warning_at, the diagnostic would be suppressed (unless
849 -Wsystem-headers). */
850
851 source_location
852 expansion_point_location_if_in_system_header (source_location location)
853 {
854 if (in_system_header_at (location))
855 location = linemap_resolve_location (line_table, location,
856 LRK_MACRO_EXPANSION_POINT,
857 NULL);
858 return location;
859 }
860
861 /* If LOCATION is a virtual location for a token coming from the expansion
862 of a macro, unwind to the location of the expansion point of the macro. */
863
864 source_location
865 expansion_point_location (source_location location)
866 {
867 return linemap_resolve_location (line_table, location,
868 LRK_MACRO_EXPANSION_POINT, NULL);
869 }
870
871 /* Construct a location with caret at CARET, ranging from START to
872 finish e.g.
873
874 11111111112
875 12345678901234567890
876 522
877 523 return foo + bar;
878 ~~~~^~~~~
879 524
880
881 The location's caret is at the "+", line 523 column 15, but starts
882 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
883 of "bar" at column 19. */
884
885 location_t
886 make_location (location_t caret, location_t start, location_t finish)
887 {
888 location_t pure_loc = get_pure_location (caret);
889 source_range src_range;
890 src_range.m_start = get_start (start);
891 src_range.m_finish = get_finish (finish);
892 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
893 pure_loc,
894 src_range,
895 NULL);
896 return combined_loc;
897 }
898
899 /* Same as above, but taking a source range rather than two locations. */
900
901 location_t
902 make_location (location_t caret, source_range src_range)
903 {
904 location_t pure_loc = get_pure_location (caret);
905 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
906 }
907
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the amount X down for display, yielding an unsigned long:
     - X / (1024 * 1024), if X is >= 10 M (in base 2)
     - X / 1024, if X is >= 10 K (in base 2) but lower than 10 M
     - X itself, if X is lower than 10 K.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                                   ? (x) / ONE_M \
                                   : ((x) >= 10 * ONE_K \
                                      ? (x) / ONE_K \
                                      : (x))))

/* Yield the unit character that goes with SCALE (X):
     - 'M', if X is >= 10 M (in base 2)
     - 'k', if X is >= 10 K (in base 2) but strictly lower than 10 M
     - ' ', if X is strictly lower than 10 K.  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two printf-style arguments for the amount SIZE: the
   scaled amount followed by its unit (either k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
932
933 /* Dump statistics to stderr about the memory usage of the line_table
934 set of line maps. This also displays some statistics about macro
935 expansion. */
936
937 void
938 dump_line_table_statistics (void)
939 {
940 struct linemap_stats s;
941 long total_used_map_size,
942 macro_maps_size,
943 total_allocated_map_size;
944
945 memset (&s, 0, sizeof (s));
946
947 linemap_get_statistics (line_table, &s);
948
949 macro_maps_size = s.macro_maps_used_size
950 + s.macro_maps_locations_size;
951
952 total_allocated_map_size = s.ordinary_maps_allocated_size
953 + s.macro_maps_allocated_size
954 + s.macro_maps_locations_size;
955
956 total_used_map_size = s.ordinary_maps_used_size
957 + s.macro_maps_used_size
958 + s.macro_maps_locations_size;
959
960 fprintf (stderr, "Number of expanded macros: %5ld\n",
961 s.num_expanded_macros);
962 if (s.num_expanded_macros != 0)
963 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
964 s.num_macro_tokens / s.num_expanded_macros);
965 fprintf (stderr,
966 "\nLine Table allocations during the "
967 "compilation process\n");
968 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
969 SCALE (s.num_ordinary_maps_used),
970 STAT_LABEL (s.num_ordinary_maps_used));
971 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
972 SCALE (s.ordinary_maps_used_size),
973 STAT_LABEL (s.ordinary_maps_used_size));
974 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
975 SCALE (s.num_ordinary_maps_allocated),
976 STAT_LABEL (s.num_ordinary_maps_allocated));
977 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
978 SCALE (s.ordinary_maps_allocated_size),
979 STAT_LABEL (s.ordinary_maps_allocated_size));
980 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
981 SCALE (s.num_macro_maps_used),
982 STAT_LABEL (s.num_macro_maps_used));
983 fprintf (stderr, "Macro maps used size: %5ld%c\n",
984 SCALE (s.macro_maps_used_size),
985 STAT_LABEL (s.macro_maps_used_size));
986 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
987 SCALE (s.macro_maps_locations_size),
988 STAT_LABEL (s.macro_maps_locations_size));
989 fprintf (stderr, "Macro maps size: %5ld%c\n",
990 SCALE (macro_maps_size),
991 STAT_LABEL (macro_maps_size));
992 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
993 SCALE (s.duplicated_macro_maps_locations_size),
994 STAT_LABEL (s.duplicated_macro_maps_locations_size));
995 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
996 SCALE (total_allocated_map_size),
997 STAT_LABEL (total_allocated_map_size));
998 fprintf (stderr, "Total used maps size: %5ld%c\n",
999 SCALE (total_used_map_size),
1000 STAT_LABEL (total_used_map_size));
1001 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
1002 SCALE (s.adhoc_table_size),
1003 STAT_LABEL (s.adhoc_table_size));
1004 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
1005 s.adhoc_table_entries_used);
1006 fprintf (stderr, "optimized_ranges: %i\n",
1007 line_table->num_optimized_ranges);
1008 fprintf (stderr, "unoptimized_ranges: %i\n",
1009 line_table->num_unoptimized_ranges);
1010
1011 fprintf (stderr, "\n");
1012 }
1013
1014 /* Get location one beyond the final location in ordinary map IDX. */
1015
1016 static source_location
1017 get_end_location (struct line_maps *set, unsigned int idx)
1018 {
1019 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1020 return set->highest_location;
1021
1022 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1023 return MAP_START_LOCATION (next_map);
1024 }
1025
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1033
1034 /* Helper function for dump_location_info.
1035 Write a row of numbers to STREAM, numbering a source line,
1036 giving the units, tens, hundreds etc of the column number. */
1037
1038 static void
1039 write_digit_row (FILE *stream, int indent,
1040 const line_map_ordinary *map,
1041 source_location loc, int max_col, int divisor)
1042 {
1043 fprintf (stream, "%*c", indent, ' ');
1044 fprintf (stream, "|");
1045 for (int column = 1; column < max_col; column++)
1046 {
1047 source_location column_loc = loc + (column << map->m_range_bits);
1048 write_digit (stream, column_loc / divisor);
1049 }
1050 fprintf (stream, "\n");
1051 }
1052
1053 /* Write a half-closed (START) / half-open (END) interval of
1054 source_location to STREAM. */
1055
1056 static void
1057 dump_location_range (FILE *stream,
1058 source_location start, source_location end)
1059 {
1060 fprintf (stream,
1061 " source_location interval: %u <= loc < %u\n",
1062 start, end);
1063 }
1064
1065 /* Write a labelled description of a half-closed (START) / half-open (END)
1066 interval of source_location to STREAM. */
1067
1068 static void
1069 dump_labelled_location_range (FILE *stream,
1070 const char *name,
1071 source_location start, source_location end)
1072 {
1073 fprintf (stream, "%s\n", name);
1074 dump_location_range (stream, start, end);
1075 fprintf (stream, "\n");
1076 }
1077
1078 /* Write a visualization of the locations in the line_table to STREAM. */
1079
1080 void
1081 dump_location_info (FILE *stream)
1082 {
1083 /* Visualize the reserved locations. */
1084 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1085 0, RESERVED_LOCATION_COUNT);
1086
1087 /* Visualize the ordinary line_map instances, rendering the sources. */
1088 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1089 {
1090 source_location end_location = get_end_location (line_table, idx);
1091 /* half-closed: doesn't include this one. */
1092
1093 const line_map_ordinary *map
1094 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1095 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1096 dump_location_range (stream,
1097 MAP_START_LOCATION (map), end_location);
1098 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1099 fprintf (stream, " starting at line: %i\n",
1100 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1101 fprintf (stream, " column and range bits: %i\n",
1102 map->m_column_and_range_bits);
1103 fprintf (stream, " column bits: %i\n",
1104 map->m_column_and_range_bits - map->m_range_bits);
1105 fprintf (stream, " range bits: %i\n",
1106 map->m_range_bits);
1107
1108 /* Render the span of source lines that this "map" covers. */
1109 for (source_location loc = MAP_START_LOCATION (map);
1110 loc < end_location;
1111 loc += (1 << map->m_range_bits) )
1112 {
1113 gcc_assert (pure_location_p (line_table, loc) );
1114
1115 expanded_location exploc
1116 = linemap_expand_location (line_table, map, loc);
1117
1118 if (exploc.column == 0)
1119 {
1120 /* Beginning of a new source line: draw the line. */
1121
1122 char_span line_text = location_get_source_line (exploc.file,
1123 exploc.line);
1124 if (!line_text)
1125 break;
1126 fprintf (stream,
1127 "%s:%3i|loc:%5i|%.*s\n",
1128 exploc.file, exploc.line,
1129 loc,
1130 (int)line_text.length (), line_text.get_buffer ());
1131
1132 /* "loc" is at column 0, which means "the whole line".
1133 Render the locations *within* the line, by underlining
1134 it, showing the source_location numeric values
1135 at each column. */
1136 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1137 if (max_col > line_text.length ())
1138 max_col = line_text.length () + 1;
1139
1140 int indent = 14 + strlen (exploc.file);
1141
1142 /* Thousands. */
1143 if (end_location > 999)
1144 write_digit_row (stream, indent, map, loc, max_col, 1000);
1145
1146 /* Hundreds. */
1147 if (end_location > 99)
1148 write_digit_row (stream, indent, map, loc, max_col, 100);
1149
1150 /* Tens. */
1151 write_digit_row (stream, indent, map, loc, max_col, 10);
1152
1153 /* Units. */
1154 write_digit_row (stream, indent, map, loc, max_col, 1);
1155 }
1156 }
1157 fprintf (stream, "\n");
1158 }
1159
1160 /* Visualize unallocated values. */
1161 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1162 line_table->highest_location,
1163 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1164
1165 /* Visualize the macro line_map instances, rendering the sources. */
1166 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1167 {
1168 /* Each macro map that is allocated owns source_location values
1169 that are *lower* that the one before them.
1170 Hence it's meaningful to view them either in order of ascending
1171 source locations, or in order of ascending macro map index. */
1172 const bool ascending_source_locations = true;
1173 unsigned int idx = (ascending_source_locations
1174 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1175 : i);
1176 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1177 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1178 idx,
1179 linemap_map_get_macro_name (map),
1180 MACRO_MAP_NUM_MACRO_TOKENS (map));
1181 dump_location_range (stream,
1182 map->start_location,
1183 (map->start_location
1184 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1185 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1186 "expansion point is location %i",
1187 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1188 fprintf (stream, " map->start_location: %u\n",
1189 map->start_location);
1190
1191 fprintf (stream, " macro_locations:\n");
1192 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1193 {
1194 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1195 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1196
1197 /* linemap_add_macro_token encodes token numbers in an expansion
1198 by putting them after MAP_START_LOCATION. */
1199
1200 /* I'm typically seeing 4 uninitialized entries at the end of
1201 0xafafafaf.
1202 This appears to be due to macro.c:replace_args
1203 adding 2 extra args for padding tokens; presumably there may
1204 be a leading and/or trailing padding token injected,
1205 each for 2 more location slots.
1206 This would explain there being up to 4 source_locations slots
1207 that may be uninitialized. */
1208
1209 fprintf (stream, " %u: %u, %u\n",
1210 i,
1211 x,
1212 y);
1213 if (x == y)
1214 {
1215 if (x < MAP_START_LOCATION (map))
1216 inform (x, "token %u has x-location == y-location == %u", i, x);
1217 else
1218 fprintf (stream,
1219 "x-location == y-location == %u encodes token # %u\n",
1220 x, x - MAP_START_LOCATION (map));
1221 }
1222 else
1223 {
1224 inform (x, "token %u has x-location == %u", i, x);
1225 inform (x, "token %u has y-location == %u", i, y);
1226 }
1227 }
1228 fprintf (stream, "\n");
1229 }
1230
1231 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1232 macro map, presumably due to an off-by-one error somewhere
1233 between the logic in linemap_enter_macro and
1234 LINEMAPS_MACRO_LOWEST_LOCATION. */
1235 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1236 MAX_SOURCE_LOCATION,
1237 MAX_SOURCE_LOCATION + 1);
1238
1239 /* Visualize ad-hoc values. */
1240 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1241 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1242 }
1243
1244 /* string_concat's constructor. */
1245
1246 string_concat::string_concat (int num, location_t *locs)
1247 : m_num (num)
1248 {
1249 m_locs = ggc_vec_alloc <location_t> (num);
1250 for (int i = 0; i < num; i++)
1251 m_locs[i] = locs[i];
1252 }
1253
1254 /* string_concat_db's constructor. */
1255
1256 string_concat_db::string_concat_db ()
1257 {
1258 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1259 }
1260
1261 /* Record that a string concatenation occurred, covering NUM
1262 string literal tokens. LOCS is an array of size NUM, containing the
1263 locations of the tokens. A copy of LOCS is taken. */
1264
1265 void
1266 string_concat_db::record_string_concatenation (int num, location_t *locs)
1267 {
1268 gcc_assert (num > 1);
1269 gcc_assert (locs);
1270
1271 location_t key_loc = get_key_loc (locs[0]);
1272
1273 string_concat *concat
1274 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1275 m_table->put (key_loc, concat);
1276 }
1277
1278 /* Determine if LOC was the location of the the initial token of a
1279 concatenation of string literal tokens.
1280 If so, *OUT_NUM is written to with the number of tokens, and
1281 *OUT_LOCS with the location of an array of locations of the
1282 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1283 storage owned by the string_concat_db.
1284 Otherwise, return false. */
1285
1286 bool
1287 string_concat_db::get_string_concatenation (location_t loc,
1288 int *out_num,
1289 location_t **out_locs)
1290 {
1291 gcc_assert (out_num);
1292 gcc_assert (out_locs);
1293
1294 location_t key_loc = get_key_loc (loc);
1295
1296 string_concat **concat = m_table->get (key_loc);
1297 if (!concat)
1298 return false;
1299
1300 *out_num = (*concat)->m_num;
1301 *out_locs =(*concat)->m_locs;
1302 return true;
1303 }
1304
1305 /* Internal function. Canonicalize LOC into a form suitable for
1306 use as a key within the database, stripping away macro expansion,
1307 ad-hoc information, and range information, using the location of
1308 the start of LOC within an ordinary linemap. */
1309
1310 location_t
1311 string_concat_db::get_key_loc (location_t loc)
1312 {
1313 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1314 NULL);
1315
1316 loc = get_range_from_loc (line_table, loc).m_start;
1317
1318 return loc;
1319 }
1320
1321 /* Helper class for use within get_substring_ranges_for_loc.
1322 An vec of cpp_string with responsibility for releasing all of the
1323 str->text for each str in the vector. */
1324
1325 class auto_cpp_string_vec : public auto_vec <cpp_string>
1326 {
1327 public:
1328 auto_cpp_string_vec (int alloc)
1329 : auto_vec <cpp_string> (alloc) {}
1330
1331 ~auto_cpp_string_vec ()
1332 {
1333 /* Clean up the copies within this vec. */
1334 int i;
1335 cpp_string *str;
1336 FOR_EACH_VEC_ELT (*this, i, str)
1337 free (const_cast <unsigned char *> (str->text));
1338 }
1339 };
1340
1341 /* Attempt to populate RANGES with source location information on the
1342 individual characters within the string literal found at STRLOC.
1343 If CONCATS is non-NULL, then any string literals that the token at
1344 STRLOC was concatenated with are also added to RANGES.
1345
1346 Return NULL if successful, or an error message if any errors occurred (in
1347 which case RANGES may be only partially populated and should not
1348 be used).
1349
1350 This is implemented by re-parsing the relevant source line(s). */
1351
1352 static const char *
1353 get_substring_ranges_for_loc (cpp_reader *pfile,
1354 string_concat_db *concats,
1355 location_t strloc,
1356 enum cpp_ttype type,
1357 cpp_substring_ranges &ranges)
1358 {
1359 gcc_assert (pfile);
1360
1361 if (strloc == UNKNOWN_LOCATION)
1362 return "unknown location";
1363
1364 /* Reparsing the strings requires accurate location information.
1365 If -ftrack-macro-expansion has been overridden from its default
1366 of 2, then we might have a location of a macro expansion point,
1367 rather than the location of the literal itself.
1368 Avoid this by requiring that we have full macro expansion tracking
1369 for substring locations to be available. */
1370 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1371 return "track_macro_expansion != 2";
1372
1373 /* If #line or # 44 "file"-style directives are present, then there's
1374 no guarantee that the line numbers we have can be used to locate
1375 the strings. For example, we might have a .i file with # directives
1376 pointing back to lines within a .c file, but the .c file might
1377 have been edited since the .i file was created.
1378 In such a case, the safest course is to disable on-demand substring
1379 locations. */
1380 if (line_table->seen_line_directive)
1381 return "seen line directive";
1382
1383 /* If string concatenation has occurred at STRLOC, get the locations
1384 of all of the literal tokens making up the compound string.
1385 Otherwise, just use STRLOC. */
1386 int num_locs = 1;
1387 location_t *strlocs = &strloc;
1388 if (concats)
1389 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1390
1391 auto_cpp_string_vec strs (num_locs);
1392 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1393 for (int i = 0; i < num_locs; i++)
1394 {
1395 /* Get range of strloc. We will use it to locate the start and finish
1396 of the literal token within the line. */
1397 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1398
1399 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1400 {
1401 /* If the string token was within a macro expansion, then we can
1402 cope with it for the simple case where we have a single token.
1403 Otherwise, bail out. */
1404 if (src_range.m_start != src_range.m_finish)
1405 return "macro expansion";
1406 }
1407 else
1408 {
1409 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1410 /* If so, we can't reliably determine where the token started within
1411 its line. */
1412 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1413
1414 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1415 /* If so, we can't reliably determine where the token finished
1416 within its line. */
1417 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1418 }
1419
1420 expanded_location start
1421 = expand_location_to_spelling_point (src_range.m_start,
1422 LOCATION_ASPECT_START);
1423 expanded_location finish
1424 = expand_location_to_spelling_point (src_range.m_finish,
1425 LOCATION_ASPECT_FINISH);
1426 if (start.file != finish.file)
1427 return "range endpoints are in different files";
1428 if (start.line != finish.line)
1429 return "range endpoints are on different lines";
1430 if (start.column > finish.column)
1431 return "range endpoints are reversed";
1432
1433 char_span line = location_get_source_line (start.file, start.line);
1434 if (!line)
1435 return "unable to read source line";
1436
1437 /* Determine the location of the literal (including quotes
1438 and leading prefix chars, such as the 'u' in a u""
1439 token). */
1440 size_t literal_length = finish.column - start.column + 1;
1441
1442 /* Ensure that we don't crash if we got the wrong location. */
1443 if (line.length () < (start.column - 1 + literal_length))
1444 return "line is not wide enough";
1445
1446 char_span literal = line.subspan (start.column - 1, literal_length);
1447
1448 cpp_string from;
1449 from.len = literal_length;
1450 /* Make a copy of the literal, to avoid having to rely on
1451 the lifetime of the copy of the line within the cache.
1452 This will be released by the auto_cpp_string_vec dtor. */
1453 from.text = (unsigned char *)literal.xstrdup ();
1454 strs.safe_push (from);
1455
1456 /* For very long lines, a new linemap could have started
1457 halfway through the token.
1458 Ensure that the loc_reader uses the linemap of the
1459 *end* of the token for its start location. */
1460 const line_map_ordinary *start_ord_map;
1461 linemap_resolve_location (line_table, src_range.m_start,
1462 LRK_SPELLING_LOCATION, &start_ord_map);
1463 const line_map_ordinary *final_ord_map;
1464 linemap_resolve_location (line_table, src_range.m_finish,
1465 LRK_SPELLING_LOCATION, &final_ord_map);
1466 /* Bulletproofing. We ought to only have different ordinary maps
1467 for start vs finish due to line-length jumps. */
1468 if (start_ord_map != final_ord_map
1469 && start_ord_map->to_file != final_ord_map->to_file)
1470 return "start and finish are spelled in different ordinary maps";
1471 location_t start_loc
1472 = linemap_position_for_line_and_column (line_table, final_ord_map,
1473 start.line, start.column);
1474
1475 cpp_string_location_reader loc_reader (start_loc, line_table);
1476 loc_readers.safe_push (loc_reader);
1477 }
1478
1479 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1480 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1481 loc_readers.address (),
1482 num_locs, &ranges, type);
1483 if (err)
1484 return err;
1485
1486 /* Success: "ranges" should now contain information on the string. */
1487 return NULL;
1488 }
1489
1490 /* Attempt to populate *OUT_LOC with source location information on the
1491 given characters within the string literal found at STRLOC.
1492 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1493 character set.
1494
1495 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1496 and string literal "012345\n789"
1497 *OUT_LOC is written to with:
1498 "012345\n789"
1499 ~^~~~~
1500
1501 If CONCATS is non-NULL, then any string literals that the token at
1502 STRLOC was concatenated with are also considered.
1503
1504 This is implemented by re-parsing the relevant source line(s).
1505
1506 Return NULL if successful, or an error message if any errors occurred.
1507 Error messages are intended for GCC developers (to help debugging) rather
1508 than for end-users. */
1509
1510 const char *
1511 get_source_location_for_substring (cpp_reader *pfile,
1512 string_concat_db *concats,
1513 location_t strloc,
1514 enum cpp_ttype type,
1515 int caret_idx, int start_idx, int end_idx,
1516 source_location *out_loc)
1517 {
1518 gcc_checking_assert (caret_idx >= 0);
1519 gcc_checking_assert (start_idx >= 0);
1520 gcc_checking_assert (end_idx >= 0);
1521 gcc_assert (out_loc);
1522
1523 cpp_substring_ranges ranges;
1524 const char *err
1525 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1526 if (err)
1527 return err;
1528
1529 if (caret_idx >= ranges.get_num_ranges ())
1530 return "caret_idx out of range";
1531 if (start_idx >= ranges.get_num_ranges ())
1532 return "start_idx out of range";
1533 if (end_idx >= ranges.get_num_ranges ())
1534 return "end_idx out of range";
1535
1536 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1537 ranges.get_range (start_idx).m_start,
1538 ranges.get_range (end_idx).m_finish);
1539 return NULL;
1540 }
1541
1542 #if CHECKING_P
1543
1544 namespace selftest {
1545
1546 /* Selftests of location handling. */
1547
1548 /* Attempt to populate *OUT_RANGE with source location information on the
1549 given character within the string literal found at STRLOC.
1550 CHAR_IDX refers to an offset within the execution character set.
1551 If CONCATS is non-NULL, then any string literals that the token at
1552 STRLOC was concatenated with are also considered.
1553
1554 This is implemented by re-parsing the relevant source line(s).
1555
1556 Return NULL if successful, or an error message if any errors occurred.
1557 Error messages are intended for GCC developers (to help debugging) rather
1558 than for end-users. */
1559
1560 static const char *
1561 get_source_range_for_char (cpp_reader *pfile,
1562 string_concat_db *concats,
1563 location_t strloc,
1564 enum cpp_ttype type,
1565 int char_idx,
1566 source_range *out_range)
1567 {
1568 gcc_checking_assert (char_idx >= 0);
1569 gcc_assert (out_range);
1570
1571 cpp_substring_ranges ranges;
1572 const char *err
1573 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1574 if (err)
1575 return err;
1576
1577 if (char_idx >= ranges.get_num_ranges ())
1578 return "char_idx out of range";
1579
1580 *out_range = ranges.get_range (char_idx);
1581 return NULL;
1582 }
1583
1584 /* As get_source_range_for_char, but write to *OUT the number
1585 of ranges that are available. */
1586
1587 static const char *
1588 get_num_source_ranges_for_substring (cpp_reader *pfile,
1589 string_concat_db *concats,
1590 location_t strloc,
1591 enum cpp_ttype type,
1592 int *out)
1593 {
1594 gcc_assert (out);
1595
1596 cpp_substring_ranges ranges;
1597 const char *err
1598 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1599
1600 if (err)
1601 return err;
1602
1603 *out = ranges.get_num_ranges ();
1604 return NULL;
1605 }
1606
1607 /* Selftests of location handling. */
1608
1609 /* Verify that compare() on linenum_type handles comparisons over the full
1610 range of the type. */
1611
1612 static void
1613 test_linenum_comparisons ()
1614 {
1615 linenum_type min_line (0);
1616 linenum_type max_line (0xffffffff);
1617 ASSERT_EQ (0, compare (min_line, min_line));
1618 ASSERT_EQ (0, compare (max_line, max_line));
1619
1620 ASSERT_GT (compare (max_line, min_line), 0);
1621 ASSERT_LT (compare (min_line, max_line), 0);
1622 }
1623
1624 /* Helper function for verifying location data: when location_t
1625 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1626 as having column 0. */
1627
1628 static bool
1629 should_have_column_data_p (location_t loc)
1630 {
1631 if (IS_ADHOC_LOC (loc))
1632 loc = get_location_from_adhoc_loc (line_table, loc);
1633 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1634 return false;
1635 return true;
1636 }
1637
1638 /* Selftest for should_have_column_data_p. */
1639
1640 static void
1641 test_should_have_column_data_p ()
1642 {
1643 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1644 ASSERT_TRUE
1645 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1646 ASSERT_FALSE
1647 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1648 }
1649
1650 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1651 on LOC. */
1652
1653 static void
1654 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1655 location_t loc)
1656 {
1657 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1658 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1659 /* If location_t values are sufficiently high, then column numbers
1660 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1661 When close to the threshold, column numbers *may* be present: if
1662 the final linemap before the threshold contains a line that straddles
1663 the threshold, locations in that line have column information. */
1664 if (should_have_column_data_p (loc))
1665 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1666 }
1667
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* Record DEFAULT_RANGE_BITS and BASE_LOCATION for later use when
     setting up a line table.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Starting value for the line table's highest location; zero means
     "leave the freshly-initialized value alone".  */
  int m_base_location;
};
1694
1695 /* Constructor. Store the old value of line_table, and create a new
1696 one, using sane defaults. */
1697
1698 line_table_test::line_table_test ()
1699 {
1700 gcc_assert (saved_line_table == NULL);
1701 saved_line_table = line_table;
1702 line_table = ggc_alloc<line_maps> ();
1703 linemap_init (line_table, BUILTINS_LOCATION);
1704 gcc_assert (saved_line_table->reallocator);
1705 line_table->reallocator = saved_line_table->reallocator;
1706 gcc_assert (saved_line_table->round_alloc_size);
1707 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1708 line_table->default_range_bits = 0;
1709 }
1710
1711 /* Constructor. Store the old value of line_table, and create a new
1712 one, using the sitation described in CASE_. */
1713
1714 line_table_test::line_table_test (const line_table_case &case_)
1715 {
1716 gcc_assert (saved_line_table == NULL);
1717 saved_line_table = line_table;
1718 line_table = ggc_alloc<line_maps> ();
1719 linemap_init (line_table, BUILTINS_LOCATION);
1720 gcc_assert (saved_line_table->reallocator);
1721 line_table->reallocator = saved_line_table->reallocator;
1722 gcc_assert (saved_line_table->round_alloc_size);
1723 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1724 line_table->default_range_bits = case_.m_default_range_bits;
1725 if (case_.m_base_location)
1726 {
1727 line_table->highest_location = case_.m_base_location;
1728 line_table->highest_line = case_.m_base_location;
1729 }
1730 }
1731
1732 /* Destructor. Restore the old value of line_table. */
1733
1734 line_table_test::~line_table_test ()
1735 {
1736 gcc_assert (saved_line_table != NULL);
1737 line_table = saved_line_table;
1738 saved_line_table = NULL;
1739 }
1740
1741 /* Verify basic operation of ordinary linemaps. */
1742
1743 static void
1744 test_accessing_ordinary_linemaps (const line_table_case &case_)
1745 {
1746 line_table_test ltt (case_);
1747
1748 /* Build a simple linemap describing some locations. */
1749 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1750
1751 linemap_line_start (line_table, 1, 100);
1752 location_t loc_a = linemap_position_for_column (line_table, 1);
1753 location_t loc_b = linemap_position_for_column (line_table, 23);
1754
1755 linemap_line_start (line_table, 2, 100);
1756 location_t loc_c = linemap_position_for_column (line_table, 1);
1757 location_t loc_d = linemap_position_for_column (line_table, 17);
1758
1759 /* Example of a very long line. */
1760 linemap_line_start (line_table, 3, 2000);
1761 location_t loc_e = linemap_position_for_column (line_table, 700);
1762
1763 /* Transitioning back to a short line. */
1764 linemap_line_start (line_table, 4, 0);
1765 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1766
1767 if (should_have_column_data_p (loc_back_to_short))
1768 {
1769 /* Verify that we switched to short lines in the linemap. */
1770 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1771 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1772 }
1773
1774 /* Example of a line that will eventually be seen to be longer
1775 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1776 below that. */
1777 linemap_line_start (line_table, 5, 2000);
1778
1779 location_t loc_start_of_very_long_line
1780 = linemap_position_for_column (line_table, 2000);
1781 location_t loc_too_wide
1782 = linemap_position_for_column (line_table, 4097);
1783 location_t loc_too_wide_2
1784 = linemap_position_for_column (line_table, 4098);
1785
1786 /* ...and back to a sane line length. */
1787 linemap_line_start (line_table, 6, 100);
1788 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1789
1790 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1791
1792 /* Multiple files. */
1793 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1794 linemap_line_start (line_table, 1, 200);
1795 location_t loc_f = linemap_position_for_column (line_table, 150);
1796 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1797
1798 /* Verify that we can recover the location info. */
1799 assert_loceq ("foo.c", 1, 1, loc_a);
1800 assert_loceq ("foo.c", 1, 23, loc_b);
1801 assert_loceq ("foo.c", 2, 1, loc_c);
1802 assert_loceq ("foo.c", 2, 17, loc_d);
1803 assert_loceq ("foo.c", 3, 700, loc_e);
1804 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1805
1806 /* In the very wide line, the initial location should be fully tracked. */
1807 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1808 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1809 be disabled. */
1810 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1811 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1812 /*...and column-tracking should be re-enabled for subsequent lines. */
1813 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1814
1815 assert_loceq ("bar.c", 1, 150, loc_f);
1816
1817 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1818 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1819
1820 /* Verify using make_location to build a range, and extracting data
1821 back from it. */
1822 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1823 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1824 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1825 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1826 ASSERT_EQ (loc_b, src_range.m_start);
1827 ASSERT_EQ (loc_d, src_range.m_finish);
1828 }
1829
1830 /* Verify various properties of UNKNOWN_LOCATION. */
1831
1832 static void
1833 test_unknown_location ()
1834 {
1835 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1836 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1837 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1838 }
1839
1840 /* Verify various properties of BUILTINS_LOCATION. */
1841
1842 static void
1843 test_builtins ()
1844 {
1845 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1846 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1847 }
1848
1849 /* Regression test for make_location.
1850 Ensure that we use pure locations for the start/finish of the range,
1851 rather than storing a packed or ad-hoc range as the start/finish. */
1852
1853 static void
1854 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1855 {
1856 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1857 with C++ frontend.
1858 ....................0000000001111111111222.
1859 ....................1234567890123456789012. */
1860 const char *content = " r += !aaa == bbb;\n";
1861 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1862 line_table_test ltt (case_);
1863 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1864
1865 const location_t c11 = linemap_position_for_column (line_table, 11);
1866 const location_t c12 = linemap_position_for_column (line_table, 12);
1867 const location_t c13 = linemap_position_for_column (line_table, 13);
1868 const location_t c14 = linemap_position_for_column (line_table, 14);
1869 const location_t c21 = linemap_position_for_column (line_table, 21);
1870
1871 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1872 return;
1873
1874 /* Use column 13 for the caret location, arbitrarily, to verify that we
1875 handle start != caret. */
1876 const location_t aaa = make_location (c13, c12, c14);
1877 ASSERT_EQ (c13, get_pure_location (aaa));
1878 ASSERT_EQ (c12, get_start (aaa));
1879 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1880 ASSERT_EQ (c14, get_finish (aaa));
1881 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1882
1883 /* Make a location using a location with a range as the start-point. */
1884 const location_t not_aaa = make_location (c11, aaa, c14);
1885 ASSERT_EQ (c11, get_pure_location (not_aaa));
1886 /* It should use the start location of the range, not store the range
1887 itself. */
1888 ASSERT_EQ (c12, get_start (not_aaa));
1889 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1890 ASSERT_EQ (c14, get_finish (not_aaa));
1891 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1892
1893 /* Similarly, make a location with a range as the end-point. */
1894 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1895 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1896 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1897 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1898 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1899 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1900 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1901 /* It should use the finish location of the range, not store the range
1902 itself. */
1903 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1904 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1905 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1906 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1907 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1908 }
1909
1910 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1911
1912 static void
1913 test_reading_source_line ()
1914 {
1915 /* Create a tempfile and write some text to it. */
1916 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1917 "01234567890123456789\n"
1918 "This is the test text\n"
1919 "This is the 3rd line");
1920
1921 /* Read back a specific line from the tempfile. */
1922 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1923 ASSERT_TRUE (source_line);
1924 ASSERT_TRUE (source_line.get_buffer () != NULL);
1925 ASSERT_EQ (20, source_line.length ());
1926 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1927 source_line.get_buffer (), source_line.length ()));
1928
1929 source_line = location_get_source_line (tmp.get_filename (), 2);
1930 ASSERT_TRUE (source_line);
1931 ASSERT_TRUE (source_line.get_buffer () != NULL);
1932 ASSERT_EQ (21, source_line.length ());
1933 ASSERT_TRUE (!strncmp ("This is the test text",
1934 source_line.get_buffer (), source_line.length ()));
1935
1936 source_line = location_get_source_line (tmp.get_filename (), 4);
1937 ASSERT_FALSE (source_line);
1938 ASSERT_TRUE (source_line.get_buffer () == NULL);
1939 }
1940
1941 /* Tests of lexing. */
1942
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.

   Expands to a single statement (wrapped in SELFTEST_BEGIN_STMT /
   SELFTEST_END_STMT) that fetches the token's text and compares it
   against EXPECTED_TEXT with ASSERT_STREQ.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1951
1952 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1953 and ranges from EXP_START_COL to EXP_FINISH_COL.
1954 Use LOC as the effective location of the selftest. */
1955
1956 static void
1957 assert_token_loc_eq (const location &loc,
1958 const cpp_token *tok,
1959 const char *exp_filename, int exp_linenum,
1960 int exp_start_col, int exp_finish_col)
1961 {
1962 location_t tok_loc = tok->src_loc;
1963 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1964 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1965
1966 /* If location_t values are sufficiently high, then column numbers
1967 will be unavailable. */
1968 if (!should_have_column_data_p (tok_loc))
1969 return;
1970
1971 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1972 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1973 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1974 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1975 }
1976
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest.

   TOK is expected to be in EXP_FILENAME at line EXP_LINENUM, ranging
   from column EXP_START_COL to column EXP_FINISH_COL.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1984
1985 /* Test of lexing a file using libcpp, verifying tokens and their
1986 location information. */
1987
1988 static void
1989 test_lexer (const line_table_case &case_)
1990 {
1991 /* Create a tempfile and write some text to it. */
1992 const char *content =
1993 /*00000000011111111112222222222333333.3333444444444.455555555556
1994 12345678901234567890123456789012345.6789012345678.901234567890. */
1995 ("test_name /* c-style comment */\n"
1996 " \"test literal\"\n"
1997 " // test c++-style comment\n"
1998 " 42\n");
1999 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2000
2001 line_table_test ltt (case_);
2002
2003 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2004
2005 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2006 ASSERT_NE (fname, NULL);
2007
2008 /* Verify that we get the expected tokens back, with the correct
2009 location information. */
2010
2011 location_t loc;
2012 const cpp_token *tok;
2013 tok = cpp_get_token_with_location (parser, &loc);
2014 ASSERT_NE (tok, NULL);
2015 ASSERT_EQ (tok->type, CPP_NAME);
2016 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2017 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2018
2019 tok = cpp_get_token_with_location (parser, &loc);
2020 ASSERT_NE (tok, NULL);
2021 ASSERT_EQ (tok->type, CPP_STRING);
2022 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2023 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2024
2025 tok = cpp_get_token_with_location (parser, &loc);
2026 ASSERT_NE (tok, NULL);
2027 ASSERT_EQ (tok->type, CPP_NUMBER);
2028 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2029 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2030
2031 tok = cpp_get_token_with_location (parser, &loc);
2032 ASSERT_NE (tok, NULL);
2033 ASSERT_EQ (tok->type, CPP_EOF);
2034
2035 cpp_finish (parser, NULL);
2036 cpp_destroy (parser);
2037 }
2038
2039 /* Forward decls. */
2040
2041 struct lexer_test;
2042 class lexer_test_options;
2043
2044 /* A class for specifying options of a lexer_test.
2045 The "apply" vfunc is called during the lexer_test constructor. */
2046
2047 class lexer_test_options
2048 {
2049 public:
2050 virtual void apply (lexer_test &) = 0;
2051 };
2052
2053 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2054 in its dtor.
2055
2056 This is needed by struct lexer_test to ensure that the cleanup of the
2057 cpp_reader happens *after* the cleanup of the temp_source_file. */
2058
2059 class cpp_reader_ptr
2060 {
2061 public:
2062 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2063
2064 ~cpp_reader_ptr ()
2065 {
2066 cpp_finish (m_ptr, NULL);
2067 cpp_destroy (m_ptr);
2068 }
2069
2070 operator cpp_reader * () const { return m_ptr; }
2071
2072 private:
2073 cpp_reader *m_ptr;
2074 };
2075
/* A struct for writing lexer tests.

   It bundles together a line_table override, a cpp_reader, a temporary
   source file holding the text to lex, and a string concatenation
   database for use by the substring-location checks.  */

struct lexer_test
{
  /* Set up the test for CASE_, lexing CONTENT; OPTIONS may be NULL.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  /* Fetch the next token from the reader.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.c's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  /* When true, the destructor asserts that the next token is CPP_EOF.  */
  bool m_implicitly_expect_EOF;
};
2099
2100 /* Use an EBCDIC encoding for the execution charset, specifically
2101 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2102
2103 This exercises iconv integration within libcpp.
2104 Not every build of iconv supports the given charset,
2105 so we need to flag this error and handle it gracefully. */
2106
2107 class ebcdic_execution_charset : public lexer_test_options
2108 {
2109 public:
2110 ebcdic_execution_charset () : m_num_iconv_errors (0)
2111 {
2112 gcc_assert (s_singleton == NULL);
2113 s_singleton = this;
2114 }
2115 ~ebcdic_execution_charset ()
2116 {
2117 gcc_assert (s_singleton == this);
2118 s_singleton = NULL;
2119 }
2120
2121 void apply (lexer_test &test) FINAL OVERRIDE
2122 {
2123 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2124 cpp_opts->narrow_charset = "IBM1047";
2125
2126 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2127 callbacks->diagnostic = on_diagnostic;
2128 }
2129
2130 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2131 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2132 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2133 rich_location *richloc ATTRIBUTE_UNUSED,
2134 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2135 ATTRIBUTE_FPTR_PRINTF(5,0)
2136 {
2137 gcc_assert (s_singleton);
2138 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2139 const char *msg = "conversion from %s to %s not supported by iconv";
2140 #ifdef ENABLE_NLS
2141 msg = dgettext ("cpplib", msg);
2142 #endif
2143 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2144 when the local iconv build doesn't support the conversion. */
2145 if (strcmp (msgid, msg) == 0)
2146 {
2147 s_singleton->m_num_iconv_errors++;
2148 return true;
2149 }
2150
2151 /* Otherwise, we have an unexpected error. */
2152 abort ();
2153 }
2154
2155 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2156
2157 private:
2158 static ebcdic_execution_charset *s_singleton;
2159 int m_num_iconv_errors;
2160 };
2161
2162 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2163
2164 /* A lexer_test_options subclass that records a list of diagnostic
2165 messages emitted by the lexer. */
2166
2167 class lexer_diagnostic_sink : public lexer_test_options
2168 {
2169 public:
2170 lexer_diagnostic_sink ()
2171 {
2172 gcc_assert (s_singleton == NULL);
2173 s_singleton = this;
2174 }
2175 ~lexer_diagnostic_sink ()
2176 {
2177 gcc_assert (s_singleton == this);
2178 s_singleton = NULL;
2179
2180 int i;
2181 char *str;
2182 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2183 free (str);
2184 }
2185
2186 void apply (lexer_test &test) FINAL OVERRIDE
2187 {
2188 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2189 callbacks->diagnostic = on_diagnostic;
2190 }
2191
2192 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2193 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2194 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2195 rich_location *richloc ATTRIBUTE_UNUSED,
2196 const char *msgid, va_list *ap)
2197 ATTRIBUTE_FPTR_PRINTF(5,0)
2198 {
2199 char *msg = xvasprintf (msgid, *ap);
2200 s_singleton->m_diagnostics.safe_push (msg);
2201 return true;
2202 }
2203
2204 auto_vec<char *> m_diagnostics;
2205
2206 private:
2207 static lexer_diagnostic_sink *s_singleton;
2208 };
2209
2210 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2211
2212 /* Constructor. Override line_table with a new instance based on CASE_,
2213 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2214 start parsing the tempfile. */
2215
2216 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2217 lexer_test_options *options)
2218 : m_ltt (case_),
2219 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2220 /* Create a tempfile and write the text to it. */
2221 m_tempfile (SELFTEST_LOCATION, ".c", content),
2222 m_concats (),
2223 m_implicitly_expect_EOF (true)
2224 {
2225 if (options)
2226 options->apply (*this);
2227
2228 cpp_init_iconv (m_parser);
2229
2230 /* Parse the file. */
2231 const char *fname = cpp_read_main_file (m_parser,
2232 m_tempfile.get_filename ());
2233 ASSERT_NE (fname, NULL);
2234 }
2235
2236 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2237
2238 lexer_test::~lexer_test ()
2239 {
2240 location_t loc;
2241 const cpp_token *tok;
2242
2243 if (m_implicitly_expect_EOF)
2244 {
2245 tok = cpp_get_token_with_location (m_parser, &loc);
2246 ASSERT_NE (tok, NULL);
2247 ASSERT_EQ (tok->type, CPP_EOF);
2248 }
2249 }
2250
2251 /* Get the next token from m_parser. */
2252
2253 const cpp_token *
2254 lexer_test::get_token ()
2255 {
2256 location_t loc;
2257 const cpp_token *tok;
2258
2259 tok = cpp_get_token_with_location (m_parser, &loc);
2260 ASSERT_NE (tok, NULL);
2261 return tok;
2262 }
2263
2264 /* Verify that locations within string literals are correctly handled. */
2265
2266 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2267 using the string concatenation database for TEST.
2268
2269 Assert that the character at index IDX is on EXPECTED_LINE,
2270 and that it begins at column EXPECTED_START_COL and ends at
2271 EXPECTED_FINISH_COL (unless the locations are beyond
2272 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2273 columns). */
2274
2275 static void
2276 assert_char_at_range (const location &loc,
2277 lexer_test& test,
2278 location_t strloc, enum cpp_ttype type, int idx,
2279 int expected_line, int expected_start_col,
2280 int expected_finish_col)
2281 {
2282 cpp_reader *pfile = test.m_parser;
2283 string_concat_db *concats = &test.m_concats;
2284
2285 source_range actual_range = source_range();
2286 const char *err
2287 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2288 &actual_range);
2289 if (should_have_column_data_p (strloc))
2290 ASSERT_EQ_AT (loc, NULL, err);
2291 else
2292 {
2293 ASSERT_STREQ_AT (loc,
2294 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2295 err);
2296 return;
2297 }
2298
2299 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2300 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2301 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2302 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2303
2304 if (should_have_column_data_p (actual_range.m_start))
2305 {
2306 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2307 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2308 }
2309 if (should_have_column_data_p (actual_range.m_finish))
2310 {
2311 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2312 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2313 }
2314 }
2315
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors.

   The remaining arguments are forwarded unchanged: the lexer_test, the
   location and type of the string token(s), the character index, and
   the expected line and start/finish columns.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL) \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
                        (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
                        (EXPECTED_FINISH_COL))
2324
2325 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2326 using the string concatenation database for TEST.
2327
2328 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2329
2330 static void
2331 assert_num_substring_ranges (const location &loc,
2332 lexer_test& test,
2333 location_t strloc,
2334 enum cpp_ttype type,
2335 int expected_num_ranges)
2336 {
2337 cpp_reader *pfile = test.m_parser;
2338 string_concat_db *concats = &test.m_concats;
2339
2340 int actual_num_ranges = -1;
2341 const char *err
2342 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2343 &actual_num_ranges);
2344 if (should_have_column_data_p (strloc))
2345 ASSERT_EQ_AT (loc, NULL, err);
2346 else
2347 {
2348 ASSERT_STREQ_AT (loc,
2349 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2350 err);
2351 return;
2352 }
2353 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2354 }
2355
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.

   The remaining arguments are forwarded unchanged: the lexer_test, the
   location and type of the string token(s), and the expected number of
   ranges.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
                                    EXPECTED_NUM_RANGES) \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
                               (TYPE), (EXPECTED_NUM_RANGES))
2363
2364
2365 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2366 returns an error (using the string concatenation database for TEST). */
2367
2368 static void
2369 assert_has_no_substring_ranges (const location &loc,
2370 lexer_test& test,
2371 location_t strloc,
2372 enum cpp_ttype type,
2373 const char *expected_err)
2374 {
2375 cpp_reader *pfile = test.m_parser;
2376 string_concat_db *concats = &test.m_concats;
2377 cpp_substring_ranges ranges;
2378 const char *actual_err
2379 = get_substring_ranges_for_loc (pfile, concats, strloc,
2380 type, ranges);
2381 if (should_have_column_data_p (strloc))
2382 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2383 else
2384 ASSERT_STREQ_AT (loc,
2385 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2386 actual_err);
2387 }
2388
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.
   ERR is the error message that the lookup is expected to return.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
                                    (STRLOC), (TYPE), (ERR))
2392
2393 /* Lex a simple string literal. Verify the substring location data, before
2394 and after running cpp_interpret_string on it. */
2395
2396 static void
2397 test_lexer_string_locations_simple (const line_table_case &case_)
2398 {
2399 /* Digits 0-9 (with 0 at column 10), the simple way.
2400 ....................000000000.11111111112.2222222223333333333
2401 ....................123456789.01234567890.1234567890123456789
2402 We add a trailing comment to ensure that we correctly locate
2403 the end of the string literal token. */
2404 const char *content = " \"0123456789\" /* not a string */\n";
2405 lexer_test test (case_, content, NULL);
2406
2407 /* Verify that we get the expected token back, with the correct
2408 location information. */
2409 const cpp_token *tok = test.get_token ();
2410 ASSERT_EQ (tok->type, CPP_STRING);
2411 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2412 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2413
2414 /* At this point in lexing, the quote characters are treated as part of
2415 the string (they are stripped off by cpp_interpret_string). */
2416
2417 ASSERT_EQ (tok->val.str.len, 12);
2418
2419 /* Verify that cpp_interpret_string works. */
2420 cpp_string dst_string;
2421 const enum cpp_ttype type = CPP_STRING;
2422 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2423 &dst_string, type);
2424 ASSERT_TRUE (result);
2425 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2426 free (const_cast <unsigned char *> (dst_string.text));
2427
2428 /* Verify ranges of individual characters. This no longer includes the
2429 opening quote, but does include the closing quote. */
2430 for (int i = 0; i <= 10; i++)
2431 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2432 10 + i, 10 + i);
2433
2434 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2435 }
2436
2437 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2438 encoding. */
2439
2440 static void
2441 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2442 {
2443 /* EBCDIC support requires iconv. */
2444 if (!HAVE_ICONV)
2445 return;
2446
2447 /* Digits 0-9 (with 0 at column 10), the simple way.
2448 ....................000000000.11111111112.2222222223333333333
2449 ....................123456789.01234567890.1234567890123456789
2450 We add a trailing comment to ensure that we correctly locate
2451 the end of the string literal token. */
2452 const char *content = " \"0123456789\" /* not a string */\n";
2453 ebcdic_execution_charset use_ebcdic;
2454 lexer_test test (case_, content, &use_ebcdic);
2455
2456 /* Verify that we get the expected token back, with the correct
2457 location information. */
2458 const cpp_token *tok = test.get_token ();
2459 ASSERT_EQ (tok->type, CPP_STRING);
2460 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2461 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2462
2463 /* At this point in lexing, the quote characters are treated as part of
2464 the string (they are stripped off by cpp_interpret_string). */
2465
2466 ASSERT_EQ (tok->val.str.len, 12);
2467
2468 /* The remainder of the test requires an iconv implementation that
2469 can convert from UTF-8 to the EBCDIC encoding requested above. */
2470 if (use_ebcdic.iconv_errors_occurred_p ())
2471 return;
2472
2473 /* Verify that cpp_interpret_string works. */
2474 cpp_string dst_string;
2475 const enum cpp_ttype type = CPP_STRING;
2476 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2477 &dst_string, type);
2478 ASSERT_TRUE (result);
2479 /* We should now have EBCDIC-encoded text, specifically
2480 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2481 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2482 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2483 (const char *)dst_string.text);
2484 free (const_cast <unsigned char *> (dst_string.text));
2485
2486 /* Verify that we don't attempt to record substring location information
2487 for such cases. */
2488 ASSERT_HAS_NO_SUBSTRING_RANGES
2489 (test, tok->src_loc, type,
2490 "execution character set != source character set");
2491 }
2492
2493 /* Lex a string literal containing a hex-escaped character.
2494 Verify the substring location data, before and after running
2495 cpp_interpret_string on it. */
2496
2497 static void
2498 test_lexer_string_locations_hex (const line_table_case &case_)
2499 {
2500 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2501 and with a space in place of digit 6, to terminate the escaped
2502 hex code.
2503 ....................000000000.111111.11112222.
2504 ....................123456789.012345.67890123. */
2505 const char *content = " \"01234\\x35 789\"\n";
2506 lexer_test test (case_, content, NULL);
2507
2508 /* Verify that we get the expected token back, with the correct
2509 location information. */
2510 const cpp_token *tok = test.get_token ();
2511 ASSERT_EQ (tok->type, CPP_STRING);
2512 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2513 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2514
2515 /* At this point in lexing, the quote characters are treated as part of
2516 the string (they are stripped off by cpp_interpret_string). */
2517 ASSERT_EQ (tok->val.str.len, 15);
2518
2519 /* Verify that cpp_interpret_string works. */
2520 cpp_string dst_string;
2521 const enum cpp_ttype type = CPP_STRING;
2522 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2523 &dst_string, type);
2524 ASSERT_TRUE (result);
2525 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2526 free (const_cast <unsigned char *> (dst_string.text));
2527
2528 /* Verify ranges of individual characters. This no longer includes the
2529 opening quote, but does include the closing quote. */
2530 for (int i = 0; i <= 4; i++)
2531 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2532 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2533 for (int i = 6; i <= 10; i++)
2534 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2535
2536 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2537 }
2538
2539 /* Lex a string literal containing an octal-escaped character.
2540 Verify the substring location data after running cpp_interpret_string
2541 on it. */
2542
2543 static void
2544 test_lexer_string_locations_oct (const line_table_case &case_)
2545 {
2546 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2547 and with a space in place of digit 6, to terminate the escaped
2548 octal code.
2549 ....................000000000.111111.11112222.2222223333333333444
2550 ....................123456789.012345.67890123.4567890123456789012 */
2551 const char *content = " \"01234\\065 789\" /* not a string */\n";
2552 lexer_test test (case_, content, NULL);
2553
2554 /* Verify that we get the expected token back, with the correct
2555 location information. */
2556 const cpp_token *tok = test.get_token ();
2557 ASSERT_EQ (tok->type, CPP_STRING);
2558 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2559
2560 /* Verify that cpp_interpret_string works. */
2561 cpp_string dst_string;
2562 const enum cpp_ttype type = CPP_STRING;
2563 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2564 &dst_string, type);
2565 ASSERT_TRUE (result);
2566 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2567 free (const_cast <unsigned char *> (dst_string.text));
2568
2569 /* Verify ranges of individual characters. This no longer includes the
2570 opening quote, but does include the closing quote. */
2571 for (int i = 0; i < 5; i++)
2572 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2573 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2574 for (int i = 6; i <= 10; i++)
2575 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2576
2577 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2578 }
2579
2580 /* Test of string literal containing letter escapes. */
2581
2582 static void
2583 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2584 {
2585 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2586 .....................000000000.1.11111.1.1.11222.22222223333333
2587 .....................123456789.0.12345.6.7.89012.34567890123456. */
2588 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2589 lexer_test test (case_, content, NULL);
2590
2591 /* Verify that we get the expected tokens back. */
2592 const cpp_token *tok = test.get_token ();
2593 ASSERT_EQ (tok->type, CPP_STRING);
2594 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2595
2596 /* Verify ranges of individual characters. */
2597 /* "\t". */
2598 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2599 0, 1, 10, 11);
2600 /* "foo". */
2601 for (int i = 1; i <= 3; i++)
2602 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2603 i, 1, 11 + i, 11 + i);
2604 /* "\\" and "\n". */
2605 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2606 4, 1, 15, 16);
2607 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2608 5, 1, 17, 18);
2609
2610 /* "bar" and closing quote for nul-terminator. */
2611 for (int i = 6; i <= 9; i++)
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613 i, 1, 13 + i, 13 + i);
2614
2615 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2616 }
2617
2618 /* Another test of a string literal containing a letter escape.
2619 Based on string seen in
2620 printf ("%-%\n");
2621 in gcc.dg/format/c90-printf-1.c. */
2622
2623 static void
2624 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2625 {
2626 /* .....................000000000.1111.11.1111.22222222223.
2627 .....................123456789.0123.45.6789.01234567890. */
2628 const char *content = (" \"%-%\\n\" /* non-str */\n");
2629 lexer_test test (case_, content, NULL);
2630
2631 /* Verify that we get the expected tokens back. */
2632 const cpp_token *tok = test.get_token ();
2633 ASSERT_EQ (tok->type, CPP_STRING);
2634 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2635
2636 /* Verify ranges of individual characters. */
2637 /* "%-%". */
2638 for (int i = 0; i < 3; i++)
2639 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2640 i, 1, 10 + i, 10 + i);
2641 /* "\n". */
2642 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2643 3, 1, 13, 14);
2644
2645 /* Closing quote for nul-terminator. */
2646 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2647 4, 1, 15, 15);
2648
2649 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2650 }
2651
2652 /* Lex a string literal containing UCN 4 characters.
2653 Verify the substring location data after running cpp_interpret_string
2654 on it. */
2655
2656 static void
2657 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2658 {
2659 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2660 as UCN 4.
2661 ....................000000000.111111.111122.222222223.33333333344444
2662 ....................123456789.012345.678901.234567890.12345678901234 */
2663 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2664 lexer_test test (case_, content, NULL);
2665
2666 /* Verify that we get the expected token back, with the correct
2667 location information. */
2668 const cpp_token *tok = test.get_token ();
2669 ASSERT_EQ (tok->type, CPP_STRING);
2670 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2671
2672 /* Verify that cpp_interpret_string works.
2673 The string should be encoded in the execution character
2674 set. Assuming that that is UTF-8, we should have the following:
2675 ----------- ---- ----- ------- ----------------
2676 Byte offset Byte Octal Unicode Source Column(s)
2677 ----------- ---- ----- ------- ----------------
2678 0 0x30 '0' 10
2679 1 0x31 '1' 11
2680 2 0x32 '2' 12
2681 3 0x33 '3' 13
2682 4 0x34 '4' 14
2683 5 0xE2 \342 U+2174 15-20
2684 6 0x85 \205 (cont) 15-20
2685 7 0xB4 \264 (cont) 15-20
2686 8 0xE2 \342 U+2175 21-26
2687 9 0x85 \205 (cont) 21-26
2688 10 0xB5 \265 (cont) 21-26
2689 11 0x37 '7' 27
2690 12 0x38 '8' 28
2691 13 0x39 '9' 29
2692 14 0x00 30 (closing quote)
2693 ----------- ---- ----- ------- ---------------. */
2694
2695 cpp_string dst_string;
2696 const enum cpp_ttype type = CPP_STRING;
2697 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2698 &dst_string, type);
2699 ASSERT_TRUE (result);
2700 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2701 (const char *)dst_string.text);
2702 free (const_cast <unsigned char *> (dst_string.text));
2703
2704 /* Verify ranges of individual characters. This no longer includes the
2705 opening quote, but does include the closing quote.
2706 '01234'. */
2707 for (int i = 0; i <= 4; i++)
2708 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2709 /* U+2174. */
2710 for (int i = 5; i <= 7; i++)
2711 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2712 /* U+2175. */
2713 for (int i = 8; i <= 10; i++)
2714 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2715 /* '789' and nul terminator */
2716 for (int i = 11; i <= 14; i++)
2717 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2718
2719 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2720 }
2721
2722 /* Lex a string literal containing UCN 8 characters.
2723 Verify the substring location data after running cpp_interpret_string
2724 on it. */
2725
2726 static void
2727 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2728 {
2729 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2730 ....................000000000.111111.1111222222.2222333333333.344444
2731 ....................123456789.012345.6789012345.6789012345678.901234 */
2732 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2733 lexer_test test (case_, content, NULL);
2734
2735 /* Verify that we get the expected token back, with the correct
2736 location information. */
2737 const cpp_token *tok = test.get_token ();
2738 ASSERT_EQ (tok->type, CPP_STRING);
2739 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2740 "\"01234\\U00002174\\U00002175789\"");
2741
2742 /* Verify that cpp_interpret_string works.
2743 The UTF-8 encoding of the string is identical to that from
2744 the ucn4 testcase above; the only difference is the column
2745 locations. */
2746 cpp_string dst_string;
2747 const enum cpp_ttype type = CPP_STRING;
2748 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2749 &dst_string, type);
2750 ASSERT_TRUE (result);
2751 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2752 (const char *)dst_string.text);
2753 free (const_cast <unsigned char *> (dst_string.text));
2754
2755 /* Verify ranges of individual characters. This no longer includes the
2756 opening quote, but does include the closing quote.
2757 '01234'. */
2758 for (int i = 0; i <= 4; i++)
2759 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2760 /* U+2174. */
2761 for (int i = 5; i <= 7; i++)
2762 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2763 /* U+2175. */
2764 for (int i = 8; i <= 10; i++)
2765 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2766 /* '789' at columns 35-37 */
2767 for (int i = 11; i <= 13; i++)
2768 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2769 /* Closing quote/nul-terminator at column 38. */
2770 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2771
2772 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2773 }
2774
/* Read the four bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 32-bit quantity and return it as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Fold the bytes in from most significant to least significant.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | bytes[idx];

  return value;
}
2786
2787 /* Lex a wide string literal and verify that attempts to read substring
2788 location data from it fail gracefully. */
2789
2790 static void
2791 test_lexer_string_locations_wide_string (const line_table_case &case_)
2792 {
2793 /* Digits 0-9.
2794 ....................000000000.11111111112.22222222233333
2795 ....................123456789.01234567890.12345678901234 */
2796 const char *content = " L\"0123456789\" /* non-str */\n";
2797 lexer_test test (case_, content, NULL);
2798
2799 /* Verify that we get the expected token back, with the correct
2800 location information. */
2801 const cpp_token *tok = test.get_token ();
2802 ASSERT_EQ (tok->type, CPP_WSTRING);
2803 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2804
2805 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2806 cpp_string dst_string;
2807 const enum cpp_ttype type = CPP_WSTRING;
2808 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2809 &dst_string, type);
2810 ASSERT_TRUE (result);
2811 /* The cpp_reader defaults to big-endian with
2812 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2813 now be encoded as UTF-32BE. */
2814 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2815 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2816 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2817 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2818 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2819 free (const_cast <unsigned char *> (dst_string.text));
2820
2821 /* We don't yet support generating substring location information
2822 for L"" strings. */
2823 ASSERT_HAS_NO_SUBSTRING_RANGES
2824 (test, tok->src_loc, type,
2825 "execution character set != source character set");
2826 }
2827
/* Read the two bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 16-bit quantity and return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];

  return (uint16_t) ((high_byte << 8) | low_byte);
}
2836
2837 /* Lex a u"" string literal and verify that attempts to read substring
2838 location data from it fail gracefully. */
2839
2840 static void
2841 test_lexer_string_locations_string16 (const line_table_case &case_)
2842 {
2843 /* Digits 0-9.
2844 ....................000000000.11111111112.22222222233333
2845 ....................123456789.01234567890.12345678901234 */
2846 const char *content = " u\"0123456789\" /* non-str */\n";
2847 lexer_test test (case_, content, NULL);
2848
2849 /* Verify that we get the expected token back, with the correct
2850 location information. */
2851 const cpp_token *tok = test.get_token ();
2852 ASSERT_EQ (tok->type, CPP_STRING16);
2853 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2854
2855 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2856 cpp_string dst_string;
2857 const enum cpp_ttype type = CPP_STRING16;
2858 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2859 &dst_string, type);
2860 ASSERT_TRUE (result);
2861
2862 /* The cpp_reader defaults to big-endian, so dst_string should
2863 now be encoded as UTF-16BE. */
2864 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2865 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2866 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2867 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2868 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2869 free (const_cast <unsigned char *> (dst_string.text));
2870
2871 /* We don't yet support generating substring location information
2872 for L"" strings. */
2873 ASSERT_HAS_NO_SUBSTRING_RANGES
2874 (test, tok->src_loc, type,
2875 "execution character set != source character set");
2876 }
2877
2878 /* Lex a U"" string literal and verify that attempts to read substring
2879 location data from it fail gracefully. */
2880
2881 static void
2882 test_lexer_string_locations_string32 (const line_table_case &case_)
2883 {
2884 /* Digits 0-9.
2885 ....................000000000.11111111112.22222222233333
2886 ....................123456789.01234567890.12345678901234 */
2887 const char *content = " U\"0123456789\" /* non-str */\n";
2888 lexer_test test (case_, content, NULL);
2889
2890 /* Verify that we get the expected token back, with the correct
2891 location information. */
2892 const cpp_token *tok = test.get_token ();
2893 ASSERT_EQ (tok->type, CPP_STRING32);
2894 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2895
2896 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2897 cpp_string dst_string;
2898 const enum cpp_ttype type = CPP_STRING32;
2899 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2900 &dst_string, type);
2901 ASSERT_TRUE (result);
2902
2903 /* The cpp_reader defaults to big-endian, so dst_string should
2904 now be encoded as UTF-32BE. */
2905 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2906 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2907 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2908 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2909 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2910 free (const_cast <unsigned char *> (dst_string.text));
2911
2912 /* We don't yet support generating substring location information
2913 for L"" strings. */
2914 ASSERT_HAS_NO_SUBSTRING_RANGES
2915 (test, tok->src_loc, type,
2916 "execution character set != source character set");
2917 }
2918
2919 /* Lex a u8-string literal.
2920 Verify the substring location data after running cpp_interpret_string
2921 on it. */
2922
2923 static void
2924 test_lexer_string_locations_u8 (const line_table_case &case_)
2925 {
2926 /* Digits 0-9.
2927 ....................000000000.11111111112.22222222233333
2928 ....................123456789.01234567890.12345678901234 */
2929 const char *content = " u8\"0123456789\" /* non-str */\n";
2930 lexer_test test (case_, content, NULL);
2931
2932 /* Verify that we get the expected token back, with the correct
2933 location information. */
2934 const cpp_token *tok = test.get_token ();
2935 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2936 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2937
2938 /* Verify that cpp_interpret_string works. */
2939 cpp_string dst_string;
2940 const enum cpp_ttype type = CPP_STRING;
2941 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2942 &dst_string, type);
2943 ASSERT_TRUE (result);
2944 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2945 free (const_cast <unsigned char *> (dst_string.text));
2946
2947 /* Verify ranges of individual characters. This no longer includes the
2948 opening quote, but does include the closing quote. */
2949 for (int i = 0; i <= 10; i++)
2950 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2951 }
2952
2953 /* Lex a string literal containing UTF-8 source characters.
2954 Verify the substring location data after running cpp_interpret_string
2955 on it. */
2956
2957 static void
2958 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2959 {
2960 /* This string literal is written out to the source file as UTF-8,
2961 and is of the form "before mojibake after", where "mojibake"
2962 is written as the following four unicode code points:
2963 U+6587 CJK UNIFIED IDEOGRAPH-6587
2964 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2965 U+5316 CJK UNIFIED IDEOGRAPH-5316
2966 U+3051 HIRAGANA LETTER KE.
2967 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2968 "before" and "after" are 1 byte per unicode character.
2969
2970 The numbering shown are "columns", which are *byte* numbers within
2971 the line, rather than unicode character numbers.
2972
2973 .................... 000000000.1111111.
2974 .................... 123456789.0123456. */
2975 const char *content = (" \"before "
2976 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2977 UTF-8: 0xE6 0x96 0x87
2978 C octal escaped UTF-8: \346\226\207
2979 "column" numbers: 17-19. */
2980 "\346\226\207"
2981
2982 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2983 UTF-8: 0xE5 0xAD 0x97
2984 C octal escaped UTF-8: \345\255\227
2985 "column" numbers: 20-22. */
2986 "\345\255\227"
2987
2988 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2989 UTF-8: 0xE5 0x8C 0x96
2990 C octal escaped UTF-8: \345\214\226
2991 "column" numbers: 23-25. */
2992 "\345\214\226"
2993
2994 /* U+3051 HIRAGANA LETTER KE
2995 UTF-8: 0xE3 0x81 0x91
2996 C octal escaped UTF-8: \343\201\221
2997 "column" numbers: 26-28. */
2998 "\343\201\221"
2999
3000 /* column numbers 29 onwards
3001 2333333.33334444444444
3002 9012345.67890123456789. */
3003 " after\" /* non-str */\n");
3004 lexer_test test (case_, content, NULL);
3005
3006 /* Verify that we get the expected token back, with the correct
3007 location information. */
3008 const cpp_token *tok = test.get_token ();
3009 ASSERT_EQ (tok->type, CPP_STRING);
3010 ASSERT_TOKEN_AS_TEXT_EQ
3011 (test.m_parser, tok,
3012 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3013
3014 /* Verify that cpp_interpret_string works. */
3015 cpp_string dst_string;
3016 const enum cpp_ttype type = CPP_STRING;
3017 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3018 &dst_string, type);
3019 ASSERT_TRUE (result);
3020 ASSERT_STREQ
3021 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3022 (const char *)dst_string.text);
3023 free (const_cast <unsigned char *> (dst_string.text));
3024
3025 /* Verify ranges of individual characters. This no longer includes the
3026 opening quote, but does include the closing quote.
3027 Assuming that both source and execution encodings are UTF-8, we have
3028 a run of 25 octets in each, plus the NUL terminator. */
3029 for (int i = 0; i < 25; i++)
3030 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3031 /* NUL-terminator should use the closing quote at column 35. */
3032 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3033
3034 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3035 }
3036
3037 /* Test of string literal concatenation. */
3038
3039 static void
3040 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3041 {
3042 /* Digits 0-9.
3043 .....................000000000.111111.11112222222222
3044 .....................123456789.012345.67890123456789. */
3045 const char *content = (" \"01234\" /* non-str */\n"
3046 " \"56789\" /* non-str */\n");
3047 lexer_test test (case_, content, NULL);
3048
3049 location_t input_locs[2];
3050
3051 /* Verify that we get the expected tokens back. */
3052 auto_vec <cpp_string> input_strings;
3053 const cpp_token *tok_a = test.get_token ();
3054 ASSERT_EQ (tok_a->type, CPP_STRING);
3055 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3056 input_strings.safe_push (tok_a->val.str);
3057 input_locs[0] = tok_a->src_loc;
3058
3059 const cpp_token *tok_b = test.get_token ();
3060 ASSERT_EQ (tok_b->type, CPP_STRING);
3061 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3062 input_strings.safe_push (tok_b->val.str);
3063 input_locs[1] = tok_b->src_loc;
3064
3065 /* Verify that cpp_interpret_string works. */
3066 cpp_string dst_string;
3067 const enum cpp_ttype type = CPP_STRING;
3068 bool result = cpp_interpret_string (test.m_parser,
3069 input_strings.address (), 2,
3070 &dst_string, type);
3071 ASSERT_TRUE (result);
3072 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3073 free (const_cast <unsigned char *> (dst_string.text));
3074
3075 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3076 test.m_concats.record_string_concatenation (2, input_locs);
3077
3078 location_t initial_loc = input_locs[0];
3079
3080 /* "01234" on line 1. */
3081 for (int i = 0; i <= 4; i++)
3082 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3083 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3084 for (int i = 5; i <= 10; i++)
3085 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3086
3087 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3088 }
3089
3090 /* Another test of string literal concatenation. */
3091
3092 static void
3093 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3094 {
3095 /* Digits 0-9.
3096 .....................000000000.111.11111112222222
3097 .....................123456789.012.34567890123456. */
3098 const char *content = (" \"01\" /* non-str */\n"
3099 " \"23\" /* non-str */\n"
3100 " \"45\" /* non-str */\n"
3101 " \"67\" /* non-str */\n"
3102 " \"89\" /* non-str */\n");
3103 lexer_test test (case_, content, NULL);
3104
3105 auto_vec <cpp_string> input_strings;
3106 location_t input_locs[5];
3107
3108 /* Verify that we get the expected tokens back. */
3109 for (int i = 0; i < 5; i++)
3110 {
3111 const cpp_token *tok = test.get_token ();
3112 ASSERT_EQ (tok->type, CPP_STRING);
3113 input_strings.safe_push (tok->val.str);
3114 input_locs[i] = tok->src_loc;
3115 }
3116
3117 /* Verify that cpp_interpret_string works. */
3118 cpp_string dst_string;
3119 const enum cpp_ttype type = CPP_STRING;
3120 bool result = cpp_interpret_string (test.m_parser,
3121 input_strings.address (), 5,
3122 &dst_string, type);
3123 ASSERT_TRUE (result);
3124 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3125 free (const_cast <unsigned char *> (dst_string.text));
3126
3127 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3128 test.m_concats.record_string_concatenation (5, input_locs);
3129
3130 location_t initial_loc = input_locs[0];
3131
3132 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3133 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3134 and expect get_source_range_for_substring to fail.
3135 However, for a string concatenation test, we can have a case
3136 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3137 but subsequent strings can be after it.
3138 Attempting to detect this within assert_char_at_range
3139 would overcomplicate the logic for the common test cases, so
3140 we detect it here. */
3141 if (should_have_column_data_p (input_locs[0])
3142 && !should_have_column_data_p (input_locs[4]))
3143 {
3144 /* Verify that get_source_range_for_substring gracefully rejects
3145 this case. */
3146 source_range actual_range;
3147 const char *err
3148 = get_source_range_for_char (test.m_parser, &test.m_concats,
3149 initial_loc, type, 0, &actual_range);
3150 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3151 return;
3152 }
3153
3154 for (int i = 0; i < 5; i++)
3155 for (int j = 0; j < 2; j++)
3156 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3157 i + 1, 10 + j, 10 + j);
3158
3159 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3160 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3161
3162 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3163 }
3164
3165 /* Another test of string literal concatenation, this time combined with
3166 various kinds of escaped characters. */
3167
3168 static void
3169 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3170 {
3171 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3172 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3173 const char *content
3174 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3175 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3176 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3177 lexer_test test (case_, content, NULL);
3178
3179 auto_vec <cpp_string> input_strings;
3180 location_t input_locs[4];
3181
3182 /* Verify that we get the expected tokens back. */
3183 for (int i = 0; i < 4; i++)
3184 {
3185 const cpp_token *tok = test.get_token ();
3186 ASSERT_EQ (tok->type, CPP_STRING);
3187 input_strings.safe_push (tok->val.str);
3188 input_locs[i] = tok->src_loc;
3189 }
3190
3191 /* Verify that cpp_interpret_string works. */
3192 cpp_string dst_string;
3193 const enum cpp_ttype type = CPP_STRING;
3194 bool result = cpp_interpret_string (test.m_parser,
3195 input_strings.address (), 4,
3196 &dst_string, type);
3197 ASSERT_TRUE (result);
3198 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3199 free (const_cast <unsigned char *> (dst_string.text));
3200
3201 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3202 test.m_concats.record_string_concatenation (4, input_locs);
3203
3204 location_t initial_loc = input_locs[0];
3205
3206 for (int i = 0; i <= 4; i++)
3207 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3208 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3209 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3210 for (int i = 7; i <= 9; i++)
3211 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3212
3213 /* NUL-terminator should use the location of the final closing quote. */
3214 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3215
3216 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3217 }
3218
3219 /* Test of string literal in a macro. */
3220
3221 static void
3222 test_lexer_string_locations_macro (const line_table_case &case_)
3223 {
3224 /* Digits 0-9.
3225 .....................0000000001111111111.22222222223.
3226 .....................1234567890123456789.01234567890. */
3227 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3228 " MACRO");
3229 lexer_test test (case_, content, NULL);
3230
3231 /* Verify that we get the expected tokens back. */
3232 const cpp_token *tok = test.get_token ();
3233 ASSERT_EQ (tok->type, CPP_PADDING);
3234
3235 tok = test.get_token ();
3236 ASSERT_EQ (tok->type, CPP_STRING);
3237 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3238
3239 /* Verify ranges of individual characters. We ought to
3240 see columns within the macro definition. */
3241 for (int i = 0; i <= 10; i++)
3242 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3243 i, 1, 20 + i, 20 + i);
3244
3245 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3246
3247 tok = test.get_token ();
3248 ASSERT_EQ (tok->type, CPP_PADDING);
3249 }
3250
3251 /* Test of stringification of a macro argument. */
3252
3253 static void
3254 test_lexer_string_locations_stringified_macro_argument
3255 (const line_table_case &case_)
3256 {
3257 /* .....................000000000111111111122222222223.
3258 .....................123456789012345678901234567890. */
3259 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3260 "MACRO(foo)\n");
3261 lexer_test test (case_, content, NULL);
3262
3263 /* Verify that we get the expected token back. */
3264 const cpp_token *tok = test.get_token ();
3265 ASSERT_EQ (tok->type, CPP_PADDING);
3266
3267 tok = test.get_token ();
3268 ASSERT_EQ (tok->type, CPP_STRING);
3269 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3270
3271 /* We don't support getting the location of a stringified macro
3272 argument. Verify that it fails gracefully. */
3273 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3274 "cpp_interpret_string_1 failed");
3275
3276 tok = test.get_token ();
3277 ASSERT_EQ (tok->type, CPP_PADDING);
3278
3279 tok = test.get_token ();
3280 ASSERT_EQ (tok->type, CPP_PADDING);
3281 }
3282
3283 /* Ensure that we are fail gracefully if something attempts to pass
3284 in a location that isn't a string literal token. Seen on this code:
3285
3286 const char a[] = " %d ";
3287 __builtin_printf (a, 0.5);
3288 ^
3289
3290 when c-format.c erroneously used the indicated one-character
3291 location as the format string location, leading to a read past the
3292 end of a string buffer in cpp_interpret_string_1. */
3293
3294 static void
3295 test_lexer_string_locations_non_string (const line_table_case &case_)
3296 {
3297 /* .....................000000000111111111122222222223.
3298 .....................123456789012345678901234567890. */
3299 const char *content = (" a\n");
3300 lexer_test test (case_, content, NULL);
3301
3302 /* Verify that we get the expected token back. */
3303 const cpp_token *tok = test.get_token ();
3304 ASSERT_EQ (tok->type, CPP_NAME);
3305 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3306
3307 /* At this point, libcpp is attempting to interpret the name as a
3308 string literal, despite it not starting with a quote. We don't detect
3309 that, but we should at least fail gracefully. */
3310 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3311 "cpp_interpret_string_1 failed");
3312 }
3313
3314 /* Ensure that we can read substring information for a token which
3315 starts in one linemap and ends in another . Adapted from
3316 gcc.dg/cpp/pr69985.c. */
3317
3318 static void
3319 test_lexer_string_locations_long_line (const line_table_case &case_)
3320 {
3321 /* .....................000000.000111111111
3322 .....................123456.789012346789. */
3323 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3324 " \"0123456789012345678901234567890123456789"
3325 "0123456789012345678901234567890123456789"
3326 "0123456789012345678901234567890123456789"
3327 "0123456789\"\n");
3328
3329 lexer_test test (case_, content, NULL);
3330
3331 /* Verify that we get the expected token back. */
3332 const cpp_token *tok = test.get_token ();
3333 ASSERT_EQ (tok->type, CPP_STRING);
3334
3335 if (!should_have_column_data_p (line_table->highest_location))
3336 return;
3337
3338 /* Verify ranges of individual characters. */
3339 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3340 for (int i = 0; i < 131; i++)
3341 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3342 i, 2, 7 + i, 7 + i);
3343 }
3344
3345 /* Test of locations within a raw string that doesn't contain a newline. */
3346
3347 static void
3348 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3349 {
3350 /* .....................00.0000000111111111122.
3351 .....................12.3456789012345678901. */
3352 const char *content = ("R\"foo(0123456789)foo\"\n");
3353 lexer_test test (case_, content, NULL);
3354
3355 /* Verify that we get the expected token back. */
3356 const cpp_token *tok = test.get_token ();
3357 ASSERT_EQ (tok->type, CPP_STRING);
3358
3359 /* Verify that cpp_interpret_string works. */
3360 cpp_string dst_string;
3361 const enum cpp_ttype type = CPP_STRING;
3362 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3363 &dst_string, type);
3364 ASSERT_TRUE (result);
3365 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3366 free (const_cast <unsigned char *> (dst_string.text));
3367
3368 if (!should_have_column_data_p (line_table->highest_location))
3369 return;
3370
3371 /* 0-9, plus the nil terminator. */
3372 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3373 for (int i = 0; i < 11; i++)
3374 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3375 i, 1, 7 + i, 7 + i);
3376 }
3377
3378 /* Test of locations within a raw string that contains a newline. */
3379
3380 static void
3381 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3382 {
3383 /* .....................00.0000.
3384 .....................12.3456. */
3385 const char *content = ("R\"foo(\n"
3386 /* .....................00000.
3387 .....................12345. */
3388 "hello\n"
3389 "world\n"
3390 /* .....................00000.
3391 .....................12345. */
3392 ")foo\"\n");
3393 lexer_test test (case_, content, NULL);
3394
3395 /* Verify that we get the expected token back. */
3396 const cpp_token *tok = test.get_token ();
3397 ASSERT_EQ (tok->type, CPP_STRING);
3398
3399 /* Verify that cpp_interpret_string works. */
3400 cpp_string dst_string;
3401 const enum cpp_ttype type = CPP_STRING;
3402 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3403 &dst_string, type);
3404 ASSERT_TRUE (result);
3405 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3406 free (const_cast <unsigned char *> (dst_string.text));
3407
3408 if (!should_have_column_data_p (line_table->highest_location))
3409 return;
3410
3411 /* Currently we don't support locations within raw strings that
3412 contain newlines. */
3413 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3414 "range endpoints are on different lines");
3415 }
3416
3417 /* Test of parsing an unterminated raw string. */
3418
3419 static void
3420 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3421 {
3422 const char *content = "R\"ouch()ouCh\" /* etc */";
3423
3424 lexer_diagnostic_sink diagnostics;
3425 lexer_test test (case_, content, &diagnostics);
3426 test.m_implicitly_expect_EOF = false;
3427
3428 /* Attempt to parse the raw string. */
3429 const cpp_token *tok = test.get_token ();
3430 ASSERT_EQ (tok->type, CPP_EOF);
3431
3432 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3433 /* We expect the message "unterminated raw string"
3434 in the "cpplib" translation domain.
3435 It's not clear that dgettext is available on all supported hosts,
3436 so this assertion is commented-out for now.
3437 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3438 diagnostics.m_diagnostics[0]);
3439 */
3440 }
3441
3442 /* Test of lexing char constants. */
3443
3444 static void
3445 test_lexer_char_constants (const line_table_case &case_)
3446 {
3447 /* Various char constants.
3448 .....................0000000001111111111.22222222223.
3449 .....................1234567890123456789.01234567890. */
3450 const char *content = (" 'a'\n"
3451 " u'a'\n"
3452 " U'a'\n"
3453 " L'a'\n"
3454 " 'abc'\n");
3455 lexer_test test (case_, content, NULL);
3456
3457 /* Verify that we get the expected tokens back. */
3458 /* 'a'. */
3459 const cpp_token *tok = test.get_token ();
3460 ASSERT_EQ (tok->type, CPP_CHAR);
3461 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3462
3463 unsigned int chars_seen;
3464 int unsignedp;
3465 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3466 &chars_seen, &unsignedp);
3467 ASSERT_EQ (cc, 'a');
3468 ASSERT_EQ (chars_seen, 1);
3469
3470 /* u'a'. */
3471 tok = test.get_token ();
3472 ASSERT_EQ (tok->type, CPP_CHAR16);
3473 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3474
3475 /* U'a'. */
3476 tok = test.get_token ();
3477 ASSERT_EQ (tok->type, CPP_CHAR32);
3478 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3479
3480 /* L'a'. */
3481 tok = test.get_token ();
3482 ASSERT_EQ (tok->type, CPP_WCHAR);
3483 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3484
3485 /* 'abc' (c-char-sequence). */
3486 tok = test.get_token ();
3487 ASSERT_EQ (tok->type, CPP_CHAR);
3488 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3489 }
3490 /* A table of interesting location_t values, giving one axis of our test
3491 matrix. */
3492
3493 static const location_t boundary_locations[] = {
3494 /* Zero means "don't override the default values for a new line_table". */
3495 0,
3496
3497 /* An arbitrary non-zero value that isn't close to one of
3498 the boundary values below. */
3499 0x10000,
3500
3501 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3502 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3503 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3504 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3505 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3506 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3507
3508 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3509 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3510 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3511 LINE_MAP_MAX_LOCATION_WITH_COLS,
3512 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3513 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3514 };
3515
3516 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3517
3518 void
3519 for_each_line_table_case (void (*testcase) (const line_table_case &))
3520 {
3521 /* As noted above in the description of struct line_table_case,
3522 we want to explore a test matrix of interesting line_table
3523 situations, running various selftests for each case within the
3524 matrix. */
3525
3526 /* Run all tests with:
3527 (a) line_table->default_range_bits == 0, and
3528 (b) line_table->default_range_bits == 5. */
3529 int num_cases_tested = 0;
3530 for (int default_range_bits = 0; default_range_bits <= 5;
3531 default_range_bits += 5)
3532 {
3533 /* ...and use each of the "interesting" location values as
3534 the starting location within line_table. */
3535 const int num_boundary_locations
3536 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3537 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3538 {
3539 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3540
3541 testcase (c);
3542
3543 num_cases_tested++;
3544 }
3545 }
3546
3547 /* Verify that we fully covered the test matrix. */
3548 ASSERT_EQ (num_cases_tested, 2 * 12);
3549 }
3550
3551 /* Run all of the selftests within this file. */
3552
3553 void
3554 input_c_tests ()
3555 {
3556 test_linenum_comparisons ();
3557 test_should_have_column_data_p ();
3558 test_unknown_location ();
3559 test_builtins ();
3560 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3561
3562 for_each_line_table_case (test_accessing_ordinary_linemaps);
3563 for_each_line_table_case (test_lexer);
3564 for_each_line_table_case (test_lexer_string_locations_simple);
3565 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3566 for_each_line_table_case (test_lexer_string_locations_hex);
3567 for_each_line_table_case (test_lexer_string_locations_oct);
3568 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3569 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3570 for_each_line_table_case (test_lexer_string_locations_ucn4);
3571 for_each_line_table_case (test_lexer_string_locations_ucn8);
3572 for_each_line_table_case (test_lexer_string_locations_wide_string);
3573 for_each_line_table_case (test_lexer_string_locations_string16);
3574 for_each_line_table_case (test_lexer_string_locations_string32);
3575 for_each_line_table_case (test_lexer_string_locations_u8);
3576 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3577 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3578 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3579 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3580 for_each_line_table_case (test_lexer_string_locations_macro);
3581 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3582 for_each_line_table_case (test_lexer_string_locations_non_string);
3583 for_each_line_table_case (test_lexer_string_locations_long_line);
3584 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3585 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3586 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3587 for_each_line_table_case (test_lexer_char_constants);
3588
3589 test_reading_source_line ();
3590 }
3591
3592 } // namespace selftest
3593
3594 #endif /* CHECKING_P */