Convert cpplib to use libiberty/hashtab.c.
[gcc.git] / gcc / cppfiles.c
1 /* Part of CPP library. (include file handling)
2 Copyright (C) 1986, 1987, 1989, 1992, 1993, 1994, 1995, 1998,
3 1999, 2000 Free Software Foundation, Inc.
4 Written by Per Bothner, 1994.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Split out of cpplib.c, Zack Weinberg, Oct 1998
8
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
23 In other words, you are welcome to use, share and improve this program.
24 You are forbidden to forbid anyone else to use, share and improve
25 what you give them. Help stamp out software-hoarding! */
26
27 #include "config.h"
28 #include "system.h"
29 #include "cpplib.h"
30 #include "cpphash.h"
31 #include "hashtab.h"
32 #include "intl.h"
33
34 static IHASH *redundant_include_p PARAMS ((cpp_reader *, IHASH *,
35 struct file_name_list *));
36 static struct file_name_map *read_name_map
37 PARAMS ((cpp_reader *, const char *));
38 static char *read_filename_string PARAMS ((int, FILE *));
39 static char *remap_filename PARAMS ((cpp_reader *, char *,
40 struct file_name_list *));
41 static long read_and_prescan PARAMS ((cpp_reader *, cpp_buffer *,
42 int, size_t));
43 static struct file_name_list *actual_directory
44 PARAMS ((cpp_reader *, const char *));
45
46 static unsigned int hash_IHASH PARAMS ((const void *));
47 static int eq_IHASH PARAMS ((const void *, const void *));
48
49 static void init_input_buffer PARAMS ((cpp_reader *, int, struct stat *));
50 static int file_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
51 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
52
53 #if 0
54 static void hack_vms_include_specification PARAMS ((char *));
55 #endif
56
/* Initial size of include hash table.  */
#define IHASHSIZE 50

/* Extra bytes added to the pathname buffer that
   _cpp_find_include_file allocates; defaults to none unless it has
   already been defined.  */
#ifndef INCLUDE_LEN_FUDGE
#define INCLUDE_LEN_FUDGE 0
#endif

/* Open files in nonblocking mode, so we don't get stuck if someone
   clever has asked cpp to process /dev/rmt0.  _cpp_read_include_file
   will check that we have a real file to work with.  Also take care
   not to acquire a controlling terminal by mistake (this can't happen
   on sane systems, but paranoia is a virtue).

   The expansion is parenthesized so that the macro behaves as a
   single value wherever it is used in a larger expression.  */
#define OMODES (O_RDONLY|O_NONBLOCK|O_NOCTTY)
70
71 /* Calculate hash of an IHASH entry. */
72 static unsigned int
73 hash_IHASH (x)
74 const void *x;
75 {
76 IHASH *i = (IHASH *)x;
77 unsigned int r = 0, len = 0;
78 const U_CHAR *s = i->nshort;
79
80 if (i->hash != (unsigned long)-1)
81 return i->hash;
82
83 do
84 len++, r = r * 67 + (*s++ - 113);
85 while (*s && *s != '.');
86 i->hash = r + len;
87 return r + len;
88 }
89
90 /* Compare an existing IHASH structure with a potential one. */
91 static int
92 eq_IHASH (x, y)
93 const void *x;
94 const void *y;
95 {
96 const U_CHAR *a = ((const IHASH *)x)->nshort;
97 const U_CHAR *b = ((const IHASH *)y)->nshort;
98 return !strcmp (a, b);
99 }
100
101 /* Init the hash table. In here so it can see the hash and eq functions. */
102 void
103 _cpp_init_include_hash (pfile)
104 cpp_reader *pfile;
105 {
106 pfile->all_include_files
107 = htab_create (IHASHSIZE, hash_IHASH, eq_IHASH, free);
108 }
109
110 /* Return 0 if the file pointed to by IHASH has never been included before,
111 -1 if it has been included before and need not be again,
112 or a pointer to an IHASH entry which is the file to be reread.
113 "Never before" is with respect to the position in ILIST.
114
115 This will not detect redundancies involving odd uses of the
116 `current directory' rule for "" includes. They aren't quite
117 pathological, but I think they are rare enough not to worry about.
118 The simplest example is:
119
120 top.c:
121 #include "a/a.h"
122 #include "b/b.h"
123
124 a/a.h:
125 #include "../b/b.h"
126
127 and the problem is that for `current directory' includes,
128 ihash->foundhere is not on any of the global include chains,
129 so the test below (i->foundhere == l) may be false even when
130 the directories are in fact the same. */
131
132 static IHASH *
133 redundant_include_p (pfile, ihash, ilist)
134 cpp_reader *pfile;
135 IHASH *ihash;
136 struct file_name_list *ilist;
137 {
138 struct file_name_list *l;
139 IHASH *i;
140
141 if (! ihash->foundhere)
142 return 0;
143
144 for (i = ihash; i; i = i->next_this_file)
145 for (l = ilist; l; l = l->next)
146 if (i->foundhere == l)
147 /* The control_macro works like this: If it's NULL, the file
148 is to be included again. If it's "", the file is never to
149 be included again. If it's a string, the file is not to be
150 included again if the string is the name of a defined macro. */
151 return (i->control_macro
152 && (i->control_macro[0] == '\0'
153 || cpp_defined (pfile, i->control_macro, -1)))
154 ? (IHASH *)-1 : i;
155
156 return 0;
157 }
158
159 /* Return 1 if the file named by FNAME has been included before in
160 any context, 0 otherwise. */
161 int
162 cpp_included (pfile, fname)
163 cpp_reader *pfile;
164 const char *fname;
165 {
166 IHASH dummy, *ptr;
167 dummy.nshort = fname;
168 dummy.hash = -1;
169 ptr = htab_find (pfile->all_include_files, (const void *)&dummy);
170 return (ptr != NULL);
171 }
172
173 static int
174 file_cleanup (pbuf, pfile)
175 cpp_buffer *pbuf;
176 cpp_reader *pfile;
177 {
178 if (pbuf->buf)
179 free ((PTR) pbuf->buf);
180 if (pfile->system_include_depth)
181 pfile->system_include_depth--;
182 return 0;
183 }
184
185 /* Search for include file FNAME in the include chain starting at
186 SEARCH_START. Return -2 if this file doesn't need to be included
187 (because it was included already and it's marked idempotent),
188 -1 if an error occurred, or a file descriptor open on the file.
189 *IHASH is set to point to the include hash entry for this file, and
190 *BEFORE is set to 1 if the file was included before (but needs to be read
191 again). */
192 int
193 _cpp_find_include_file (pfile, fname, search_start, ihash, before)
194 cpp_reader *pfile;
195 const char *fname;
196 struct file_name_list *search_start;
197 IHASH **ihash;
198 int *before;
199 {
200 struct file_name_list *path;
201 IHASH *ih, **slot;
202 IHASH dummy;
203 int f;
204 char *name;
205
206 dummy.hash = -1;
207 dummy.nshort = fname;
208 path = (fname[0] == '/') ? ABSOLUTE_PATH : search_start;
209 slot = (IHASH **) htab_find_slot (pfile->all_include_files,
210 (const void *)&dummy, 1);
211
212 if (*slot && (ih = redundant_include_p (pfile, *slot, path)))
213 {
214 if (ih == (IHASH *)-1)
215 return -2;
216
217 *before = 1;
218 *ihash = ih;
219 return open (ih->name, OMODES);
220 }
221
222 if (path == ABSOLUTE_PATH)
223 {
224 name = (char *) fname;
225 f = open (name, OMODES);
226 }
227 else
228 {
229 /* Search directory path, trying to open the file. */
230 name = alloca (strlen (fname) + pfile->max_include_len
231 + 2 + INCLUDE_LEN_FUDGE);
232 do
233 {
234 memcpy (name, path->name, path->nlen);
235 name[path->nlen] = '/';
236 strcpy (&name[path->nlen+1], fname);
237 _cpp_simplify_pathname (name);
238 if (CPP_OPTIONS (pfile)->remap)
239 name = remap_filename (pfile, name, path);
240
241 f = open (name, OMODES);
242 #ifdef EACCES
243 if (f == -1 && errno == EACCES)
244 {
245 cpp_error (pfile,
246 "included file `%s' exists but is not readable",
247 name);
248 return -1;
249 }
250 #endif
251 if (f >= 0)
252 break;
253 path = path->next;
254 }
255 while (path);
256 }
257 if (f == -1)
258 return -1;
259
260 ih = (IHASH *) xmalloc (sizeof (IHASH) + strlen (name));
261 strcpy ((char *)ih->name, name);
262 ih->foundhere = path;
263 if (path == ABSOLUTE_PATH)
264 ih->nshort = ih->name;
265 else
266 ih->nshort = strstr (ih->name, fname);
267 ih->control_macro = NULL;
268 ih->hash = dummy.hash;
269
270 ih->next_this_file = *slot;
271 *slot = ih;
272
273 *before = 0;
274 *ihash = ih;
275 return f;
276 }
277
/* One entry of a directory's file name map.  The map for a directory
   is read from the file named FILE_NAME_MAP_FILE in that directory.
   Such a file can be used to map filenames on a file system with
   severe filename restrictions, such as DOS.  Its format is just a
   series of lines with two tokens on each line: the first token is
   the name to map, and the second token is the actual name to use.  */

struct file_name_map
{
  /* Next entry in the same directory's map.  */
  struct file_name_map *map_next;
  /* Name to be mapped (first token of a line).  */
  char *map_from;
  /* Name to use instead (second token of a line).  */
  char *map_to;
};

#define FILE_NAME_MAP_FILE "header.gcc"
294
/* Read a space delimited string of unlimited length from a stdio
   file.

   CH is the first character of the string, already read from F by
   the caller.  Returns a freshly allocated, NUL-terminated string
   that the caller owns.  The character that ended the string is
   pushed back onto F.  If CH is white space or EOF the result is the
   empty string; EOF is never handed to is_space or stored in the
   result.  */

static char *
read_filename_string (ch, f)
     int ch;
     FILE *f;
{
  char *alloc, *set;
  int len;

  len = 20;
  set = alloc = xmalloc (len + 1);
  if (ch != EOF && ! is_space(ch))
    {
      *set++ = ch;
      while ((ch = getc (f)) != EOF && ! is_space(ch))
        {
          if (set - alloc == len)
            {
              /* Full: double the capacity and reposition SET at the
                 same offset (the old capacity) in the new block.  */
              len *= 2;
              alloc = xrealloc (alloc, len + 1);
              set = alloc + len / 2;
            }
          *set++ = ch;
        }
    }
  *set = '\0';
  /* Pushing back EOF is defined to do nothing.  */
  ungetc (ch, f);
  return alloc;
}
326
/* Node of the linked list of file name maps, one node per directory
   whose map has been looked for.  */

struct file_name_map_list
{
  /* Next directory's node.  */
  struct file_name_map_list *map_list_next;
  /* Directory name this node was read for.  */
  char *map_list_name;
  /* Head of that directory's map entries.  */
  struct file_name_map *map_list_map;
};
335
336 /* Read the file name map file for DIRNAME. */
337
338 static struct file_name_map *
339 read_name_map (pfile, dirname)
340 cpp_reader *pfile;
341 const char *dirname;
342 {
343 register struct file_name_map_list *map_list_ptr;
344 char *name;
345 FILE *f;
346
347 for (map_list_ptr = CPP_OPTIONS (pfile)->map_list; map_list_ptr;
348 map_list_ptr = map_list_ptr->map_list_next)
349 if (! strcmp (map_list_ptr->map_list_name, dirname))
350 return map_list_ptr->map_list_map;
351
352 map_list_ptr = ((struct file_name_map_list *)
353 xmalloc (sizeof (struct file_name_map_list)));
354 map_list_ptr->map_list_name = xstrdup (dirname);
355
356 name = (char *) alloca (strlen (dirname) + strlen (FILE_NAME_MAP_FILE) + 2);
357 strcpy (name, dirname);
358 if (*dirname)
359 strcat (name, "/");
360 strcat (name, FILE_NAME_MAP_FILE);
361 f = fopen (name, "r");
362 if (!f)
363 map_list_ptr->map_list_map = (struct file_name_map *)-1;
364 else
365 {
366 int ch;
367 int dirlen = strlen (dirname);
368
369 while ((ch = getc (f)) != EOF)
370 {
371 char *from, *to;
372 struct file_name_map *ptr;
373
374 if (is_space(ch))
375 continue;
376 from = read_filename_string (ch, f);
377 while ((ch = getc (f)) != EOF && is_hspace(ch))
378 ;
379 to = read_filename_string (ch, f);
380
381 ptr = ((struct file_name_map *)
382 xmalloc (sizeof (struct file_name_map)));
383 ptr->map_from = from;
384
385 /* Make the real filename absolute. */
386 if (*to == '/')
387 ptr->map_to = to;
388 else
389 {
390 ptr->map_to = xmalloc (dirlen + strlen (to) + 2);
391 strcpy (ptr->map_to, dirname);
392 ptr->map_to[dirlen] = '/';
393 strcpy (ptr->map_to + dirlen + 1, to);
394 free (to);
395 }
396
397 ptr->map_next = map_list_ptr->map_list_map;
398 map_list_ptr->map_list_map = ptr;
399
400 while ((ch = getc (f)) != '\n')
401 if (ch == EOF)
402 break;
403 }
404 fclose (f);
405 }
406
407 map_list_ptr->map_list_next = CPP_OPTIONS (pfile)->map_list;
408 CPP_OPTIONS (pfile)->map_list = map_list_ptr;
409
410 return map_list_ptr->map_list_map;
411 }
412
413 /* Remap NAME based on the file_name_map (if any) for LOC. */
414
415 static char *
416 remap_filename (pfile, name, loc)
417 cpp_reader *pfile;
418 char *name;
419 struct file_name_list *loc;
420 {
421 struct file_name_map *map;
422 const char *from, *p, *dir;
423
424 if (! loc->name_map)
425 loc->name_map = read_name_map (pfile,
426 loc->name
427 ? loc->name : ".");
428
429 if (loc->name_map == (struct file_name_map *)-1)
430 return name;
431
432 from = name + strlen (loc->name) + 1;
433
434 for (map = loc->name_map; map; map = map->map_next)
435 if (!strcmp (map->map_from, from))
436 return map->map_to;
437
438 /* Try to find a mapping file for the particular directory we are
439 looking in. Thus #include <sys/types.h> will look up sys/types.h
440 in /usr/include/header.gcc and look up types.h in
441 /usr/include/sys/header.gcc. */
442 p = strrchr (name, '/');
443 if (!p)
444 p = name;
445 if (loc && loc->name
446 && strlen (loc->name) == (size_t) (p - name)
447 && !strncmp (loc->name, name, p - name))
448 /* FILENAME is in SEARCHPTR, which we've already checked. */
449 return name;
450
451 if (p == name)
452 {
453 dir = ".";
454 from = name;
455 }
456 else
457 {
458 char * newdir = (char *) alloca (p - name + 1);
459 memcpy (newdir, name, p - name);
460 newdir[p - name] = '\0';
461 dir = newdir;
462 from = p + 1;
463 }
464
465 for (map = read_name_map (pfile, dir); map; map = map->map_next)
466 if (! strcmp (map->map_from, name))
467 return map->map_to;
468
469 return name;
470 }
471
472 /* Push an input buffer and load it up with the contents of FNAME.
473 If FNAME is "" or NULL, read standard input. */
474 int
475 cpp_read_file (pfile, fname)
476 cpp_reader *pfile;
477 const char *fname;
478 {
479 IHASH *ih, **slot;
480 IHASH dummy;
481 int f;
482
483 if (fname == NULL)
484 fname = "";
485
486 dummy.hash = -1;
487 dummy.nshort = fname;
488 slot = (IHASH **) htab_find_slot (pfile->all_include_files,
489 (const void *) &dummy, 1);
490 if (*slot && (ih = redundant_include_p (pfile, *slot, ABSOLUTE_PATH)))
491 {
492 if (ih == (IHASH *)-1)
493 return 1; /* Already included. */
494 }
495 else
496 {
497 ih = (IHASH *) xmalloc (sizeof (IHASH) + strlen (fname));
498 ih->control_macro = 0;
499 ih->foundhere = ABSOLUTE_PATH; /* well sort of ... */
500 ih->hash = dummy.hash;
501 strcpy ((char *)ih->name, fname);
502 ih->nshort = ih->name;
503
504 ih->next_this_file = *slot;
505 *slot = ih;
506 }
507
508 if (*fname == '\0')
509 f = 0;
510 else
511 f = open (fname, OMODES);
512
513 return _cpp_read_include_file (pfile, f, ih);
514 }
515
516 /* Read the contents of FD into the buffer on the top of PFILE's stack.
517 IHASH points to the include hash entry for the file associated with
518 FD.
519
520 The caller is responsible for the cpp_push_buffer. */
521
522 int
523 _cpp_read_include_file (pfile, fd, ihash)
524 cpp_reader *pfile;
525 int fd;
526 IHASH *ihash;
527 {
528 struct stat st;
529 size_t st_size;
530 long length;
531 cpp_buffer *fp;
532
533 fp = cpp_push_buffer (pfile, NULL, 0);
534
535 if (fp == 0)
536 goto push_fail;
537
538 if (fstat (fd, &st) < 0)
539 goto perror_fail;
540 if (fcntl (fd, F_SETFL, 0) == -1) /* turn off nonblocking mode */
541 goto perror_fail;
542
543 /* If fd points to a plain file, we know how big it is, so we can
544 allocate the buffer all at once. If fd is a pipe or terminal, we
545 can't. Most C source files are 4k or less, so we guess that. If
546 fd is something weird, like a block device or a directory, we
547 don't want to read it at all.
548
549 Unfortunately, different systems use different st.st_mode values
550 for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
551 zero the entire struct stat except a couple fields. Hence the
552 mess below.
553
554 In all cases, read_and_prescan will resize the buffer if it
555 turns out there's more data than we thought. */
556
557 if (S_ISREG (st.st_mode))
558 {
559 /* off_t might have a wider range than size_t - in other words,
560 the max size of a file might be bigger than the address
561 space. We can't handle a file that large. (Anyone with
562 a single source file bigger than 4GB needs to rethink
563 their coding style.) */
564 st_size = (size_t) st.st_size;
565 if ((unsigned HOST_WIDEST_INT) st_size
566 != (unsigned HOST_WIDEST_INT) st.st_size)
567 {
568 cpp_error (pfile, "file `%s' is too large", ihash->name);
569 goto fail;
570 }
571 }
572 else if (S_ISFIFO (st.st_mode) || S_ISSOCK (st.st_mode)
573 /* Permit any kind of character device: the sensible ones are
574 ttys and /dev/null, but weeding out the others is too hard. */
575 || S_ISCHR (st.st_mode)
576 /* Some 4.x (x<4) derivatives have a bug that makes fstat() of a
577 socket or pipe return a stat struct with most fields zeroed. */
578 || (st.st_mode == 0 && st.st_nlink == 0 && st.st_size == 0))
579 {
580 /* Cannot get its file size before reading. 4k is a decent
581 first guess. */
582 st_size = 4096;
583 }
584 else
585 {
586 cpp_error (pfile, "`%s' is not a file, pipe, or tty", ihash->name);
587 goto fail;
588 }
589
590 if (pfile->input_buffer == NULL)
591 init_input_buffer (pfile, fd, &st);
592
593 /* Read the file, converting end-of-line characters and trigraphs
594 (if enabled). */
595 fp->ihash = ihash;
596 fp->nominal_fname = ihash->name;
597 length = read_and_prescan (pfile, fp, fd, st_size);
598 if (length < 0)
599 goto fail;
600 if (length == 0)
601 ihash->control_macro = (const U_CHAR *) ""; /* never re-include */
602
603 close (fd);
604 fp->rlimit = fp->alimit = fp->buf + length;
605 fp->cur = fp->buf;
606 if (ihash->foundhere != ABSOLUTE_PATH)
607 fp->system_header_p = ihash->foundhere->sysp;
608 fp->lineno = 1;
609 fp->colno = 1;
610 fp->line_base = fp->buf;
611 fp->cleanup = file_cleanup;
612
613 /* The ->actual_dir field is only used when ignore_srcdir is not in effect;
614 see do_include */
615 if (!CPP_OPTIONS (pfile)->ignore_srcdir)
616 fp->actual_dir = actual_directory (pfile, ihash->name);
617
618 pfile->input_stack_listing_current = 0;
619 pfile->only_seen_white = 2;
620 return 1;
621
622 perror_fail:
623 cpp_error_from_errno (pfile, ihash->name);
624 fail:
625 cpp_pop_buffer (pfile);
626 push_fail:
627 close (fd);
628 return 0;
629 }
630
631 /* Given a path FNAME, extract the directory component and place it
632 onto the actual_dirs list. Return a pointer to the allocated
633 file_name_list structure. These structures are used to implement
634 current-directory "" include searching. */
635
636 static struct file_name_list *
637 actual_directory (pfile, fname)
638 cpp_reader *pfile;
639 const char *fname;
640 {
641 char *last_slash, *dir;
642 size_t dlen;
643 struct file_name_list *x;
644
645 dir = xstrdup (fname);
646 last_slash = strrchr (dir, '/');
647 if (last_slash)
648 {
649 if (last_slash == dir)
650 {
651 dlen = 1;
652 last_slash[1] = '\0';
653 }
654 else
655 {
656 dlen = last_slash - dir;
657 *last_slash = '\0';
658 }
659 }
660 else
661 {
662 dir[0] = '.';
663 dir[1] = '\0';
664 dlen = 1;
665 }
666
667 if (dlen > pfile->max_include_len)
668 pfile->max_include_len = dlen;
669
670 for (x = pfile->actual_dirs; x; x = x->alloc)
671 if (!strcmp (x->name, dir))
672 {
673 free (dir);
674 return x;
675 }
676
677 /* Not found, make a new one. */
678 x = (struct file_name_list *) xmalloc (sizeof (struct file_name_list));
679 x->name = dir;
680 x->nlen = dlen;
681 x->next = CPP_OPTIONS (pfile)->quote_include;
682 x->alloc = pfile->actual_dirs;
683 x->sysp = CPP_BUFFER (pfile)->system_header_p;
684 x->name_map = NULL;
685
686 pfile->actual_dirs = x;
687 return x;
688 }
689
690 /* Determine the current line and column. Used only by read_and_prescan. */
691 static U_CHAR *
692 find_position (start, limit, linep)
693 U_CHAR *start;
694 U_CHAR *limit;
695 unsigned long *linep;
696 {
697 unsigned long line = *linep;
698 U_CHAR *lbase = start;
699 while (start < limit)
700 {
701 U_CHAR ch = *start++;
702 if (ch == '\n' || ch == '\r')
703 {
704 line++;
705 lbase = start;
706 }
707 }
708 *linep = line;
709 return lbase;
710 }
711
712 /* Read the entire contents of file DESC into buffer BUF. LEN is how
713 much memory to allocate initially; more will be allocated if
714 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
715 canonical form (\n). If enabled, convert and/or warn about
716 trigraphs. Convert backslash-newline to a one-character escape
717 (\r) and remove it from "embarrassing" places (i.e. the middle of a
718 token). If there is no newline at the end of the file, add one and
719 warn. Returns -1 on failure, or the actual length of the data to
720 be scanned.
721
722 This function does a lot of work, and can be a serious performance
723 bottleneck. It has been tuned heavily; make sure you understand it
724 before hacking. The common case - no trigraphs, Unix style line
725 breaks, backslash-newline set off by whitespace, newline at EOF -
726 has been optimized at the expense of the others. The performance
727 penalty for DOS style line breaks (\r\n) is about 15%.
728
729 Warnings lose particularly heavily since we have to determine the
730 line number, which involves scanning from the beginning of the file
731 or from the last warning. The penalty for the absence of a newline
732 at the end of reload1.c is about 60%. (reload1.c is 329k.)
733
734 If your file has more than one kind of end-of-line marker, you
735 will get messed-up line numbering. */
736
737 /* Table of characters that can't be handled in the inner loop.
738 Keep these contiguous to optimize the performance of the code generated
739 for the switch that uses them. */
740 #define SPECCASE_EMPTY 0
741 #define SPECCASE_NUL 1
742 #define SPECCASE_CR 2
743 #define SPECCASE_BACKSLASH 3
744 #define SPECCASE_QUESTION 4
745
746 static long
747 read_and_prescan (pfile, fp, desc, len)
748 cpp_reader *pfile;
749 cpp_buffer *fp;
750 int desc;
751 size_t len;
752 {
753 U_CHAR *buf = (U_CHAR *) xmalloc (len);
754 U_CHAR *ip, *op, *line_base;
755 U_CHAR *ibase;
756 U_CHAR *speccase = pfile->input_speccase;
757 unsigned long line;
758 unsigned int deferred_newlines;
759 int count;
760 size_t offset;
761
762 offset = 0;
763 op = buf;
764 line_base = buf;
765 line = 1;
766 ibase = pfile->input_buffer + 2;
767 deferred_newlines = 0;
768
769 for (;;)
770 {
771 read_next:
772
773 count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len);
774 if (count < 0)
775 goto error;
776 else if (count == 0)
777 break;
778
779 offset += count;
780 ip = ibase;
781 ibase = pfile->input_buffer + 2;
782 ibase[count] = ibase[count+1] = '\0';
783
784 if (offset > len)
785 {
786 size_t delta_op;
787 size_t delta_line_base;
788 len *= 2;
789 if (offset > len)
790 /* len overflowed.
791 This could happen if the file is larger than half the
792 maximum address space of the machine. */
793 goto too_big;
794
795 delta_op = op - buf;
796 delta_line_base = line_base - buf;
797 buf = (U_CHAR *) xrealloc (buf, len);
798 op = buf + delta_op;
799 line_base = buf + delta_line_base;
800 }
801
802 for (;;)
803 {
804 unsigned int span = 0;
805
806 /* Deal with \-newline in the middle of a token. */
807 if (deferred_newlines)
808 {
809 while (speccase[ip[span]] == SPECCASE_EMPTY
810 && ip[span] != '\n'
811 && ip[span] != '\t'
812 && ip[span] != ' ')
813 span++;
814 memcpy (op, ip, span);
815 op += span;
816 ip += span;
817 /* If ip[0] is SPECCASE_EMPTY, we have hit white space.
818 Dump out the remaining deferred \-newlines. */
819 if (speccase[ip[0]] == SPECCASE_EMPTY)
820 while (deferred_newlines)
821 deferred_newlines--, *op++ = '\r';
822 span = 0;
823 }
824
825 /* Copy as much as we can without special treatment. */
826 while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
827 memcpy (op, ip, span);
828 op += span;
829 ip += span;
830
831 switch (speccase[*ip++])
832 {
833 case SPECCASE_NUL: /* \0 */
834 ibase[-1] = op[-1];
835 goto read_next;
836
837 case SPECCASE_CR: /* \r */
838 if (ip[-2] == '\n')
839 continue;
840 else if (*ip == '\n')
841 ip++;
842 else if (*ip == '\0')
843 {
844 *--ibase = '\r';
845 goto read_next;
846 }
847 *op++ = '\n';
848 break;
849
850 case SPECCASE_BACKSLASH: /* \ */
851 backslash:
852 {
853 /* If we're at the end of the intermediate buffer,
854 we have to shift the backslash down to the start
855 and come back next pass. */
856 if (*ip == '\0')
857 {
858 *--ibase = '\\';
859 goto read_next;
860 }
861 else if (*ip == '\n')
862 {
863 ip++;
864 if (*ip == '\r') ip++;
865 if (*ip == '\n' || *ip == '\t' || *ip == ' ')
866 *op++ = '\r';
867 else if (op[-1] == '\t' || op[-1] == ' '
868 || op[-1] == '\r' || op[-1] == '\n')
869 *op++ = '\r';
870 else
871 deferred_newlines++;
872 }
873 else if (*ip == '\r')
874 {
875 ip++;
876 if (*ip == '\n') ip++;
877 else if (*ip == '\0')
878 {
879 *--ibase = '\r';
880 *--ibase = '\\';
881 goto read_next;
882 }
883 else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
884 *op++ = '\r';
885 else
886 deferred_newlines++;
887 }
888 else
889 *op++ = '\\';
890 }
891 break;
892
893 case SPECCASE_QUESTION: /* ? */
894 {
895 unsigned int d, t;
896 /* If we're at the end of the intermediate buffer,
897 we have to shift the ?'s down to the start and
898 come back next pass. */
899 d = ip[0];
900 if (d == '\0')
901 {
902 *--ibase = '?';
903 goto read_next;
904 }
905 if (d != '?')
906 {
907 *op++ = '?';
908 break;
909 }
910 d = ip[1];
911 if (d == '\0')
912 {
913 *--ibase = '?';
914 *--ibase = '?';
915 goto read_next;
916 }
917
918 /* Trigraph map:
919 * from to from to from to
920 * ?? = # ?? ) ] ?? ! |
921 * ?? ( [ ?? ' ^ ?? > }
922 * ?? / \ ?? < { ?? - ~
923 */
924 if (d == '=') t = '#';
925 else if (d == ')') t = ']';
926 else if (d == '!') t = '|';
927 else if (d == '(') t = '[';
928 else if (d == '\'') t = '^';
929 else if (d == '>') t = '}';
930 else if (d == '/') t = '\\';
931 else if (d == '<') t = '{';
932 else if (d == '-') t = '~';
933 else
934 {
935 *op++ = '?';
936 break;
937 }
938 ip += 2;
939 if (CPP_OPTIONS (pfile)->warn_trigraphs)
940 {
941 unsigned long col;
942 line_base = find_position (line_base, op, &line);
943 col = op - line_base + 1;
944 if (CPP_OPTIONS (pfile)->trigraphs)
945 cpp_warning_with_line (pfile, line, col,
946 "trigraph ??%c converted to %c", d, t);
947 else
948 cpp_warning_with_line (pfile, line, col,
949 "trigraph ??%c ignored", d);
950 }
951 if (CPP_OPTIONS (pfile)->trigraphs)
952 {
953 if (t == '\\')
954 goto backslash;
955 else
956 *op++ = t;
957 }
958 else
959 {
960 *op++ = '?';
961 *op++ = '?';
962 *op++ = d;
963 }
964 }
965 }
966 }
967 }
968
969 if (offset == 0)
970 return 0;
971
972 /* Deal with pushed-back chars at true EOF.
973 This may be any of: ?? ? \ \r \n \\r \\n.
974 \r must become \n, \\r or \\n must become \r.
975 We know we have space already. */
976 if (ibase == pfile->input_buffer)
977 {
978 if (*ibase == '?')
979 {
980 *op++ = '?';
981 *op++ = '?';
982 }
983 else
984 *op++ = '\r';
985 }
986 else if (ibase == pfile->input_buffer + 1)
987 {
988 if (*ibase == '\r')
989 *op++ = '\n';
990 else
991 *op++ = *ibase;
992 }
993
994 if (op[-1] != '\n')
995 {
996 unsigned long col;
997 line_base = find_position (line_base, op, &line);
998 col = op - line_base + 1;
999 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1000 if (offset + 1 > len)
1001 {
1002 len += 1;
1003 if (offset + 1 > len)
1004 goto too_big;
1005 buf = (U_CHAR *) xrealloc (buf, len);
1006 op = buf + offset;
1007 }
1008 *op++ = '\n';
1009 }
1010
1011 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1012 return op - buf;
1013
1014 too_big:
1015 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1016 free (buf);
1017 return -1;
1018
1019 error:
1020 cpp_error_from_errno (pfile, fp->ihash->name);
1021 free (buf);
1022 return -1;
1023 }
1024
1025 /* Initialize the `input_buffer' and `input_speccase' tables.
1026 These are only used by read_and_prescan, but they're large and
1027 somewhat expensive to set up, so we want them allocated once for
1028 the duration of the cpp run. */
1029
1030 static void
1031 init_input_buffer (pfile, fd, st)
1032 cpp_reader *pfile;
1033 int fd;
1034 struct stat *st;
1035 {
1036 long pipe_buf;
1037 U_CHAR *tmp;
1038
1039 /* Table of characters that cannot be handled by the
1040 read_and_prescan inner loop. The number of non-EMPTY entries
1041 should be as small as humanly possible. */
1042
1043 tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT);
1044 memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT);
1045 tmp['\0'] = SPECCASE_NUL;
1046 tmp['\r'] = SPECCASE_CR;
1047 tmp['\\'] = SPECCASE_BACKSLASH;
1048 if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
1049 tmp['?'] = SPECCASE_QUESTION;
1050
1051 pfile->input_speccase = tmp;
1052
1053 /* Determine the appropriate size for the input buffer. Normal C
1054 source files are smaller than eight K. If we are reading a pipe,
1055 we want to make sure the input buffer is bigger than the kernel's
1056 pipe buffer. */
1057 pipe_buf = -1;
1058
1059 if (! S_ISREG (st->st_mode))
1060 {
1061 #ifdef _PC_PIPE_BUF
1062 pipe_buf = fpathconf (fd, _PC_PIPE_BUF);
1063 #endif
1064 if (pipe_buf == -1)
1065 {
1066 #ifdef PIPE_BUF
1067 pipe_buf = PIPE_BUF;
1068 #else
1069 pipe_buf = 8192;
1070 #endif
1071 }
1072 }
1073
1074 if (pipe_buf < 8192)
1075 pipe_buf = 8192;
1076 /* PIPE_BUF bytes of buffer proper, 2 to detect running off the end
1077 without address arithmetic all the time, and 2 for pushback in
1078 the case there's a potential trigraph or end-of-line digraph at
1079 the end of a block. */
1080
1081 tmp = (U_CHAR *) xmalloc (pipe_buf + 2 + 2);
1082 pfile->input_buffer = tmp;
1083 pfile->input_buffer_len = pipe_buf;
1084 }
1085
/* Simplify a path name in place, deleting redundant components.  This
   reduces OS overhead and guarantees that equivalent paths compare
   the same (modulo symlinks).

   Transforms made:
   foo/bar/../quux      foo/quux
   foo/./bar            foo/bar
   foo//bar             foo/bar
   foo/bar/             foo/bar
   /../quux             /quux
   //quux               //quux  (POSIX allows leading // as a namespace escape)

   Guarantees no trailing slashes, other than the slash(es) that make
   up the root of an absolute path.  All transforms reduce the length
   of the string, with one exception: a path that simplifies to
   nothing becomes ".", so PATH must have room for two bytes.  */
void
_cpp_simplify_pathname (path)
     char *path;
{
  char *from, *to;
  char *base;
  char *root;
  int absolute = 0;

#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
  /* Convert all backslashes to slashes.  */
  for (from = path; *from; from++)
    if (*from == '\\') *from = '/';

  /* Skip over leading drive letter if present.  */
  if (ISALPHA (path[0]) && path[1] == ':')
    from = to = &path[2];
  else
    from = to = path;
#else
  from = to = path;
#endif

  /* Remove redundant initial /s.  */
  if (*from == '/')
    {
      absolute = 1;
      to++;
      from++;
      if (*from == '/')
        {
          if (*++from == '/')
            /* 3 or more initial /s are equivalent to 1 /.  */
            while (*++from == '/');
          else
            /* On some hosts // differs from /; Posix allows this.  */
            to++;
        }
    }
  /* BASE is the point that ".." may not back up past; it moves
     forward over leading "../" components of a relative path.  ROOT
     stays at the end of the drive/root prefix and marks what the
     trailing-slash trim below must leave alone.  */
  base = to;
  root = to;

  for (;;)
    {
      while (*from == '/')
        from++;

      if (from[0] == '.' && from[1] == '/')
        from += 2;
      else if (from[0] == '.' && from[1] == '\0')
        goto done;
      else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
        {
          if (base == to)
            {
              if (absolute)
                from += 3;
              else
                {
                  *to++ = *from++;
                  *to++ = *from++;
                  *to++ = *from++;
                  base = to;
                }
            }
          else
            {
              /* Drop the previous component from the output.  */
              to -= 2;
              while (to > base && *to != '/') to--;
              if (*to == '/')
                to++;
              from += 3;
            }
        }
      else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
        {
          if (base == to)
            {
              if (!absolute)
                {
                  *to++ = *from++;
                  *to++ = *from++;
                }
            }
          else
            {
              to -= 2;
              while (to > base && *to != '/') to--;
              if (*to == '/')
                to++;
            }
          goto done;
        }
      else
        /* Copy this component and trailing /, if any.  */
        while ((*to++ = *from++) != '/')
          {
            if (!to[-1])
              {
                to--;
                goto done;
              }
          }

    }

 done:
  /* Trim trailing slash.  TO points one past the last character
     written, so the slash to look for is the one just before it.  */
  if (to > root && to[-1] == '/')
    to--;

  /* Change the empty string to "." so that stat() on the result
     will always work.  */
  if (to == path)
    *to++ = '.';

  *to = '\0';

  return;
}
1218
1219 /* It is not clear when this should be used if at all, so I've
1220 disabled it until someone who understands VMS can look at it. */
1221 #if 0
1222
1223 /* Under VMS we need to fix up the "include" specification filename.
1224
1225 Rules for possible conversions
1226
1227 fullname tried paths
1228
1229 name name
1230 ./dir/name [.dir]name
1231 /dir/name dir:name
1232 /name [000000]name, name
1233 dir/name dir:[000000]name, dir:name, dir/name
1234 dir1/dir2/name dir1:[dir2]name, dir1:[000000.dir2]name
1235 path:/name path:[000000]name, path:name
1236 path:/dir/name path:[000000.dir]name, path:[dir]name
1237 path:dir/name path:[dir]name
1238 [path]:[dir]name [path.dir]name
1239 path/[dir]name [path.dir]name
1240
1241 The path:/name input is constructed when expanding <> includes. */
1242
1243
1244 static void
1245 hack_vms_include_specification (fullname)
1246 char *fullname;
1247 {
1248 register char *basename, *unixname, *local_ptr, *first_slash;
1249 int f, check_filename_before_returning, must_revert;
1250 char Local[512];
1251
1252 check_filename_before_returning = 0;
1253 must_revert = 0;
1254 /* See if we can find a 1st slash. If not, there's no path information. */
1255 first_slash = strchr (fullname, '/');
1256 if (first_slash == 0)
1257 return 0; /* Nothing to do!!! */
1258
1259 /* construct device spec if none given. */
1260
1261 if (strchr (fullname, ':') == 0)
1262 {
1263
1264 /* If fullname has a slash, take it as device spec. */
1265
1266 if (first_slash == fullname)
1267 {
1268 first_slash = strchr (fullname + 1, '/'); /* 2nd slash ? */
1269 if (first_slash)
1270 *first_slash = ':'; /* make device spec */
1271 for (basename = fullname; *basename != 0; basename++)
1272 *basename = *(basename+1); /* remove leading slash */
1273 }
1274 else if ((first_slash[-1] != '.') /* keep ':/', './' */
1275 && (first_slash[-1] != ':')
1276 && (first_slash[-1] != ']')) /* or a vms path */
1277 {
1278 *first_slash = ':';
1279 }
1280 else if ((first_slash[1] == '[') /* skip './' in './[dir' */
1281 && (first_slash[-1] == '.'))
1282 fullname += 2;
1283 }
1284
1285 /* Get part after first ':' (basename[-1] == ':')
1286 or last '/' (basename[-1] == '/'). */
1287
1288 basename = base_name (fullname);
1289
1290 local_ptr = Local; /* initialize */
1291
1292 /* We are trying to do a number of things here. First of all, we are
1293 trying to hammer the filenames into a standard format, such that later
1294 processing can handle them.
1295
1296 If the file name contains something like [dir.], then it recognizes this
1297 as a root, and strips the ".]". Later processing will add whatever is
1298 needed to get things working properly.
1299
1300 If no device is specified, then the first directory name is taken to be
1301 a device name (or a rooted logical). */
1302
1303 /* Point to the UNIX filename part (which needs to be fixed!)
1304 but skip vms path information.
1305 [basename != fullname since first_slash != 0]. */
1306
1307 if ((basename[-1] == ':') /* vms path spec. */
1308 || (basename[-1] == ']')
1309 || (basename[-1] == '>'))
1310 unixname = basename;
1311 else
1312 unixname = fullname;
1313
1314 if (*unixname == '/')
1315 unixname++;
1316
1317 /* If the directory spec is not rooted, we can just copy
1318 the UNIX filename part and we are done. */
1319
1320 if (((basename - fullname) > 1)
1321 && ( (basename[-1] == ']')
1322 || (basename[-1] == '>')))
1323 {
1324 if (basename[-2] != '.')
1325 {
1326
1327 /* The VMS part ends in a `]', and the preceding character is not a `.'.
1328 -> PATH]:/name (basename = '/name', unixname = 'name')
1329 We strip the `]', and then splice the two parts of the name in the
1330 usual way. Given the default locations for include files in cccp.c,
1331 we will only use this code if the user specifies alternate locations
1332 with the /include (-I) switch on the command line. */
1333
1334 basename -= 1; /* Strip "]" */
1335 unixname--; /* backspace */
1336 }
1337 else
1338 {
1339
1340 /* The VMS part has a ".]" at the end, and this will not do. Later
1341 processing will add a second directory spec, and this would be a syntax
1342 error. Thus we strip the ".]", and thus merge the directory specs.
1343 We also backspace unixname, so that it points to a '/'. This inhibits the
1344 generation of the 000000 root directory spec (which does not belong here
1345 in this case). */
1346
1347 basename -= 2; /* Strip ".]" */
1348 unixname--; /* backspace */
1349 }
1350 }
1351
1352 else
1353
1354 {
1355
1356 /* We drop in here if there is no VMS style directory specification yet.
1357 If there is no device specification either, we make the first dir a
1358 device and try that. If we do not do this, then we will be essentially
1359 searching the users default directory (as if they did a #include "asdf.h").
1360
1361 Then all we need to do is to push a '[' into the output string. Later
1362 processing will fill this in, and close the bracket. */
1363
1364 if ((unixname != fullname) /* vms path spec found. */
1365 && (basename[-1] != ':'))
1366 *local_ptr++ = ':'; /* dev not in spec. take first dir */
1367
1368 *local_ptr++ = '['; /* Open the directory specification */
1369 }
1370
1371 if (unixname == fullname) /* no vms dir spec. */
1372 {
1373 must_revert = 1;
1374 if ((first_slash != 0) /* unix dir spec. */
1375 && (*unixname != '/') /* not beginning with '/' */
1376 && (*unixname != '.')) /* or './' or '../' */
1377 *local_ptr++ = '.'; /* dir is local ! */
1378 }
1379
1380 /* at this point we assume that we have the device spec, and (at least
1381 the opening "[" for a directory specification. We may have directories
1382 specified already.
1383
1384 If there are no other slashes then the filename will be
1385 in the "root" directory. Otherwise, we need to add
1386 directory specifications. */
1387
1388 if (strchr (unixname, '/') == 0)
1389 {
1390 /* if no directories specified yet and none are following. */
1391 if (local_ptr[-1] == '[')
1392 {
1393 /* Just add "000000]" as the directory string */
1394 strcpy (local_ptr, "000000]");
1395 local_ptr += strlen (local_ptr);
1396 check_filename_before_returning = 1; /* we might need to fool with this later */
1397 }
1398 }
1399 else
1400 {
1401
1402 /* As long as there are still subdirectories to add, do them. */
1403 while (strchr (unixname, '/') != 0)
1404 {
1405 /* If this token is "." we can ignore it
1406 if it's not at the beginning of a path. */
1407 if ((unixname[0] == '.') && (unixname[1] == '/'))
1408 {
1409 /* remove it at beginning of path. */
1410 if ( ((unixname == fullname) /* no device spec */
1411 && (fullname+2 != basename)) /* starts with ./ */
1412 /* or */
1413 || ((basename[-1] == ':') /* device spec */
1414 && (unixname-1 == basename))) /* and ./ afterwards */
1415 *local_ptr++ = '.'; /* make '[.' start of path. */
1416 unixname += 2;
1417 continue;
1418 }
1419
1420 /* Add a subdirectory spec. Do not duplicate "." */
1421 if ( local_ptr[-1] != '.'
1422 && local_ptr[-1] != '['
1423 && local_ptr[-1] != '<')
1424 *local_ptr++ = '.';
1425
1426 /* If this is ".." then the spec becomes "-" */
1427 if ( (unixname[0] == '.')
1428 && (unixname[1] == '.')
1429 && (unixname[2] == '/'))
1430 {
1431 /* Add "-" and skip the ".." */
1432 if ((local_ptr[-1] == '.')
1433 && (local_ptr[-2] == '['))
1434 local_ptr--; /* prevent [.- */
1435 *local_ptr++ = '-';
1436 unixname += 3;
1437 continue;
1438 }
1439
1440 /* Copy the subdirectory */
1441 while (*unixname != '/')
1442 *local_ptr++= *unixname++;
1443
1444 unixname++; /* Skip the "/" */
1445 }
1446
1447 /* Close the directory specification */
1448 if (local_ptr[-1] == '.') /* no trailing periods */
1449 local_ptr--;
1450
1451 if (local_ptr[-1] == '[') /* no dir needed */
1452 local_ptr--;
1453 else
1454 *local_ptr++ = ']';
1455 }
1456
1457 /* Now add the filename. */
1458
1459 while (*unixname)
1460 *local_ptr++ = *unixname++;
1461 *local_ptr = 0;
1462
1463 /* Now append it to the original VMS spec. */
1464
1465 strcpy ((must_revert==1)?fullname:basename, Local);
1466
1467 /* If we put a [000000] in the filename, try to open it first. If this fails,
1468 remove the [000000], and return that name. This provides flexibility
1469 to the user in that they can use both rooted and non-rooted logical names
1470 to point to the location of the file. */
1471
1472 if (check_filename_before_returning)
1473 {
1474 f = open (fullname, OMODES);
1475 if (f >= 0)
1476 {
1477 /* The file name is OK as it is, so return it as is. */
1478 close (f);
1479 return 1;
1480 }
1481
1482 /* The filename did not work. Try to remove the [000000] from the name,
1483 and return it. */
1484
1485 basename = strchr (fullname, '[');
1486 local_ptr = strchr (fullname, ']') + 1;
1487 strcpy (basename, local_ptr); /* this gets rid of it */
1488
1489 }
1490
1491 return 1;
1492 }
1493 #endif /* VMS */