1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
24 pair. This needs better error-handling.
28 #include "as.h" /* For BAD_CASE() only */
30 #if (__STDC__ != 1) && !defined(const)
31 #define const /* Nothing */
35 static const char symbol_chars
[] =
36 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/* Character classes stored in the lex[] table, which is indexed by
   character code.  Each character belongs to exactly one class; later
   assignments to lex[] overwrite earlier ones.  */
38 #define LEX_IS_SYMBOL_COMPONENT 1
39 #define LEX_IS_WHITESPACE 2
40 #define LEX_IS_LINE_SEPARATOR 3
41 #define LEX_IS_COMMENT_START 4
42 #define LEX_IS_LINE_COMMENT_START 5
43 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
44 #define LEX_IS_TWOCHAR_COMMENT_2ND 7
45 #define LEX_IS_STRINGQUOTE 8
46 #define LEX_IS_COLON 9
47 #define LEX_IS_NEWLINE 10
48 #define LEX_IS_ONECHAR_QUOTE 11
/* Class predicates.  The argument is used directly as an index into
   lex[], so callers test for EOF before applying one of these.  */
49 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
50 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
51 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
52 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
53 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
54 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
56 /* FIXME-soon: The entire lexer/parser thingy should be
57 built statically at compile time rather than dynamically
58 each and every time the assembler is run. xoxorich. */
65 lex
[' '] = LEX_IS_WHITESPACE
;
66 lex
['\t'] = LEX_IS_WHITESPACE
;
67 lex
['\n'] = LEX_IS_NEWLINE
;
68 lex
[';'] = LEX_IS_LINE_SEPARATOR
;
69 lex
['"'] = LEX_IS_STRINGQUOTE
;
71 lex
['\''] = LEX_IS_ONECHAR_QUOTE
;
73 lex
[':'] = LEX_IS_COLON
;
77 #ifdef SINGLE_QUOTE_STRINGS
78 lex
['\''] = LEX_IS_STRINGQUOTE
;
81 /* Note that these override the previous defaults, e.g. if ';'
83 is a comment char, then it isn't a line separator. */
84 for (p
= symbol_chars
; *p
; ++p
)
86 lex
[(unsigned char) *p
] = LEX_IS_SYMBOL_COMPONENT
;
87 } /* declare symbol characters */
89 for (p
= comment_chars
; *p
; p
++)
91 lex
[(unsigned char) *p
] = LEX_IS_COMMENT_START
;
92 } /* declare comment chars */
94 for (p
= line_comment_chars
; *p
; p
++)
96 lex
[(unsigned char) *p
] = LEX_IS_LINE_COMMENT_START
;
97 } /* declare line comment chars */
99 for (p
= line_separator_chars
; *p
; p
++)
101 lex
[(unsigned char) *p
] = LEX_IS_LINE_SEPARATOR
;
102 } /* declare line separators */
104 /* Only allow slash-star comments if slash is not in use */
107 lex
['/'] = LEX_IS_TWOCHAR_COMMENT_1ST
;
109 /* FIXME-soon. This is a bad hack but otherwise, we
110 can't do c-style comments when '/' is a line
111 comment char. xoxorich. */
114 lex
['*'] = LEX_IS_TWOCHAR_COMMENT_2ND
;
116 } /* do_scrub_begin() */
123 return getc (scrub_file
);
130 ungetc (ch
, scrub_file
);
131 } /* scrub_to_file() */
134 char *scrub_last_string
;
139 return scrub_string
== scrub_last_string
? EOF
: *scrub_string
++;
140 } /* scrub_from_string() */
146 *--scrub_string
= ch
;
147 } /* scrub_to_string() */
149 /* Saved state of the scrubber */
/* State to return to once a string (state 5), a pending output string
   (state -1) or a flushed comment (state -2) has been dealt with.  */
151 static int old_state
;
/* Next character to hand out while a pending string is being emitted;
   the scrubber checks it for the terminating NUL.  */
152 static char *out_string
;
/* Holds the decimal text that a one-character quote constant is
   converted to before it is emitted through out_string.  */
153 static char out_buf
[20];
/* NOTE(review): presumably the number of newlines swallowed inside
   comments that still have to be rolled out -- confirm against the
   code that updates it.  */
154 static int add_newlines
= 0;
156 /* Data structure for saving the state of app across #include's. Note that
157 app is called asynchronously to the parsing of the .include's, so our
158 state at the time .include is interpreted is completely unrelated.
159 That's why we have to save it all. */
166 char out_buf
[sizeof (out_buf
)];
169 char *scrub_last_string
;
176 register struct app_save
*saved
;
178 saved
= (struct app_save
*) xmalloc (sizeof (*saved
));
179 saved
->state
= state
;
180 saved
->old_state
= old_state
;
181 saved
->out_string
= out_string
;
182 memcpy (saved
->out_buf
, out_buf
, sizeof (out_buf
));
183 saved
->add_newlines
= add_newlines
;
184 saved
->scrub_string
= scrub_string
;
185 saved
->scrub_last_string
= scrub_last_string
;
186 saved
->scrub_file
= scrub_file
;
188 /* do_scrub_begin() is not useful, just wastes time. */
189 return (char *) saved
;
196 register struct app_save
*saved
= (struct app_save
*) arg
;
198 /* There is no do_scrub_end (). */
199 state
= saved
->state
;
200 old_state
= saved
->old_state
;
201 out_string
= saved
->out_string
;
202 memcpy (out_buf
, saved
->out_buf
, sizeof (out_buf
));
203 add_newlines
= saved
->add_newlines
;
204 scrub_string
= saved
->scrub_string
;
205 scrub_last_string
= saved
->scrub_last_string
;
206 scrub_file
= saved
->scrub_file
;
211 /* @@ This assumes that \n &c are the same on host and target. This is not
238 do_scrub_next_char (get
, unget
)
242 /*State 0: beginning of normal line
243 1: After first whitespace on line (flush more white)
244 2: After first non-white (opcode) on line (keep 1white)
245 3: after second white on line (into operands) (flush white)
246 4: after putting out a .appline, put out digits
247 5: parsing a string, then go to old-state
248 6: putting out \ escape in a quoted string.
249 7: After putting out a .appfile, put out string.
250 8: After putting out a .appfile string, flush until newline.
251 9: After seeing symbol char in state 3 (keep 1white after symchar)
252 10: After seeing whitespace in state 9 (keep white before symchar)
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
257 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
258 constructs like ``.loc 1 20''. This was turning into ``.loc
259 120''. States 9 and 10 ensure that a space is never dropped in
260 between characters which could appear in an identifier. Ian
261 Taylor, ian@cygnus.com. */
263 register int ch
, ch2
= 0;
269 if (*out_string
== 0)
283 while (ch
!= EOF
&& ch
!= '\n' && ch
!= '*');
284 if (ch
== '\n' || ch
== EOF
)
287 /* At this point, ch must be a '*' */
288 while ((ch
= (*get
) ()) == '*')
292 if (ch
== EOF
|| ch
== '/')
301 if (ch
== EOF
|| (ch
>= '0' && ch
<= '9'))
305 while (ch
!= EOF
&& IS_WHITESPACE (ch
))
310 out_string
= "\n.appfile ";
313 return *out_string
++;
317 while (ch
!= EOF
&& ch
!= '\n')
326 if (lex
[ch
] == LEX_IS_STRINGQUOTE
)
338 as_warn ("End of file in string: inserted '\"'");
353 /* Handle strings broken across lines, by turning '\n' into
363 case 'x': /* '\\x' introduces escaped sequences on the PA */
372 #endif /* BACKSLASH_V */
382 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
384 as_warn ("Unknown escape '\\%c' in string: Ignored", ch
);
386 #else /* ONLY_STANDARD_ESCAPES */
388 /* Accept \x as x for any x */
390 #endif /* ONLY_STANDARD_ESCAPES */
393 as_warn ("End of file in string: '\"' inserted");
412 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
420 as_warn ("End of file not at end of a line: Newline inserted.");
426 case LEX_IS_WHITESPACE
:
429 while (ch
!= EOF
&& IS_WHITESPACE (ch
));
433 if (IS_COMMENT (ch
) || (state
== 0 && IS_LINE_COMMENT (ch
)) || ch
== '/' || IS_LINE_SEPARATOR (ch
))
438 (*unget
) (ch
); /* Put back */
439 return ' '; /* Always return one space at start of line */
442 /* If we're in state 2, we've seen a non-white
443 character followed by whitespace. If the next
444 character is ':', this is whitespace after a label
445 name which we can ignore. */
446 if (state
== 2 && lex
[ch
] == LEX_IS_COLON
)
456 goto recycle
; /* Punted leading sp */
458 BAD_CASE (state
); /* We can't get here */
462 return ' '; /* Sp after opco */
464 goto recycle
; /* Sp in operands */
467 state
= 10; /* Sp after symbol char */
474 case LEX_IS_TWOCHAR_COMMENT_1ST
:
476 if (ch2
!= EOF
&& lex
[ch2
] == LEX_IS_TWOCHAR_COMMENT_2ND
)
483 if (ch2
!= EOF
&& IS_NEWLINE (ch2
))
487 (lex
[ch2
] != LEX_IS_TWOCHAR_COMMENT_2ND
));
490 (lex
[ch2
] == LEX_IS_TWOCHAR_COMMENT_2ND
))
496 || lex
[ch2
] == LEX_IS_TWOCHAR_COMMENT_1ST
)
501 as_warn ("End of file in multiline comment");
510 if (state
== 9 || state
== 10)
516 case LEX_IS_STRINGQUOTE
:
517 if (state
== 9 || state
== 10)
525 case LEX_IS_ONECHAR_QUOTE
:
529 as_warn ("End-of-file after a one-character quote; \\000 inserted");
535 ch
= process_escape (ch
);
537 sprintf (out_buf
, "%d", (int) (unsigned char) ch
);
540 /* None of these 'x constants for us. We want 'x'. */
541 if ((ch
= (*get
) ()) != '\'')
543 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
544 as_warn ("Missing close quote: (assumed)");
549 if (strlen (out_buf
) == 1)
553 if (state
== 9 || state
== 10)
558 out_string
= out_buf
;
559 return *out_string
++;
563 if (state
== 9 || state
== 10)
570 /* Roll out a bunch of newlines from inside comments, etc. */
576 /* fall thru into... */
578 case LEX_IS_LINE_SEPARATOR
:
582 case LEX_IS_LINE_COMMENT_START
:
583 if (state
== 0) /* Only comment at start of line. */
585 /* FIXME-someday: The two character comment stuff was badly
586 thought out. On i386, we want '/' as line comment start
587 AND we want C style comments. hence this hack. The
588 whole lexical process should be reworked. xoxorich. */
595 return (do_scrub_next_char (get
, unget
));
605 while (ch
!= EOF
&& IS_WHITESPACE (ch
));
608 as_warn ("EOF in comment: Newline inserted");
611 if (ch
< '0' || ch
> '9')
613 /* Non-numerics: Eat whole comment line */
614 while (ch
!= EOF
&& !IS_NEWLINE (ch
))
617 as_warn ("EOF in Comment: Newline inserted");
621 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
625 out_string
= ".appline ";
626 return *out_string
++;
629 /* We have a line comment character which is not at the start of
630 a line. If this is also a normal comment character, fall
631 through. Otherwise treat it as a default character. */
632 if (strchr (comment_chars
, ch
) == NULL
)
635 case LEX_IS_COMMENT_START
:
638 while (ch
!= EOF
&& !IS_NEWLINE (ch
));
640 as_warn ("EOF in comment: Newline inserted");
644 case LEX_IS_SYMBOL_COMPONENT
:
647 /* This is a symbol character following another symbol
648 character, with whitespace in between. We skipped the
649 whitespace earlier, so output it now. */
659 /* Some relatively `normal' character. */
662 state
= 2; /* Now seeing opcode */
667 state
= 2; /* Ditto */
672 if (lex
[ch
] != LEX_IS_SYMBOL_COMPONENT
)
676 else if (state
== 10)
683 return ch
; /* Opcode or operands already */
691 const char comment_chars
[] = "|";
692 const char line_comment_chars
[] = "#";
699 while ((ch
= do_scrub_next_char (stdin
)) != EOF
)