* expr.c (operand): Handle 08 and 09 in MRI mode.
[binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
24 pair. This needs better error-handling. */
25
26 #include <stdio.h>
27 #include "as.h" /* For BAD_CASE() only */
28
29 #if (__STDC__ != 1)
30 #ifndef const
31 #define const /* empty */
32 #endif
33 #endif
34
/* Character classification table, indexed by character value (0..255).
   It is filled in by do_scrub_begin; an entry of 0 means "ordinary
   character with no special meaning to the scrubber".  */
static char lex[256];

/* Characters that may appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Classification predicates.  The argument is parenthesized and reduced
   to an unsigned char before it is used as an index, so a character
   with the high bit set (negative where plain char is signed) can never
   index outside lex[].  Callers must still test for EOF themselves
   before using these.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
56
57 static int process_escape PARAMS ((int));
58
59 /* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
62
63 void
64 do_scrub_begin ()
65 {
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex[':'] = LEX_IS_COLON;
73
74 if (! flag_mri)
75 {
76 lex['"'] = LEX_IS_STRINGQUOTE;
77
78 #ifndef TC_HPPA
79 lex['\''] = LEX_IS_ONECHAR_QUOTE;
80 #endif
81
82 #ifdef SINGLE_QUOTE_STRINGS
83 lex['\''] = LEX_IS_STRINGQUOTE;
84 #endif
85 }
86
87 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
88 in state 5 of do_scrub_chars must be changed. */
89
90 /* Note that these override the previous defaults, e.g. if ';' is a
91 comment char, then it isn't a line separator. */
92 for (p = symbol_chars; *p; ++p)
93 {
94 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
95 } /* declare symbol characters */
96
97 for (p = comment_chars; *p; p++)
98 {
99 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
100 } /* declare comment chars */
101
102 for (p = line_comment_chars; *p; p++)
103 {
104 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
105 } /* declare line comment chars */
106
107 for (p = line_separator_chars; *p; p++)
108 {
109 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
110 } /* declare line separators */
111
112 /* Only allow slash-star comments if slash is not in use */
113 if (lex['/'] == 0)
114 {
115 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
116 }
117 /* FIXME-soon. This is a bad hack but otherwise, we can't do
118 c-style comments when '/' is a line comment char. xoxorich. */
119 if (lex['*'] == 0)
120 {
121 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
122 }
123
124 if (flag_mri)
125 {
126 lex['\''] = LEX_IS_STRINGQUOTE;
127 lex[';'] = LEX_IS_COMMENT_START;
128 lex['*'] = LEX_IS_LINE_COMMENT_START;
129 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
130 then it can't be used in an expression. */
131 lex['!'] = LEX_IS_LINE_COMMENT_START;
132 }
133 } /* do_scrub_begin() */
134
/* State of the scrubber that has to survive from one call of
   do_scrub_chars to the next.  */

/* Current and "return to" state of the state machine.  */
static int state = 0;
static int old_state = 0;

/* Text still to be emitted while in state -1.  */
static char *out_string = NULL;

/* Scratch space for the decimal form of a one-character constant.  */
static char out_buf[20];

/* Number of newlines that were swallowed (inside comments or continued
   strings) and still have to be written out.  */
static int add_newlines = 0;

/* Copy of the input that had not been consumed yet when the output
   buffer filled up, and its length.  */
static char *saved_input = NULL;
static int saved_input_len = 0;

/* Snapshot of all of the above, taken by app_push and restored by
   app_pop.  App is called asynchronously to the parsing of the
   .include's, so our state at the time .include is interpreted is
   completely unrelated to the state needed for the included file.
   That's why we have to save it all.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *saved_input;
  int saved_input_len;
};
159
160 char *
161 app_push ()
162 {
163 register struct app_save *saved;
164
165 saved = (struct app_save *) xmalloc (sizeof (*saved));
166 saved->state = state;
167 saved->old_state = old_state;
168 saved->out_string = out_string;
169 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
170 saved->add_newlines = add_newlines;
171 saved->saved_input = saved_input;
172 saved->saved_input_len = saved_input_len;
173
174 /* do_scrub_begin() is not useful, just wastes time. */
175
176 state = 0;
177 saved_input = NULL;
178
179 return (char *) saved;
180 }
181
182 void
183 app_pop (arg)
184 char *arg;
185 {
186 register struct app_save *saved = (struct app_save *) arg;
187
188 /* There is no do_scrub_end (). */
189 state = saved->state;
190 old_state = saved->old_state;
191 out_string = saved->out_string;
192 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
193 add_newlines = saved->add_newlines;
194 saved_input = saved->saved_input;
195 saved_input_len = saved->saved_input_len;
196
197 free (arg);
198 } /* app_pop() */
199
/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  Only b, f, n, r and t map to
   something different from themselves; every other character
   (including the two quote characters) is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  /* Escape letters and, at the same index, the characters they denote.  */
  static const char letters[] = "bfnrt";
  static const char values[] = "\b\f\n\r\t";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    {
      if (ch == letters[i])
	return values[i];
    }

  return ch;
}
226
227 /* This function is called to process input characters. The GET
228 parameter is used to retrieve more input characters. GET should
229 set its parameter to point to a buffer, and return the length of
230 the buffer; it should return 0 at end of file. The scrubbed output
231 characters are put into the buffer starting at TOSTART; the TOSTART
232 buffer is TOLEN bytes in length. The function returns the number
233 of scrubbed characters put into TOSTART. This will be TOLEN unless
234 end of file was seen. This function is arranged as a state
235 machine, and saves its state so that it may return at any point.
236 This is the way the old code used to work. */
237
238 int
239 do_scrub_chars (get, tostart, tolen)
240 int (*get) PARAMS ((char **));
241 char *tostart;
242 int tolen;
243 {
244 char *to = tostart;
245 char *toend = tostart + tolen;
246 char *from;
247 char *fromend;
248 int fromlen;
249 register int ch, ch2 = 0;
250 int not_cpp_line = 0;
251
252 /*State 0: beginning of normal line
253 1: After first whitespace on line (flush more white)
254 2: After first non-white (opcode) on line (keep 1white)
255 3: after second white on line (into operands) (flush white)
256 4: after putting out a .line, put out digits
257 5: parsing a string, then go to old-state
258 6: putting out \ escape in a "d string.
259 7: After putting out a .appfile, put out string.
260 8: After putting out a .appfile string, flush until newline.
261 9: After seeing symbol char in state 3 (keep 1white after symchar)
262 10: After seeing whitespace in state 9 (keep white before symchar)
263 11: After seeing a symbol character in state 0 (eg a label definition)
264 -1: output string in out_string and go to the state in old_state
265 -2: flush text until a '*' '/' is seen, then go to state old_state
266 */
267
268 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
269 constructs like ``.loc 1 20''. This was turning into ``.loc
270 120''. States 9 and 10 ensure that a space is never dropped in
271 between characters which could appear in a identifier. Ian
272 Taylor, ian@cygnus.com.
273
274 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
275 correctly on the PA (and any other target where colons are optional).
276 Jeff Law, law@cs.utah.edu. */
277
278 /* This macro gets the next input character. */
279
280 #define GET() \
281 (from < fromend \
282 ? *from++ \
283 : ((saved_input != NULL \
284 ? (free (saved_input), \
285 saved_input = NULL, \
286 0) \
287 : 0), \
288 fromlen = (*get) (&from), \
289 fromend = from + fromlen, \
290 (fromlen == 0 \
291 ? EOF \
292 : *from++)))
293
294 /* This macro pushes a character back on the input stream. */
295
296 #define UNGET(uch) (*--from = (uch))
297
298 /* This macro puts a character into the output buffer. If this
299 character fills the output buffer, this macro jumps to the label
300 TOFULL. We use this rather ugly approach because we need to
301 handle two different termination conditions: EOF on the input
302 stream, and a full output buffer. It would be simpler if we
303 always read in the entire input stream before processing it, but
304 I don't want to make such a significant change to the assembler's
305 memory usage. */
306
307 #define PUT(pch) \
308 do \
309 { \
310 *to++ = (pch); \
311 if (to >= toend) \
312 goto tofull; \
313 } \
314 while (0)
315
316 if (saved_input != NULL)
317 {
318 from = saved_input;
319 fromend = from + saved_input_len;
320 }
321 else
322 {
323 fromlen = (*get) (&from);
324 if (fromlen == 0)
325 return 0;
326 fromend = from + fromlen;
327 }
328
329 while (1)
330 {
331 /* The cases in this switch end with continue, in order to
332 branch back to the top of this while loop and generate the
333 next output character in the appropriate state. */
334 switch (state)
335 {
336 case -1:
337 ch = *out_string++;
338 if (*out_string == '\0')
339 {
340 state = old_state;
341 old_state = 3;
342 }
343 PUT (ch);
344 continue;
345
346 case -2:
347 for (;;)
348 {
349 do
350 {
351 ch = GET ();
352
353 if (ch == EOF)
354 {
355 as_warn ("end of file in comment");
356 goto fromeof;
357 }
358
359 if (ch == '\n')
360 PUT ('\n');
361 }
362 while (ch != '*');
363
364 while ((ch = GET ()) == '*')
365 ;
366
367 if (ch == EOF)
368 {
369 as_warn ("end of file in comment");
370 goto fromeof;
371 }
372
373 if (ch == '/')
374 break;
375
376 UNGET (ch);
377 }
378
379 state = old_state;
380 PUT (' ');
381 continue;
382
383 case 4:
384 ch = GET ();
385 if (ch == EOF)
386 goto fromeof;
387 else if (ch >= '0' && ch <= '9')
388 PUT (ch);
389 else
390 {
391 while (ch != EOF && IS_WHITESPACE (ch))
392 ch = GET ();
393 if (ch == '"')
394 {
395 UNGET (ch);
396 out_string = "\n\t.appfile ";
397 old_state = 7;
398 state = -1;
399 PUT (*out_string++);
400 }
401 else
402 {
403 while (ch != EOF && ch != '\n')
404 ch = GET ();
405 state = 0;
406 PUT (ch);
407 }
408 }
409 continue;
410
411 case 5:
412 /* We are going to copy everything up to a quote character,
413 with special handling for a backslash. We try to
414 optimize the copying in the simple case without using the
415 GET and PUT macros. */
416 {
417 char *s;
418 int len;
419
420 for (s = from; s < fromend; s++)
421 {
422 ch = *s;
423 /* This condition must be changed if the type of any
424 other character can be LEX_IS_STRINGQUOTE. */
425 if (ch == '\\'
426 || ch == '"'
427 || ch == '\''
428 || ch == '\n')
429 break;
430 }
431 len = s - from;
432 if (len > toend - to)
433 len = toend - to;
434 if (len > 0)
435 {
436 memcpy (to, from, len);
437 to += len;
438 from += len;
439 }
440 }
441
442 ch = GET ();
443 if (ch == EOF)
444 {
445 as_warn ("end of file in string: inserted '\"'");
446 state = old_state;
447 UNGET ('\n');
448 PUT ('"');
449 }
450 else if (lex[ch] == LEX_IS_STRINGQUOTE)
451 {
452 state = old_state;
453 PUT (ch);
454 }
455 #ifndef NO_STRING_ESCAPES
456 else if (ch == '\\')
457 {
458 state = 6;
459 PUT (ch);
460 }
461 #endif
462 else if (flag_mri && ch == '\n')
463 {
464 /* Just quietly terminate the string. This permits lines like
465 bne label loop if we haven't reach end yet
466 */
467 state = old_state;
468 UNGET (ch);
469 PUT ('\'');
470 }
471 else
472 {
473 PUT (ch);
474 }
475 continue;
476
477 case 6:
478 state = 5;
479 ch = GET ();
480 switch (ch)
481 {
482 /* Handle strings broken across lines, by turning '\n' into
483 '\\' and 'n'. */
484 case '\n':
485 UNGET ('n');
486 add_newlines++;
487 PUT ('\\');
488 continue;
489
490 case '"':
491 case '\\':
492 case 'b':
493 case 'f':
494 case 'n':
495 case 'r':
496 case 't':
497 case 'v':
498 case 'x':
499 case 'X':
500 case '0':
501 case '1':
502 case '2':
503 case '3':
504 case '4':
505 case '5':
506 case '6':
507 case '7':
508 break;
509 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
510 default:
511 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
512 break;
513 #else /* ONLY_STANDARD_ESCAPES */
514 default:
515 /* Accept \x as x for any x */
516 break;
517 #endif /* ONLY_STANDARD_ESCAPES */
518
519 case EOF:
520 as_warn ("End of file in string: '\"' inserted");
521 PUT ('"');
522 continue;
523 }
524 PUT (ch);
525 continue;
526
527 case 7:
528 ch = GET ();
529 state = 5;
530 old_state = 8;
531 if (ch == EOF)
532 goto fromeof;
533 PUT (ch);
534 continue;
535
536 case 8:
537 do
538 ch = GET ();
539 while (ch != '\n' && ch != EOF);
540 if (ch == EOF)
541 goto fromeof;
542 state = 0;
543 PUT (ch);
544 continue;
545 }
546
547 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
548
549 /* flushchar: */
550 ch = GET ();
551 recycle:
552 if (ch == EOF)
553 {
554 if (state != 0)
555 {
556 as_warn ("end of file not at end of a line; newline inserted");
557 state = 0;
558 PUT ('\n');
559 }
560 goto fromeof;
561 }
562
563 switch (lex[ch])
564 {
565 case LEX_IS_WHITESPACE:
566 if (state == 0)
567 {
568 /* Preserve a single whitespace character at the
569 beginning of a line. */
570 state = 1;
571 PUT (ch);
572 break;
573 }
574 do
575 {
576 ch = GET ();
577 }
578 while (ch != EOF && IS_WHITESPACE (ch));
579 if (ch == EOF)
580 goto fromeof;
581
582 if (IS_COMMENT (ch)
583 || (state == 0 && IS_LINE_COMMENT (ch))
584 || ch == '/'
585 || IS_LINE_SEPARATOR (ch))
586 {
587 /* cpp never outputs a leading space before the #, so
588 try to avoid being confused. */
589 not_cpp_line = 1;
590 goto recycle;
591 }
592
593 /* If we're in state 2 or 11, we've seen a non-white
594 character followed by whitespace. If the next character
595 is ':', this is whitespace after a label name which we
596 normally must ignore. In MRI mode, though, spaces are
597 not permitted between the label and the colon. */
598 if ((state == 2 || state == 11)
599 && lex[ch] == LEX_IS_COLON
600 && ! flag_mri)
601 {
602 state = 1;
603 PUT (ch);
604 break;
605 }
606
607 switch (state)
608 {
609 case 0:
610 state++;
611 goto recycle; /* Punted leading sp */
612 case 1:
613 /* We can arrive here if we leave a leading whitespace
614 character at the beginning of a line. */
615 goto recycle;
616 case 2:
617 state = 3;
618 if (to + 1 < toend)
619 {
620 /* Optimize common case by skipping UNGET/GET. */
621 PUT (' '); /* Sp after opco */
622 goto recycle;
623 }
624 UNGET (ch);
625 PUT (' ');
626 break;
627 case 3:
628 if (flag_mri)
629 {
630 /* In MRI mode, we keep these spaces. */
631 UNGET (ch);
632 PUT (' ');
633 break;
634 }
635 goto recycle; /* Sp in operands */
636 case 9:
637 case 10:
638 if (flag_mri)
639 {
640 /* In MRI mode, we keep these spaces. */
641 state = 3;
642 UNGET (ch);
643 PUT (' ');
644 break;
645 }
646 state = 10; /* Sp after symbol char */
647 goto recycle;
648 case 11:
649 state = 1;
650 UNGET (ch);
651 PUT (' '); /* Sp after label definition. */
652 break;
653 default:
654 BAD_CASE (state);
655 }
656 break;
657
658 case LEX_IS_TWOCHAR_COMMENT_1ST:
659 ch2 = GET ();
660 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
661 {
662 for (;;)
663 {
664 do
665 {
666 ch2 = GET ();
667 if (ch2 != EOF && IS_NEWLINE (ch2))
668 add_newlines++;
669 }
670 while (ch2 != EOF &&
671 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
672
673 while (ch2 != EOF &&
674 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
675 {
676 ch2 = GET ();
677 }
678
679 if (ch2 == EOF
680 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
681 break;
682 UNGET (ch);
683 }
684 if (ch2 == EOF)
685 as_warn ("end of file in multiline comment");
686
687 ch = ' ';
688 goto recycle;
689 }
690 else
691 {
692 if (ch2 != EOF)
693 UNGET (ch2);
694 if (state == 9 || state == 10)
695 state = 3;
696 PUT (ch);
697 }
698 break;
699
700 case LEX_IS_STRINGQUOTE:
701 if (state == 10)
702 {
703 /* Preserve the whitespace in foo "bar" */
704 UNGET (ch);
705 state = 3;
706 PUT (' ');
707
708 /* PUT didn't jump out. We could just break, but we
709 know what will happen, so optimize a bit. */
710 ch = GET ();
711 old_state = 3;
712 }
713 else if (state == 9)
714 old_state = 3;
715 else
716 old_state = state;
717 state = 5;
718 PUT (ch);
719 break;
720
721 #ifndef IEEE_STYLE
722 case LEX_IS_ONECHAR_QUOTE:
723 if (state == 10)
724 {
725 /* Preserve the whitespace in foo 'b' */
726 UNGET (ch);
727 state = 3;
728 PUT (' ');
729 break;
730 }
731 ch = GET ();
732 if (ch == EOF)
733 {
734 as_warn ("end of file after a one-character quote; \\0 inserted");
735 ch = 0;
736 }
737 if (ch == '\\')
738 {
739 ch = GET ();
740 if (ch == EOF)
741 {
742 as_warn ("end of file in escape character");
743 ch = '\\';
744 }
745 else
746 ch = process_escape (ch);
747 }
748 sprintf (out_buf, "%d", (int) (unsigned char) ch);
749
750 /* None of these 'x constants for us. We want 'x'. */
751 if ((ch = GET ()) != '\'')
752 {
753 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
754 as_warn ("Missing close quote: (assumed)");
755 #else
756 if (ch != EOF)
757 UNGET (ch);
758 #endif
759 }
760 if (strlen (out_buf) == 1)
761 {
762 PUT (out_buf[0]);
763 break;
764 }
765 if (state == 9)
766 old_state = 3;
767 else
768 old_state = state;
769 state = -1;
770 out_string = out_buf;
771 PUT (*out_string++);
772 break;
773 #endif
774
775 case LEX_IS_COLON:
776 if (state == 9 || state == 10)
777 state = 3;
778 else if (state != 3)
779 state = 1;
780 PUT (ch);
781 break;
782
783 case LEX_IS_NEWLINE:
784 /* Roll out a bunch of newlines from inside comments, etc. */
785 if (add_newlines)
786 {
787 --add_newlines;
788 UNGET (ch);
789 }
790 /* fall thru into... */
791
792 case LEX_IS_LINE_SEPARATOR:
793 state = 0;
794 PUT (ch);
795 break;
796
797 case LEX_IS_LINE_COMMENT_START:
798 if (state == 0) /* Only comment at start of line. */
799 {
800 /* FIXME-someday: The two character comment stuff was
801 badly thought out. On i386, we want '/' as line
802 comment start AND we want C style comments. hence
803 this hack. The whole lexical process should be
804 reworked. xoxorich. */
805 if (ch == '/')
806 {
807 ch2 = GET ();
808 if (ch2 == '*')
809 {
810 state = -2;
811 break;
812 }
813 else
814 {
815 UNGET (ch2);
816 }
817 } /* bad hack */
818
819 if (ch != '#')
820 not_cpp_line = 1;
821
822 do
823 {
824 ch = GET ();
825 }
826 while (ch != EOF && IS_WHITESPACE (ch));
827 if (ch == EOF)
828 {
829 as_warn ("end of file in comment; newline inserted");
830 PUT ('\n');
831 break;
832 }
833 if (ch < '0' || ch > '9' || not_cpp_line)
834 {
835 /* Non-numerics: Eat whole comment line */
836 while (ch != EOF && !IS_NEWLINE (ch))
837 ch = GET ();
838 if (ch == EOF)
839 as_warn ("EOF in Comment: Newline inserted");
840 state = 0;
841 PUT ('\n');
842 break;
843 }
844 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
845 UNGET (ch);
846 old_state = 4;
847 state = -1;
848 out_string = "\t.appline ";
849 PUT (*out_string++);
850 break;
851 }
852
853 /* We have a line comment character which is not at the
854 start of a line. If this is also a normal comment
855 character, fall through. Otherwise treat it as a default
856 character. */
857 if (strchr (comment_chars, ch) == NULL
858 && (! flag_mri
859 || (ch != '!' && ch != '*')))
860 goto de_fault;
861 if (flag_mri
862 && (ch == '!' || ch == '*')
863 && state != 1
864 && state != 10)
865 goto de_fault;
866 /* Fall through. */
867 case LEX_IS_COMMENT_START:
868 do
869 {
870 ch = GET ();
871 }
872 while (ch != EOF && !IS_NEWLINE (ch));
873 if (ch == EOF)
874 as_warn ("end of file in comment; newline inserted");
875 state = 0;
876 PUT ('\n');
877 break;
878
879 case LEX_IS_SYMBOL_COMPONENT:
880 if (state == 10)
881 {
882 /* This is a symbol character following another symbol
883 character, with whitespace in between. We skipped
884 the whitespace earlier, so output it now. */
885 UNGET (ch);
886 state = 3;
887 PUT (' ');
888 break;
889 }
890
891 if (state == 3)
892 state = 9;
893
894 /* This is a common case. Quickly copy CH and all the
895 following symbol component or normal characters. */
896 if (to + 1 < toend)
897 {
898 char *s;
899 int len;
900
901 for (s = from; s < fromend; s++)
902 {
903 int type;
904
905 ch2 = *s;
906 type = lex[ch2];
907 if (type != 0
908 && type != LEX_IS_SYMBOL_COMPONENT)
909 break;
910 }
911 if (s > from)
912 {
913 /* Handle the last character normally, for
914 simplicity. */
915 --s;
916 }
917 len = s - from;
918 if (len > (toend - to) - 1)
919 len = (toend - to) - 1;
920 if (len > 0)
921 {
922 PUT (ch);
923 if (len > 8)
924 {
925 memcpy (to, from, len);
926 to += len;
927 from += len;
928 }
929 else
930 {
931 switch (len)
932 {
933 case 8: *to++ = *from++;
934 case 7: *to++ = *from++;
935 case 6: *to++ = *from++;
936 case 5: *to++ = *from++;
937 case 4: *to++ = *from++;
938 case 3: *to++ = *from++;
939 case 2: *to++ = *from++;
940 case 1: *to++ = *from++;
941 }
942 }
943 ch = GET ();
944 }
945 }
946
947 /* Fall through. */
948 default:
949 de_fault:
950 /* Some relatively `normal' character. */
951 if (state == 0)
952 {
953 state = 11; /* Now seeing label definition */
954 }
955 else if (state == 1)
956 {
957 state = 2; /* Ditto */
958 }
959 else if (state == 9)
960 {
961 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
962 state = 3;
963 }
964 else if (state == 10)
965 {
966 state = 3;
967 }
968 PUT (ch);
969 break;
970 }
971 }
972
973 /*NOTREACHED*/
974
975 fromeof:
976 /* We have reached the end of the input. */
977 return to - tostart;
978
979 tofull:
980 /* The output buffer is full. Save any input we have not yet
981 processed. */
982 if (fromend > from)
983 {
984 char *save;
985
986 save = (char *) xmalloc (fromend - from);
987 memcpy (save, from, fromend - from);
988 if (saved_input != NULL)
989 free (saved_input);
990 saved_input = save;
991 saved_input_len = fromend - from;
992 }
993 else
994 {
995 if (saved_input != NULL)
996 {
997 free (saved_input);
998 saved_input = NULL;
999 }
1000 }
1001 return to - tostart;
1002 }
1003
1004 /* end of app.c */