ignored by import
[gcc.git] / libio / iovfscanf.c
1 /*
2 Copyright (C) 1993 Free Software Foundation
3
4 This file is part of the GNU IO Library. This library is free
5 software; you can redistribute it and/or modify it under the
6 terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this library; see the file COPYING. If not, write to the Free
17 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
19 As a special exception, if you link this library with files
20 compiled with a GNU compiler to produce an executable, this does not cause
21 the resulting executable to be covered by the GNU General Public License.
22 This exception does not however invalidate any other reasons why
23 the executable file might be covered by the GNU General Public License. */
24
25 /*
26 * Copyright (c) 1990 The Regents of the University of California.
27 * All rights reserved.
28 *
29 * Redistribution and use in source and binary forms are permitted
30 * provided that the above copyright notice and this paragraph are
31 * duplicated in all such forms and that any documentation,
32 * advertising materials, and other materials related to such
33 * distribution and use acknowledge that the software was developed
34 * by the University of California, Berkeley. The name of the
35 * University may not be used to endorse or promote products derived
36 * from this software without specific prior written permission.
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
38 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
39 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
40 */
41
42 /* Extensively hacked for GNU iostream by Per Bothner 1991, 1992, 1993.
43 Changes copyright Free Software Foundation 1992, 1993. */
44
45 #if defined(LIBC_SCCS) && !defined(lint)
46 static char sccsid[] = "%W% (Berkeley) %G%";
47 #endif /* LIBC_SCCS and not lint */
48
49 #include <libioP.h>
50 #include <ctype.h>
51 #ifdef __STDC__
52 #include <stdarg.h>
53 #else
54 #include <varargs.h>
55 #endif
56
57 #ifndef NO_FLOATING_POINT
58 #define FLOATING_POINT
59 #endif
60
61 #ifdef FLOATING_POINT
62 #include "floatio.h"
63 #define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */
64 #else
65 #define BUF 40
66 #endif
67
68 /*
69 * Flags used during conversion.
 *
 * These bits are kept in the `flags' variable of _IO_vfscanf.  The
 * first group is set while the conversion specification itself is being
 * parsed.  Note that WIDTH has the same value (0x40) as SIGNOK below;
 * the numeric scanners set SIGNOK themselves before they test it.
70 */
71 #define LONG 0x01 /* l: long or double */
72 #define LONGDBL 0x02 /* L: long double; unimplemented */
73 #define SHORT 0x04 /* h: short */
74 #define SUPPRESS 0x08 /* *: suppress assignment */
75 #define POINTER 0x10 /* weird %p pointer (`fake hex') */
76 #define NOSKIP 0x20 /* do not skip leading blanks (set by %c and %[) */
77 #define WIDTH 0x40 /* a field width was given in the format */
78
79 /*
80 * The following are used in numeric conversions only:
81 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
82 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating-point pair (DPTOK, EXPOK) and the integral pair
 * (PFXOK, NZDIGITS) deliberately share the bits 0x100 and 0x200.
83 */
84 #define SIGNOK 0x40 /* +/- is (still) legal */
85 #define NDIGITS 0x80 /* no digits detected */
86
87 #define DPTOK 0x100 /* (float) decimal point is still legal */
88 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */
89
90 #define PFXOK 0x100 /* (integer) 0x prefix is (still) legal */
91 #define NZDIGITS 0x200 /* (integer) no zero digits detected */
92
93 /*
94 * Conversion types.
 *
 * Once a conversion character has been recognized, _IO_vfscanf replaces
 * it with one of these codes and dispatches on the code.
95 */
96 #define CT_CHAR 0 /* %c conversion */
97 #define CT_CCL 1 /* %[...] conversion */
98 #define CT_STRING 2 /* %s conversion */
99 #define CT_INT 3 /* integer, i.e., strtol or strtoul */
100 #define CT_FLOAT 4 /* floating, i.e., strtod */
101
/* Local shorthands for the unsigned types used throughout this file. */
102 #define u_char unsigned char
103 #define u_long unsigned long
104
105 #ifdef __cplusplus
106 extern "C" {
107 #endif
108 extern u_long strtoul __P((const char*, char**, int));
109 extern long strtol __P((const char*, char**, int));
110 static const u_char *__sccl __P((char *tab, const u_char *fmt));
111 #ifndef _IO_USE_DTOA
112 extern double atof();
113 #endif
114 #ifdef __cplusplus
115 }
116 #endif
117
118 /* If errp != NULL, *errp|=1 if we see a premature EOF;
119 *errp|=2 if we an invalid character. */
120
121 int
122 DEFUN(_IO_vfscanf, (fp, fmt0, ap, errp),
123 register _IO_FILE *fp AND char const *fmt0
124 AND _IO_va_list ap AND int *errp)
125 {
126 register const u_char *fmt = (const u_char *)fmt0;
127 register int c; /* character from format, or conversion */
128 register _IO_ssize_t width; /* field width, or 0 */
129 register char *p; /* points into all kinds of strings */
130 register int n; /* handy integer */
131 register int flags = 0; /* flags as defined above */
132 register char *p0; /* saves original value of p when necessary */
133 int nassigned; /* number of fields assigned */
134 int nread; /* number of characters consumed from fp */
135 /* Assignments to base and ccfn are just to suppress warnings from gcc.*/
136 int base = 0; /* base argument to strtol/strtoul */
137 typedef u_long (*strtoulfn) __P((const char*, char**, int));
138 strtoulfn ccfn = 0;
139 /* conversion function (strtol/strtoul) */
140 char ccltab[256]; /* character class table for %[...] */
141 char buf[BUF]; /* buffer for numeric conversions */
142 int seen_eof = 0;
143
144 /* `basefix' is used to avoid `if' tests in the integer scanner */
145 static short basefix[17] =
146 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
147
148 nassigned = 0;
149 nread = 0;
150 for (;;) {
151 c = *fmt++;
152 if (c == 0)
153 goto done;
154 if (isspace(c)) {
155 for (;;) {
156 c = _IO_getc(fp);
157 if (c == EOF) {
158 seen_eof++;
159 break;
160 }
161 if (!isspace(c)) {
162 _IO_ungetc (c, fp);
163 break;
164 }
165 nread++;
166 }
167 continue;
168 }
169 if (c != '%')
170 goto literal;
171 width = 0;
172 flags = 0;
173 /*
174 * switch on the format. continue if done;
175 * break once format type is derived.
176 */
177 again: c = *fmt++;
178 switch (c) {
179 case '%':
180 literal:
181 n = _IO_getc(fp);
182 if (n == EOF)
183 goto eof_failure;
184 if (n != c) {
185 _IO_ungetc (n, fp);
186 goto match_failure;
187 }
188 nread++;
189 continue;
190
191 case '*':
192 if (flags) goto control_failure;
193 flags = SUPPRESS;
194 goto again;
195 case 'l':
196 if (flags & ~(SUPPRESS | WIDTH)) goto control_failure;
197 flags |= LONG;
198 goto again;
199 case 'L':
200 if (flags & ~(SUPPRESS | WIDTH)) goto control_failure;
201 flags |= LONGDBL;
202 goto again;
203 case 'h':
204 if (flags & ~(SUPPRESS | WIDTH)) goto control_failure;
205 flags |= SHORT;
206 goto again;
207
208 case '0': case '1': case '2': case '3': case '4':
209 case '5': case '6': case '7': case '8': case '9':
210 if (flags & ~(SUPPRESS | WIDTH)) goto control_failure;
211 flags |= WIDTH;
212 width = width * 10 + c - '0';
213 goto again;
214
215 /*
216 * Conversions.
217 * Those marked `compat' are for 4.[123]BSD compatibility.
218 *
219 * (According to ANSI, E and X formats are supposed
220 * to the same as e and x. Sorry about that.)
221 */
222 case 'D': /* compat */
223 flags |= LONG;
224 /* FALLTHROUGH */
225 case 'd':
226 c = CT_INT;
227 ccfn = (strtoulfn)strtol;
228 base = 10;
229 break;
230
231 case 'i':
232 c = CT_INT;
233 ccfn = (strtoulfn)strtol;
234 base = 0;
235 break;
236
237 case 'O': /* compat */
238 flags |= LONG;
239 /* FALLTHROUGH */
240 case 'o':
241 c = CT_INT;
242 ccfn = strtoul;
243 base = 8;
244 break;
245
246 case 'u':
247 c = CT_INT;
248 ccfn = strtoul;
249 base = 10;
250 break;
251
252 case 'X':
253 case 'x':
254 flags |= PFXOK; /* enable 0x prefixing */
255 c = CT_INT;
256 ccfn = strtoul;
257 base = 16;
258 break;
259
260 #ifdef FLOATING_POINT
261 case 'E': case 'F':
262 case 'e': case 'f': case 'g':
263 c = CT_FLOAT;
264 break;
265 #endif
266
267 case 's':
268 c = CT_STRING;
269 break;
270
271 case '[':
272 fmt = __sccl(ccltab, fmt);
273 flags |= NOSKIP;
274 c = CT_CCL;
275 break;
276
277 case 'c':
278 flags |= NOSKIP;
279 c = CT_CHAR;
280 break;
281
282 case 'p': /* pointer format is like hex */
283 flags |= POINTER | PFXOK;
284 c = CT_INT;
285 ccfn = strtoul;
286 base = 16;
287 break;
288
289 case 'n':
290 if (flags & SUPPRESS) /* ??? */
291 continue;
292 if (flags & SHORT)
293 *va_arg(ap, short *) = nread;
294 else if (flags & LONG)
295 *va_arg(ap, long *) = nread;
296 else
297 *va_arg(ap, int *) = nread;
298 continue;
299
300 /*
301 * Disgusting backwards compatibility hacks. XXX
302 */
303 case '\0': /* compat */
304 nassigned = EOF;
305 goto done;
306
307 default: /* compat */
308 if (isupper(c))
309 flags |= LONG;
310 c = CT_INT;
311 ccfn = (strtoulfn)strtol;
312 base = 10;
313 break;
314 }
315
316 /*
317 * We have a conversion that requires input.
318 */
319 if (_IO_peekc(fp) == EOF)
320 goto eof_failure;
321
322 /*
323 * Consume leading white space, except for formats
324 * that suppress this.
325 */
326 if ((flags & NOSKIP) == 0) {
327 n = (unsigned char)*fp->_IO_read_ptr;
328 while (isspace(n)) {
329 fp->_IO_read_ptr++;
330 nread++;
331 n = _IO_peekc(fp);
332 if (n == EOF)
333 goto eof_failure;
334 }
335 /* Note that there is at least one character in
336 the buffer, so conversions that do not set NOSKIP
337 can no longer result in an input failure. */
338 }
339
340 /*
341 * Do the conversion.
342 */
343 switch (c) {
344
345 case CT_CHAR:
346 /* scan arbitrary characters (sets NOSKIP) */
347 if (width == 0) /* FIXME! */
348 width = 1;
349 if (flags & SUPPRESS) {
350 _IO_size_t sum = 0;
351 for (;;) {
352 n = fp->_IO_read_end - fp->_IO_read_ptr;
353 if (n < (int)width) {
354 sum += n;
355 width -= n;
356 fp->_IO_read_ptr += n;
357 if (__underflow(fp) == EOF)
358 if (sum == 0)
359 goto eof_failure;
360 else {
361 seen_eof++;
362 break;
363 }
364 } else {
365 sum += width;
366 fp->_IO_read_ptr += width;
367 break;
368 }
369 }
370 nread += sum;
371 } else {
372 _IO_size_t r =
373
374 _IO_XSGETN (fp, (char*)va_arg(ap, char*), width);
375 if (r != width)
376 goto eof_failure;
377 nread += r;
378 nassigned++;
379 }
380 break;
381
382 case CT_CCL:
383 /* scan a (nonempty) character class (sets NOSKIP) */
384 if (width == 0)
385 width = ~0; /* `infinity' */
386 /* take only those things in the class */
387 if (flags & SUPPRESS) {
388 n = 0;
389 while (ccltab[(unsigned char)*fp->_IO_read_ptr]) {
390 n++, fp->_IO_read_ptr++;
391 if (--width == 0)
392 break;
393 if (_IO_peekc(fp) == EOF) {
394 if (n == 0)
395 goto eof_failure;
396 seen_eof++;
397 break;
398 }
399 }
400 if (n == 0)
401 goto match_failure;
402 } else {
403 p0 = p = va_arg(ap, char *);
404 while (ccltab[(unsigned char)*fp->_IO_read_ptr]) {
405 *p++ = *fp->_IO_read_ptr++;
406 if (--width == 0)
407 break;
408 if (_IO_peekc(fp) == EOF) {
409 if (p == p0)
410 goto eof_failure;
411 seen_eof++;
412 break;
413 }
414 }
415 n = p - p0;
416 if (n == 0)
417 goto match_failure;
418 *p = 0;
419 nassigned++;
420 }
421 nread += n;
422 break;
423
424 case CT_STRING:
425 /* like CCL, but zero-length string OK, & no NOSKIP */
426 if (width == 0)
427 width = ~0;
428 if (flags & SUPPRESS) {
429 n = 0;
430 while (!isspace((unsigned char)*fp->_IO_read_ptr)) {
431 n++, fp->_IO_read_ptr++;
432 if (--width == 0)
433 break;
434 if (_IO_peekc(fp) == EOF) {
435 seen_eof++;
436 break;
437 }
438 }
439 nread += n;
440 } else {
441 p0 = p = va_arg(ap, char *);
442 while (!isspace((unsigned char)*fp->_IO_read_ptr)) {
443 *p++ = *fp->_IO_read_ptr++;
444 if (--width == 0)
445 break;
446 if (_IO_peekc(fp) == EOF) {
447 seen_eof++;
448 break;
449 }
450 }
451 *p = 0;
452 nread += p - p0;
453 nassigned++;
454 }
455 continue;
456
457 case CT_INT:
458 /* scan an integer as if by strtol/strtoul */
459 if (width == 0 || width > sizeof(buf) - 1)
460 width = sizeof(buf) - 1;
461 flags |= SIGNOK | NDIGITS | NZDIGITS;
462 for (p = buf; width; width--) {
463 c = (unsigned char)*fp->_IO_read_ptr;
464 /*
465 * Switch on the character; `goto ok'
466 * if we accept it as a part of number.
467 */
468 switch (c) {
469
470 /*
471 * The digit 0 is always legal, but is
472 * special. For %i conversions, if no
473 * digits (zero or nonzero) have been
474 * scanned (only signs), we will have
475 * base==0. In that case, we should set
476 * it to 8 and enable 0x prefixing.
477 * Also, if we have not scanned zero digits
478 * before this, do not turn off prefixing
479 * (someone else will turn it off if we
480 * have scanned any nonzero digits).
481 */
482 case '0':
483 if (base == 0) {
484 base = 8;
485 flags |= PFXOK;
486 }
487 if (flags & NZDIGITS)
488 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
489 else
490 flags &= ~(SIGNOK|PFXOK|NDIGITS);
491 goto ok;
492
493 /* 1 through 7 always legal */
494 case '1': case '2': case '3':
495 case '4': case '5': case '6': case '7':
496 base = basefix[base];
497 flags &= ~(SIGNOK | PFXOK | NDIGITS);
498 goto ok;
499
500 /* digits 8 and 9 ok iff decimal or hex */
501 case '8': case '9':
502 base = basefix[base];
503 if (base <= 8)
504 break; /* not legal here */
505 flags &= ~(SIGNOK | PFXOK | NDIGITS);
506 goto ok;
507
508 /* letters ok iff hex */
509 case 'A': case 'B': case 'C':
510 case 'D': case 'E': case 'F':
511 case 'a': case 'b': case 'c':
512 case 'd': case 'e': case 'f':
513 /* no need to fix base here */
514 if (base <= 10)
515 break; /* not legal here */
516 flags &= ~(SIGNOK | PFXOK | NDIGITS);
517 goto ok;
518
519 /* sign ok only as first character */
520 case '+': case '-':
521 if (flags & SIGNOK) {
522 flags &= ~SIGNOK;
523 goto ok;
524 }
525 break;
526
527 /* x ok iff flag still set & 2nd char */
528 case 'x': case 'X':
529 if (flags & PFXOK && p == buf + 1) {
530 base = 16; /* if %i */
531 flags &= ~PFXOK;
532 goto ok;
533 }
534 break;
535 }
536
537 /*
538 * If we got here, c is not a legal character
539 * for a number. Stop accumulating digits.
540 */
541 break;
542 ok:
543 /*
544 * c is legal: store it and look at the next.
545 */
546 *p++ = c;
547 fp->_IO_read_ptr++;
548 if (_IO_peekc(fp) == EOF) {
549 seen_eof++;
550 break; /* EOF */
551 }
552 }
553 /*
554 * If we had only a sign, it is no good; push
555 * back the sign. If the number ends in `x',
556 * it was [sign] '0' 'x', so push back the x
557 * and treat it as [sign] '0'.
558 */
559 if (flags & NDIGITS) {
560 if (p > buf)
561 (void) _IO_ungetc(*(u_char *)--p, fp);
562 goto match_failure;
563 }
564 c = ((u_char *)p)[-1];
565 if (c == 'x' || c == 'X') {
566 --p;
567 (void) _IO_ungetc (c, fp);
568 }
569 if ((flags & SUPPRESS) == 0) {
570 u_long res;
571
572 *p = 0;
573 res = (*ccfn)(buf, (char **)NULL, base);
574 if (flags & POINTER)
575 *va_arg(ap, void **) = (void *)res;
576 else if (flags & SHORT)
577 *va_arg(ap, short *) = res;
578 else if (flags & LONG)
579 *va_arg(ap, long *) = res;
580 else
581 *va_arg(ap, int *) = res;
582 nassigned++;
583 }
584 nread += p - buf;
585 break;
586
587 #ifdef FLOATING_POINT
588 case CT_FLOAT:
589 /* scan a floating point number as if by strtod */
590 if (width == 0 || width > sizeof(buf) - 1)
591 width = sizeof(buf) - 1;
592 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
593 for (p = buf; width; width--) {
594 c = (unsigned char)*fp->_IO_read_ptr;
595 /*
596 * This code mimicks the integer conversion
597 * code, but is much simpler.
598 */
599 switch (c) {
600
601 case '0': case '1': case '2': case '3':
602 case '4': case '5': case '6': case '7':
603 case '8': case '9':
604 flags &= ~(SIGNOK | NDIGITS);
605 goto fok;
606
607 case '+': case '-':
608 if (flags & SIGNOK) {
609 flags &= ~SIGNOK;
610 goto fok;
611 }
612 break;
613 case '.':
614 if (flags & DPTOK) {
615 flags &= ~(SIGNOK | DPTOK);
616 goto fok;
617 }
618 break;
619 case 'e': case 'E':
620 /* no exponent without some digits */
621 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
622 flags =
623 (flags & ~(EXPOK|DPTOK)) |
624 SIGNOK | NDIGITS;
625 goto fok;
626 }
627 break;
628 }
629 break;
630 fok:
631 *p++ = c;
632 fp->_IO_read_ptr++;
633 if (_IO_peekc(fp) == EOF) {
634 seen_eof++;
635 break; /* EOF */
636 }
637 }
638 /*
639 * If no digits, might be missing exponent digits
640 * (just give back the exponent) or might be missing
641 * regular digits, but had sign and/or decimal point.
642 */
643 if (flags & NDIGITS) {
644 if (flags & EXPOK) {
645 /* no digits at all */
646 while (p > buf)
647 _IO_ungetc (*(u_char *)--p, fp);
648 goto match_failure;
649 }
650 /* just a bad exponent (e and maybe sign) */
651 c = *(u_char *)--p;
652 if (c != 'e' && c != 'E') {
653 (void) _IO_ungetc (c, fp);/* sign */
654 c = *(u_char *)--p;
655 }
656 (void) _IO_ungetc (c, fp);
657 }
658 if ((flags & SUPPRESS) == 0) {
659 double res;
660 *p = 0;
661 #ifdef _IO_USE_DTOA
662 res = _IO_strtod(buf, NULL);
663 #else
664 res = atof(buf);
665 #endif
666 if (flags & LONG)
667 *va_arg(ap, double *) = res;
668 else
669 *va_arg(ap, float *) = res;
670 nassigned++;
671 }
672 nread += p - buf;
673 break;
674 #endif /* FLOATING_POINT */
675 }
676 }
677 eof_failure:
678 seen_eof++;
679 input_failure:
680 if (nassigned == 0)
681 nassigned = -1;
682 control_failure:
683 match_failure:
684 if (errp)
685 *errp |= 2;
686 done:
687 if (errp && seen_eof)
688 *errp |= 1;
689 return (nassigned);
690 }
691
692 /*
693 * Fill in the given table from the scanset at the given format
694 * (just after `['). Return a pointer to the character past the
695 * closing `]'. The table has a 1 wherever characters should be
696 * considered part of the scanset.
697 */
698 static const u_char *
699 DEFUN(__sccl, (tab, fmt),
700 register char *tab AND register const u_char *fmt)
701 {
702 register int c, n, v;
703
704 /* first `clear' the whole table */
705 c = *fmt++; /* first char hat => negated scanset */
706 if (c == '^') {
707 v = 1; /* default => accept */
708 c = *fmt++; /* get new first char */
709 } else
710 v = 0; /* default => reject */
711 /* should probably use memset here */
712 for (n = 0; n < 256; n++)
713 tab[n] = v;
714 if (c == 0)
715 return (fmt - 1);/* format ended before closing ] */
716
717 /*
718 * Now set the entries corresponding to the actual scanset
719 * to the opposite of the above.
720 *
721 * The first character may be ']' (or '-') without being special;
722 * the last character may be '-'.
723 */
724 v = 1 - v;
725 for (;;) {
726 tab[c] = v; /* take character c */
727 doswitch:
728 n = *fmt++; /* and examine the next */
729 switch (n) {
730
731 case 0: /* format ended too soon */
732 return (fmt - 1);
733
734 case '-':
735 /*
736 * A scanset of the form
737 * [01+-]
738 * is defined as `the digit 0, the digit 1,
739 * the character +, the character -', but
740 * the effect of a scanset such as
741 * [a-zA-Z0-9]
742 * is implementation defined. The V7 Unix
743 * scanf treats `a-z' as `the letters a through
744 * z', but treats `a-a' as `the letter a, the
745 * character -, and the letter a'.
746 *
747 * For compatibility, the `-' is not considerd
748 * to define a range if the character following
749 * it is either a close bracket (required by ANSI)
750 * or is not numerically greater than the character
751 * we just stored in the table (c).
752 */
753 n = *fmt;
754 if (n == ']' || n < c) {
755 c = '-';
756 break; /* resume the for(;;) */
757 }
758 fmt++;
759 do { /* fill in the range */
760 tab[++c] = v;
761 } while (c < n);
762 #if 1 /* XXX another disgusting compatibility hack */
763 /*
764 * Alas, the V7 Unix scanf also treats formats
765 * such as [a-c-e] as `the letters a through e'.
766 * This too is permitted by the standard....
767 */
768 goto doswitch;
769 #else
770 c = *fmt++;
771 if (c == 0)
772 return (fmt - 1);
773 if (c == ']')
774 return (fmt);
775 #endif
776 break;
777
778 case ']': /* end of scanset */
779 return (fmt);
780
781 default: /* just another character */
782 c = n;
783 break;
784 }
785 }
786 /* NOTREACHED */
787 }