re PR libfortran/35863 ([F2003] Implement ENCODING="UTF-8")
[gcc.git] / libgfortran / io / read.c
1 /* Copyright (C) 2002, 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
19 executable.)
20
21 Libgfortran is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
25
26 You should have received a copy of the GNU General Public License
27 along with Libgfortran; see the file COPYING. If not, write to
28 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
29 Boston, MA 02110-1301, USA. */
30
31 #include "io.h"
32 #include <string.h>
33 #include <errno.h>
34 #include <ctype.h>
35 #include <stdlib.h>
36
37 /* read.c -- Deal with formatted reads */
38
39
40 /* set_integer()-- All of the integer assignments come here to
41 * actually place the value into memory. */
42
43 void
44 set_integer (void *dest, GFC_INTEGER_LARGEST value, int length)
45 {
46 switch (length)
47 {
48 #ifdef HAVE_GFC_INTEGER_16
49 case 16:
50 {
51 GFC_INTEGER_16 tmp = value;
52 memcpy (dest, (void *) &tmp, length);
53 }
54 break;
55 #endif
56 case 8:
57 {
58 GFC_INTEGER_8 tmp = value;
59 memcpy (dest, (void *) &tmp, length);
60 }
61 break;
62 case 4:
63 {
64 GFC_INTEGER_4 tmp = value;
65 memcpy (dest, (void *) &tmp, length);
66 }
67 break;
68 case 2:
69 {
70 GFC_INTEGER_2 tmp = value;
71 memcpy (dest, (void *) &tmp, length);
72 }
73 break;
74 case 1:
75 {
76 GFC_INTEGER_1 tmp = value;
77 memcpy (dest, (void *) &tmp, length);
78 }
79 break;
80 default:
81 internal_error (NULL, "Bad integer kind");
82 }
83 }
84
85
86 /* max_value()-- Given a length (kind), return the maximum signed or
87 * unsigned value */
88
89 GFC_UINTEGER_LARGEST
90 max_value (int length, int signed_flag)
91 {
92 GFC_UINTEGER_LARGEST value;
93 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
94 int n;
95 #endif
96
97 switch (length)
98 {
99 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
100 case 16:
101 case 10:
102 value = 1;
103 for (n = 1; n < 4 * length; n++)
104 value = (value << 2) + 3;
105 if (! signed_flag)
106 value = 2*value+1;
107 break;
108 #endif
109 case 8:
110 value = signed_flag ? 0x7fffffffffffffff : 0xffffffffffffffff;
111 break;
112 case 4:
113 value = signed_flag ? 0x7fffffff : 0xffffffff;
114 break;
115 case 2:
116 value = signed_flag ? 0x7fff : 0xffff;
117 break;
118 case 1:
119 value = signed_flag ? 0x7f : 0xff;
120 break;
121 default:
122 internal_error (NULL, "Bad integer kind");
123 }
124
125 return value;
126 }
127
128
129 /* convert_real()-- Convert a character representation of a floating
130 * point number to the machine number. Returns nonzero if there is a
131 * range problem during conversion. TODO: handle not-a-numbers and
132 * infinities. */
133
134 int
135 convert_real (st_parameter_dt *dtp, void *dest, const char *buffer, int length)
136 {
137 errno = 0;
138
139 switch (length)
140 {
141 case 4:
142 {
143 GFC_REAL_4 tmp =
144 #if defined(HAVE_STRTOF)
145 strtof (buffer, NULL);
146 #else
147 (GFC_REAL_4) strtod (buffer, NULL);
148 #endif
149 memcpy (dest, (void *) &tmp, length);
150 }
151 break;
152 case 8:
153 {
154 GFC_REAL_8 tmp = strtod (buffer, NULL);
155 memcpy (dest, (void *) &tmp, length);
156 }
157 break;
158 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
159 case 10:
160 {
161 GFC_REAL_10 tmp = strtold (buffer, NULL);
162 memcpy (dest, (void *) &tmp, length);
163 }
164 break;
165 #endif
166 #if defined(HAVE_GFC_REAL_16) && defined (HAVE_STRTOLD)
167 case 16:
168 {
169 GFC_REAL_16 tmp = strtold (buffer, NULL);
170 memcpy (dest, (void *) &tmp, length);
171 }
172 break;
173 #endif
174 default:
175 internal_error (&dtp->common, "Unsupported real kind during IO");
176 }
177
178 if (errno == EINVAL)
179 {
180 generate_error (&dtp->common, LIBERROR_READ_VALUE,
181 "Error during floating point read");
182 next_record (dtp, 1);
183 return 1;
184 }
185
186 return 0;
187 }
188
189
190 /* read_l()-- Read a logical value */
191
192 void
193 read_l (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
194 {
195 char *p;
196 size_t w;
197
198 w = f->u.w;
199
200 p = gfc_alloca (w);
201
202 if (read_block_form (dtp, p, &w) == FAILURE)
203 return;
204
205 while (*p == ' ')
206 {
207 if (--w == 0)
208 goto bad;
209 p++;
210 }
211
212 if (*p == '.')
213 {
214 if (--w == 0)
215 goto bad;
216 p++;
217 }
218
219 switch (*p)
220 {
221 case 't':
222 case 'T':
223 set_integer (dest, (GFC_INTEGER_LARGEST) 1, length);
224 break;
225 case 'f':
226 case 'F':
227 set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
228 break;
229 default:
230 bad:
231 generate_error (&dtp->common, LIBERROR_READ_VALUE,
232 "Bad value on logical read");
233 next_record (dtp, 1);
234 break;
235 }
236 }
237
238
239 /* read_a()-- Read a character record. This one is pretty easy. */
240
241 void
242 read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length)
243 {
244 char *s;
245 int m, n, wi, status;
246 size_t w;
247
248 wi = f->u.w;
249 if (wi == -1) /* '(A)' edit descriptor */
250 wi = length;
251
252 w = wi;
253
254 s = gfc_alloca (w);
255
256 dtp->u.p.sf_read_comma = 0;
257 status = read_block_form (dtp, s, &w);
258 dtp->u.p.sf_read_comma =
259 dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1;
260 if (status == FAILURE)
261 return;
262 if (w > (size_t) length)
263 s += (w - length);
264
265 m = ((int) w > length) ? length : (int) w;
266 memcpy (p, s, m);
267
268 n = length - w;
269 if (n > 0)
270 memset (p + m, ' ', n);
271 }
272
273 void
274 read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
275 {
276 char *s;
277 gfc_char4_t *dest;
278 int m, n, wi, status;
279 size_t w;
280
281 wi = f->u.w;
282 if (wi == -1) /* '(A)' edit descriptor */
283 wi = length;
284
285 w = wi;
286
287 s = gfc_alloca (w);
288
289 /* Read in w bytes, treating comma as not a separator. */
290 dtp->u.p.sf_read_comma = 0;
291 status = read_block_form (dtp, s, &w);
292 dtp->u.p.sf_read_comma =
293 dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1;
294
295 if (status == FAILURE)
296 return;
297 if (w > (size_t) length)
298 s += (w - length);
299
300 m = ((int) w > length) ? length : (int) w;
301
302 dest = (gfc_char4_t *) p;
303
304 for (n = 0; n < m; n++, dest++, s++)
305 *dest = (unsigned char ) *s;
306
307 for (n = 0; n < length - (int) w; n++, dest++)
308 *dest = (unsigned char) ' ';
309 }
310
311 /* eat_leading_spaces()-- Given a character pointer and a width,
312 * ignore the leading spaces. */
313
314 static char *
315 eat_leading_spaces (int *width, char *p)
316 {
317 for (;;)
318 {
319 if (*width == 0 || *p != ' ')
320 break;
321
322 (*width)--;
323 p++;
324 }
325
326 return p;
327 }
328
329
330 static char
331 next_char (st_parameter_dt *dtp, char **p, int *w)
332 {
333 char c, *q;
334
335 if (*w == 0)
336 return '\0';
337
338 q = *p;
339 c = *q++;
340 *p = q;
341
342 (*w)--;
343
344 if (c != ' ')
345 return c;
346 if (dtp->u.p.blank_status != BLANK_UNSPECIFIED)
347 return ' '; /* return a blank to signal a null */
348
349 /* At this point, the rest of the field has to be trailing blanks */
350
351 while (*w > 0)
352 {
353 if (*q++ != ' ')
354 return '?';
355 (*w)--;
356 }
357
358 *p = q;
359 return '\0';
360 }
361
362
363 /* read_decimal()-- Read a decimal integer value. The values here are
364 * signed values. */
365
366 void
367 read_decimal (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
368 {
369 GFC_UINTEGER_LARGEST value, maxv, maxv_10;
370 GFC_INTEGER_LARGEST v;
371 int w, negative;
372 size_t wu;
373 char c, *p;
374
375 wu = f->u.w;
376
377 p = gfc_alloca (wu);
378
379 if (read_block_form (dtp, p, &wu) == FAILURE)
380 return;
381
382 w = wu;
383
384 p = eat_leading_spaces (&w, p);
385 if (w == 0)
386 {
387 set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
388 return;
389 }
390
391 maxv = max_value (length, 1);
392 maxv_10 = maxv / 10;
393
394 negative = 0;
395 value = 0;
396
397 switch (*p)
398 {
399 case '-':
400 negative = 1;
401 /* Fall through */
402
403 case '+':
404 p++;
405 if (--w == 0)
406 goto bad;
407 /* Fall through */
408
409 default:
410 break;
411 }
412
413 /* At this point we have a digit-string */
414 value = 0;
415
416 for (;;)
417 {
418 c = next_char (dtp, &p, &w);
419 if (c == '\0')
420 break;
421
422 if (c == ' ')
423 {
424 if (dtp->u.p.blank_status == BLANK_NULL) continue;
425 if (dtp->u.p.blank_status == BLANK_ZERO) c = '0';
426 }
427
428 if (c < '0' || c > '9')
429 goto bad;
430
431 if (value > maxv_10)
432 goto overflow;
433
434 c -= '0';
435 value = 10 * value;
436
437 if (value > maxv - c)
438 goto overflow;
439 value += c;
440 }
441
442 v = value;
443 if (negative)
444 v = -v;
445
446 set_integer (dest, v, length);
447 return;
448
449 bad:
450 generate_error (&dtp->common, LIBERROR_READ_VALUE,
451 "Bad value during integer read");
452 next_record (dtp, 1);
453 return;
454
455 overflow:
456 generate_error (&dtp->common, LIBERROR_READ_OVERFLOW,
457 "Value overflowed during integer read");
458 next_record (dtp, 1);
459
460 }
461
462
463 /* read_radix()-- This function reads values for non-decimal radixes.
464 * The difference here is that we treat the values here as unsigned
465 * values for the purposes of overflow. If minus sign is present and
466 * the top bit is set, the value will be incorrect. */
467
468 void
469 read_radix (st_parameter_dt *dtp, const fnode *f, char *dest, int length,
470 int radix)
471 {
472 GFC_UINTEGER_LARGEST value, maxv, maxv_r;
473 GFC_INTEGER_LARGEST v;
474 int w, negative;
475 char c, *p;
476 size_t wu;
477
478 wu = f->u.w;
479
480 p = gfc_alloca (wu);
481
482 if (read_block_form (dtp, p, &wu) == FAILURE)
483 return;
484
485 w = wu;
486
487 p = eat_leading_spaces (&w, p);
488 if (w == 0)
489 {
490 set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
491 return;
492 }
493
494 maxv = max_value (length, 0);
495 maxv_r = maxv / radix;
496
497 negative = 0;
498 value = 0;
499
500 switch (*p)
501 {
502 case '-':
503 negative = 1;
504 /* Fall through */
505
506 case '+':
507 p++;
508 if (--w == 0)
509 goto bad;
510 /* Fall through */
511
512 default:
513 break;
514 }
515
516 /* At this point we have a digit-string */
517 value = 0;
518
519 for (;;)
520 {
521 c = next_char (dtp, &p, &w);
522 if (c == '\0')
523 break;
524 if (c == ' ')
525 {
526 if (dtp->u.p.blank_status == BLANK_NULL) continue;
527 if (dtp->u.p.blank_status == BLANK_ZERO) c = '0';
528 }
529
530 switch (radix)
531 {
532 case 2:
533 if (c < '0' || c > '1')
534 goto bad;
535 break;
536
537 case 8:
538 if (c < '0' || c > '7')
539 goto bad;
540 break;
541
542 case 16:
543 switch (c)
544 {
545 case '0':
546 case '1':
547 case '2':
548 case '3':
549 case '4':
550 case '5':
551 case '6':
552 case '7':
553 case '8':
554 case '9':
555 break;
556
557 case 'a':
558 case 'b':
559 case 'c':
560 case 'd':
561 case 'e':
562 case 'f':
563 c = c - 'a' + '9' + 1;
564 break;
565
566 case 'A':
567 case 'B':
568 case 'C':
569 case 'D':
570 case 'E':
571 case 'F':
572 c = c - 'A' + '9' + 1;
573 break;
574
575 default:
576 goto bad;
577 }
578
579 break;
580 }
581
582 if (value > maxv_r)
583 goto overflow;
584
585 c -= '0';
586 value = radix * value;
587
588 if (maxv - c < value)
589 goto overflow;
590 value += c;
591 }
592
593 v = value;
594 if (negative)
595 v = -v;
596
597 set_integer (dest, v, length);
598 return;
599
600 bad:
601 generate_error (&dtp->common, LIBERROR_READ_VALUE,
602 "Bad value during integer read");
603 next_record (dtp, 1);
604 return;
605
606 overflow:
607 generate_error (&dtp->common, LIBERROR_READ_OVERFLOW,
608 "Value overflowed during integer read");
609 next_record (dtp, 1);
610
611 }
612
613
614 /* read_f()-- Read a floating point number with F-style editing, which
615 is what all of the other floating point descriptors behave as. The
616 tricky part is that optional spaces are allowed after an E or D,
617 and the implicit decimal point if a decimal point is not present in
618 the input. */
619
620 void
621 read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
622 {
623 size_t wu;
624 int w, seen_dp, exponent;
625 int exponent_sign, val_sign;
626 int ndigits;
627 int edigits;
628 int i;
629 char *p, *buffer;
630 char *digits;
631 char scratch[SCRATCH_SIZE];
632
633 val_sign = 1;
634 seen_dp = 0;
635 wu = f->u.w;
636
637 p = gfc_alloca (wu);
638
639 if (read_block_form (dtp, p, &wu) == FAILURE)
640 return;
641
642 w = wu;
643
644 p = eat_leading_spaces (&w, p);
645 if (w == 0)
646 goto zero;
647
648 /* Optional sign */
649
650 if (*p == '-' || *p == '+')
651 {
652 if (*p == '-')
653 val_sign = -1;
654 p++;
655 w--;
656 }
657
658 exponent_sign = 1;
659 p = eat_leading_spaces (&w, p);
660 if (w == 0)
661 goto zero;
662
663 /* A digit, a '.' or a exponent character ('e', 'E', 'd' or 'D')
664 is required at this point */
665
666 if (!isdigit (*p) && *p != '.' && *p != ',' && *p != 'd' && *p != 'D'
667 && *p != 'e' && *p != 'E')
668 goto bad_float;
669
670 /* Remember the position of the first digit. */
671 digits = p;
672 ndigits = 0;
673
674 /* Scan through the string to find the exponent. */
675 while (w > 0)
676 {
677 switch (*p)
678 {
679 case ',':
680 if (dtp->u.p.decimal_status == DECIMAL_COMMA && *p == ',')
681 *p = '.';
682 /* Fall through */
683 case '.':
684 if (seen_dp)
685 goto bad_float;
686 seen_dp = 1;
687 /* Fall through */
688
689 case '0':
690 case '1':
691 case '2':
692 case '3':
693 case '4':
694 case '5':
695 case '6':
696 case '7':
697 case '8':
698 case '9':
699 case ' ':
700 ndigits++;
701 p++;
702 w--;
703 break;
704
705 case '-':
706 exponent_sign = -1;
707 /* Fall through */
708
709 case '+':
710 p++;
711 w--;
712 goto exp2;
713
714 case 'd':
715 case 'e':
716 case 'D':
717 case 'E':
718 p++;
719 w--;
720 goto exp1;
721
722 default:
723 goto bad_float;
724 }
725 }
726
727 /* No exponent has been seen, so we use the current scale factor */
728 exponent = -dtp->u.p.scale_factor;
729 goto done;
730
731 bad_float:
732 generate_error (&dtp->common, LIBERROR_READ_VALUE,
733 "Bad value during floating point read");
734 next_record (dtp, 1);
735 return;
736
737 /* The value read is zero */
738 zero:
739 switch (length)
740 {
741 case 4:
742 *((GFC_REAL_4 *) dest) = 0;
743 break;
744
745 case 8:
746 *((GFC_REAL_8 *) dest) = 0;
747 break;
748
749 #ifdef HAVE_GFC_REAL_10
750 case 10:
751 *((GFC_REAL_10 *) dest) = 0;
752 break;
753 #endif
754
755 #ifdef HAVE_GFC_REAL_16
756 case 16:
757 *((GFC_REAL_16 *) dest) = 0;
758 break;
759 #endif
760
761 default:
762 internal_error (&dtp->common, "Unsupported real kind during IO");
763 }
764 return;
765
766 /* At this point the start of an exponent has been found */
767 exp1:
768 while (w > 0 && *p == ' ')
769 {
770 w--;
771 p++;
772 }
773
774 switch (*p)
775 {
776 case '-':
777 exponent_sign = -1;
778 /* Fall through */
779
780 case '+':
781 p++;
782 w--;
783 break;
784 }
785
786 if (w == 0)
787 goto bad_float;
788
789 /* At this point a digit string is required. We calculate the value
790 of the exponent in order to take account of the scale factor and
791 the d parameter before explict conversion takes place. */
792 exp2:
793 if (!isdigit (*p))
794 goto bad_float;
795
796 exponent = *p - '0';
797 p++;
798 w--;
799
800 if (dtp->u.p.blank_status == BLANK_UNSPECIFIED) /* Normal processing of exponent */
801 {
802 while (w > 0 && isdigit (*p))
803 {
804 exponent = 10 * exponent + *p - '0';
805 p++;
806 w--;
807 }
808
809 /* Only allow trailing blanks */
810
811 while (w > 0)
812 {
813 if (*p != ' ')
814 goto bad_float;
815 p++;
816 w--;
817 }
818 }
819 else /* BZ or BN status is enabled */
820 {
821 while (w > 0)
822 {
823 if (*p == ' ')
824 {
825 if (dtp->u.p.blank_status == BLANK_ZERO) *p = '0';
826 if (dtp->u.p.blank_status == BLANK_NULL)
827 {
828 p++;
829 w--;
830 continue;
831 }
832 }
833 else if (!isdigit (*p))
834 goto bad_float;
835
836 exponent = 10 * exponent + *p - '0';
837 p++;
838 w--;
839 }
840 }
841
842 exponent = exponent * exponent_sign;
843
844 done:
845 /* Use the precision specified in the format if no decimal point has been
846 seen. */
847 if (!seen_dp)
848 exponent -= f->u.real.d;
849
850 if (exponent > 0)
851 {
852 edigits = 2;
853 i = exponent;
854 }
855 else
856 {
857 edigits = 3;
858 i = -exponent;
859 }
860
861 while (i >= 10)
862 {
863 i /= 10;
864 edigits++;
865 }
866
867 i = ndigits + edigits + 1;
868 if (val_sign < 0)
869 i++;
870
871 if (i < SCRATCH_SIZE)
872 buffer = scratch;
873 else
874 buffer = get_mem (i);
875
876 /* Reformat the string into a temporary buffer. As we're using atof it's
877 easiest to just leave the decimal point in place. */
878 p = buffer;
879 if (val_sign < 0)
880 *(p++) = '-';
881 for (; ndigits > 0; ndigits--)
882 {
883 if (*digits == ' ')
884 {
885 if (dtp->u.p.blank_status == BLANK_ZERO) *digits = '0';
886 if (dtp->u.p.blank_status == BLANK_NULL)
887 {
888 digits++;
889 continue;
890 }
891 }
892 *p = *digits;
893 p++;
894 digits++;
895 }
896 *(p++) = 'e';
897 sprintf (p, "%d", exponent);
898
899 /* Do the actual conversion. */
900 convert_real (dtp, dest, buffer, length);
901
902 if (buffer != scratch)
903 free_mem (buffer);
904
905 }
906
907
908 /* read_x()-- Deal with the X/TR descriptor. We just read some data
909 * and never look at it. */
910
911 void
912 read_x (st_parameter_dt * dtp, int n)
913 {
914 if ((dtp->u.p.pad_status == PAD_NO || is_internal_unit (dtp))
915 && dtp->u.p.current_unit->bytes_left < n)
916 n = dtp->u.p.current_unit->bytes_left;
917
918 dtp->u.p.sf_read_comma = 0;
919 if (n > 0)
920 read_sf (dtp, &n, 1);
921 dtp->u.p.sf_read_comma = 1;
922 dtp->u.p.current_unit->strm_pos += (gfc_offset) n;
923 }
924