1 /* Copyright (C) 2002-2013 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
5 This file is part of the GNU Fortran runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
36 typedef unsigned char uchar
;
38 /* read.c -- Deal with formatted reads */
41 /* set_integer()-- All of the integer assignments come here to
42 actually place the value into memory. */
45 set_integer (void *dest
, GFC_INTEGER_LARGEST value
, int length
)
49 #ifdef HAVE_GFC_INTEGER_16
50 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
54 GFC_INTEGER_16 tmp
= value
;
55 memcpy (dest
, (void *) &tmp
, length
);
61 GFC_INTEGER_8 tmp
= value
;
62 memcpy (dest
, (void *) &tmp
, length
);
67 GFC_INTEGER_4 tmp
= value
;
68 memcpy (dest
, (void *) &tmp
, length
);
73 GFC_INTEGER_2 tmp
= value
;
74 memcpy (dest
, (void *) &tmp
, length
);
79 GFC_INTEGER_1 tmp
= value
;
80 memcpy (dest
, (void *) &tmp
, length
);
84 internal_error (NULL
, "Bad integer kind");
89 /* Max signed value of size give by length argument. */
94 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
95 GFC_UINTEGER_LARGEST value
;
100 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
104 for (int n
= 1; n
< 4 * length
; n
++)
105 value
= (value
<< 2) + 3;
109 return GFC_INTEGER_8_HUGE
;
111 return GFC_INTEGER_4_HUGE
;
113 return GFC_INTEGER_2_HUGE
;
115 return GFC_INTEGER_1_HUGE
;
117 internal_error (NULL
, "Bad integer kind");
122 /* convert_real()-- Convert a character representation of a floating
123 point number to the machine number. Returns nonzero if there is an
124 invalid input. Note: many architectures (e.g. IA-64, HP-PA)
125 require that the storage pointed to by the dest argument is
126 properly aligned for the type in question. */
129 convert_real (st_parameter_dt
*dtp
, void *dest
, const char *buffer
, int length
)
136 *((GFC_REAL_4
*) dest
) =
137 #if defined(HAVE_STRTOF)
138 gfc_strtof (buffer
, &endptr
);
140 (GFC_REAL_4
) gfc_strtod (buffer
, &endptr
);
145 *((GFC_REAL_8
*) dest
) = gfc_strtod (buffer
, &endptr
);
148 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
150 *((GFC_REAL_10
*) dest
) = gfc_strtold (buffer
, &endptr
);
154 #if defined(HAVE_GFC_REAL_16)
155 # if defined(GFC_REAL_16_IS_FLOAT128)
157 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, &endptr
);
159 # elif defined(HAVE_STRTOLD)
161 *((GFC_REAL_16
*) dest
) = gfc_strtold (buffer
, &endptr
);
167 internal_error (&dtp
->common
, "Unsupported real kind during IO");
170 if (buffer
== endptr
)
172 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
173 "Error during floating point read");
174 next_record (dtp
, 1);
181 /* convert_infnan()-- Convert character INF/NAN representation to the
182 machine number. Note: many architectures (e.g. IA-64, HP-PA) require
183 that the storage pointed to by the dest argument is properly aligned
184 for the type in question. */
187 convert_infnan (st_parameter_dt
*dtp
, void *dest
, const char *buffer
,
190 const char *s
= buffer
;
191 int is_inf
, plus
= 1;
207 *((GFC_REAL_4
*) dest
) = plus
? __builtin_inff () : -__builtin_inff ();
209 *((GFC_REAL_4
*) dest
) = plus
? __builtin_nanf ("") : -__builtin_nanf ("");
214 *((GFC_REAL_8
*) dest
) = plus
? __builtin_inf () : -__builtin_inf ();
216 *((GFC_REAL_8
*) dest
) = plus
? __builtin_nan ("") : -__builtin_nan ("");
219 #if defined(HAVE_GFC_REAL_10)
222 *((GFC_REAL_10
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
224 *((GFC_REAL_10
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
228 #if defined(HAVE_GFC_REAL_16)
229 # if defined(GFC_REAL_16_IS_FLOAT128)
231 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, NULL
);
236 *((GFC_REAL_16
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
238 *((GFC_REAL_16
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
244 internal_error (&dtp
->common
, "Unsupported real kind during IO");
251 /* read_l()-- Read a logical value */
254 read_l (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
261 p
= read_block_form (dtp
, &w
);
284 set_integer (dest
, (GFC_INTEGER_LARGEST
) 1, length
);
288 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
292 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
293 "Bad value on logical read");
294 next_record (dtp
, 1);
301 read_utf8 (st_parameter_dt
*dtp
, int *nbytes
)
303 static const uchar masks
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
304 static const uchar patns
[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
311 s
= read_block_form (dtp
, nbytes
);
315 /* If this is a short read, just return. */
323 /* The number of leading 1-bits in the first byte indicates how many
325 for (nb
= 2; nb
< 7; nb
++)
326 if ((c
& ~masks
[nb
-1]) == patns
[nb
-1])
331 c
= (c
& masks
[nb
-1]);
334 s
= read_block_form (dtp
, &nread
);
337 /* Decode the bytes read. */
338 for (i
= 1; i
< nb
; i
++)
340 gfc_char4_t n
= *s
++;
342 if ((n
& 0xC0) != 0x80)
345 c
= ((c
<< 6) + (n
& 0x3F));
348 /* Make sure the shortest possible encoding was used. */
349 if (c
<= 0x7F && nb
> 1) goto invalid
;
350 if (c
<= 0x7FF && nb
> 2) goto invalid
;
351 if (c
<= 0xFFFF && nb
> 3) goto invalid
;
352 if (c
<= 0x1FFFFF && nb
> 4) goto invalid
;
353 if (c
<= 0x3FFFFFF && nb
> 5) goto invalid
;
355 /* Make sure the character is valid. */
356 if (c
> 0x7FFFFFFF || (c
>= 0xD800 && c
<= 0xDFFF))
362 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
, "Invalid UTF-8 encoding");
363 return (gfc_char4_t
) '?';
368 read_utf8_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
375 len
= (width
< len
) ? len
: width
;
379 /* Proceed with decoding one character at a time. */
380 for (j
= 0; j
< len
; j
++, dest
++)
382 c
= read_utf8 (dtp
, &nbytes
);
384 /* Check for a short read and if so, break out. */
388 *dest
= c
> 255 ? '?' : (uchar
) c
;
391 /* If there was a short read, pad the remaining characters. */
392 for (i
= j
; i
< len
; i
++)
398 read_default_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
403 s
= read_block_form (dtp
, &width
);
410 m
= (width
> len
) ? len
: width
;
415 memset (p
+ m
, ' ', n
);
420 read_utf8_char4 (st_parameter_dt
*dtp
, void *p
, int len
, int width
)
426 len
= (width
< len
) ? len
: width
;
428 dest
= (gfc_char4_t
*) p
;
430 /* Proceed with decoding one character at a time. */
431 for (j
= 0; j
< len
; j
++, dest
++)
433 *dest
= read_utf8 (dtp
, &nbytes
);
435 /* Check for a short read and if so, break out. */
440 /* If there was a short read, pad the remaining characters. */
441 for (i
= j
; i
< len
; i
++)
442 *dest
++ = (gfc_char4_t
) ' ';
448 read_default_char4 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
453 if (is_char4_unit(dtp
))
457 s4
= (gfc_char4_t
*) read_block_form4 (dtp
, &width
);
464 m
= ((int) width
> len
) ? len
: (int) width
;
466 dest
= (gfc_char4_t
*) p
;
468 for (n
= 0; n
< m
; n
++)
471 for (n
= 0; n
< len
- (int) width
; n
++)
472 *dest
++ = (gfc_char4_t
) ' ';
478 s
= read_block_form (dtp
, &width
);
485 m
= ((int) width
> len
) ? len
: (int) width
;
487 dest
= (gfc_char4_t
*) p
;
489 for (n
= 0; n
< m
; n
++, dest
++, s
++)
490 *dest
= (unsigned char ) *s
;
492 for (n
= 0; n
< len
- (int) width
; n
++, dest
++)
493 *dest
= (unsigned char) ' ';
498 /* read_a()-- Read a character record into a KIND=1 character destination,
499 processing UTF-8 encoding if necessary. */
502 read_a (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
508 if (wi
== -1) /* '(A)' edit descriptor */
512 /* Read in w characters, treating comma as not a separator. */
513 dtp
->u
.p
.sf_read_comma
= 0;
515 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
516 read_utf8_char1 (dtp
, p
, length
, w
);
518 read_default_char1 (dtp
, p
, length
, w
);
520 dtp
->u
.p
.sf_read_comma
=
521 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
525 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
526 processing UTF-8 encoding if necessary. */
529 read_a_char4 (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
534 if (w
== -1) /* '(A)' edit descriptor */
537 /* Read in w characters, treating comma as not a separator. */
538 dtp
->u
.p
.sf_read_comma
= 0;
540 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
541 read_utf8_char4 (dtp
, p
, length
, w
);
543 read_default_char4 (dtp
, p
, length
, w
);
545 dtp
->u
.p
.sf_read_comma
=
546 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
549 /* eat_leading_spaces()-- Given a character pointer and a width,
550 * ignore the leading spaces. */
553 eat_leading_spaces (int *width
, char *p
)
557 if (*width
== 0 || *p
!= ' ')
569 next_char (st_parameter_dt
*dtp
, char **p
, int *w
)
584 if (dtp
->u
.p
.blank_status
!= BLANK_UNSPECIFIED
)
585 return ' '; /* return a blank to signal a null */
587 /* At this point, the rest of the field has to be trailing blanks */
601 /* read_decimal()-- Read a decimal integer value. The values here are
605 read_decimal (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
607 GFC_UINTEGER_LARGEST value
, maxv
, maxv_10
;
608 GFC_INTEGER_LARGEST v
;
614 p
= read_block_form (dtp
, &w
);
619 p
= eat_leading_spaces (&w
, p
);
622 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
644 maxv
= si_max (length
);
649 /* At this point we have a digit-string */
654 c
= next_char (dtp
, &p
, &w
);
660 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
661 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
664 if (c
< '0' || c
> '9')
673 if (value
> maxv
- c
)
683 set_integer (dest
, v
, length
);
687 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
688 "Bad value during integer read");
689 next_record (dtp
, 1);
693 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
694 "Value overflowed during integer read");
695 next_record (dtp
, 1);
700 /* read_radix()-- This function reads values for non-decimal radixes.
701 * The difference here is that we treat the values here as unsigned
702 * values for the purposes of overflow. If minus sign is present and
703 * the top bit is set, the value will be incorrect. */
706 read_radix (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
,
709 GFC_UINTEGER_LARGEST value
, maxv
, maxv_r
;
710 GFC_INTEGER_LARGEST v
;
716 p
= read_block_form (dtp
, &w
);
721 p
= eat_leading_spaces (&w
, p
);
724 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
728 /* Maximum unsigned value, assuming two's complement. */
729 maxv
= 2 * si_max (length
) + 1;
730 maxv_r
= maxv
/ radix
;
751 /* At this point we have a digit-string */
756 c
= next_char (dtp
, &p
, &w
);
761 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
762 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
768 if (c
< '0' || c
> '1')
773 if (c
< '0' || c
> '7')
798 c
= c
- 'a' + '9' + 1;
807 c
= c
- 'A' + '9' + 1;
821 value
= radix
* value
;
823 if (maxv
- c
< value
)
832 set_integer (dest
, v
, length
);
836 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
837 "Bad value during integer read");
838 next_record (dtp
, 1);
842 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
843 "Value overflowed during integer read");
844 next_record (dtp
, 1);
849 /* read_f()-- Read a floating point number with F-style editing, which
850 is what all of the other floating point descriptors behave as. The
851 tricky part is that optional spaces are allowed after an E or D,
852 and the implicit decimal point if a decimal point is not present in
856 read_f (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
858 int w
, seen_dp
, exponent
;
863 int seen_int_digit
; /* Seen a digit before the decimal point? */
864 int seen_dec_digit
; /* Seen a digit after the decimal point? */
873 /* Read in the next block. */
874 p
= read_block_form (dtp
, &w
);
877 p
= eat_leading_spaces (&w
, (char*) p
);
881 /* In this buffer we're going to re-format the number cleanly to be parsed
882 by convert_real in the end; this assures we're using strtod from the
883 C library for parsing and thus probably get the best accuracy possible.
884 This process may add a '+0.0' in front of the number as well as change the
885 exponent because of an implicit decimal point or the like. Thus allocating
886 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
887 original buffer had should be enough. */
888 buffer
= gfc_alloca (w
+ 11);
892 if (*p
== '-' || *p
== '+')
900 p
= eat_leading_spaces (&w
, (char*) p
);
904 /* Check for Infinity or NaN. */
905 if (unlikely ((w
>= 3 && (*p
== 'i' || *p
== 'I' || *p
== 'n' || *p
== 'N'))))
910 /* Scan through the buffer keeping track of spaces and parenthesis. We
911 null terminate the string as soon as we see a left paren or if we are
912 BLANK_NULL mode. Leading spaces have already been skipped above,
913 trailing spaces are ignored by converting to '\0'. A space
914 between "NaN" and the optional perenthesis is not permitted. */
921 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
935 if (seen_paren
++ != 1)
949 if (seen_paren
!= 0 && seen_paren
!= 2)
952 if ((strcmp (save
, "inf") == 0) || (strcmp (save
, "infinity") == 0))
957 else if (strcmp (save
, "nan") != 0)
960 convert_infnan (dtp
, dest
, buffer
, length
);
964 /* Process the mantissa string. */
970 if (dtp
->u
.p
.current_unit
->decimal_status
!= DECIMAL_COMMA
)
983 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
988 else if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
991 /* TODO: Should we check instead that there are only trailing
992 blanks here, as is done below for exponents? */
1035 /* No exponent has been seen, so we use the current scale factor. */
1036 exponent
= - dtp
->u
.p
.scale_factor
;
1039 /* At this point the start of an exponent has been found. */
1041 p
= eat_leading_spaces (&w
, (char*) p
);
1042 if (*p
== '-' || *p
== '+')
1050 /* At this point a digit string is required. We calculate the value
1051 of the exponent in order to take account of the scale factor and
1052 the d parameter before explict conversion takes place. */
1057 if (dtp
->u
.p
.blank_status
== BLANK_UNSPECIFIED
)
1059 while (w
> 0 && isdigit (*p
))
1062 exponent
+= *p
- '0';
1067 /* Only allow trailing blanks. */
1076 else /* BZ or BN status is enabled. */
1082 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
1085 assert (dtp
->u
.p
.blank_status
== BLANK_NULL
);
1087 else if (!isdigit (*p
))
1092 exponent
+= *p
- '0';
1100 exponent
*= exponent_sign
;
1103 /* Use the precision specified in the format if no decimal point has been
1106 exponent
-= f
->u
.real
.d
;
1108 /* Output a trailing '0' after decimal point if not yet found. */
1109 if (seen_dp
&& !seen_dec_digit
)
1111 /* Handle input of style "E+NN" by inserting a 0 for the
1113 else if (!seen_int_digit
&& !seen_dec_digit
)
1115 notify_std (&dtp
->common
, GFC_STD_LEGACY
,
1116 "REAL input of style 'E+NN'");
1120 /* Print out the exponent to finish the reformatted number. Maximum 4
1121 digits for the exponent. */
1130 exponent
= - exponent
;
1133 assert (exponent
< 10000);
1134 for (dig
= 3; dig
>= 0; --dig
)
1136 out
[dig
] = (char) ('0' + exponent
% 10);
1143 /* Do the actual conversion. */
1144 convert_real (dtp
, dest
, buffer
, length
);
1148 /* The value read is zero. */
1153 *((GFC_REAL_4
*) dest
) = 0.0;
1157 *((GFC_REAL_8
*) dest
) = 0.0;
1160 #ifdef HAVE_GFC_REAL_10
1162 *((GFC_REAL_10
*) dest
) = 0.0;
1166 #ifdef HAVE_GFC_REAL_16
1168 *((GFC_REAL_16
*) dest
) = 0.0;
1173 internal_error (&dtp
->common
, "Unsupported real kind during IO");
1178 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
1179 "Bad value during floating point read");
1180 next_record (dtp
, 1);
1185 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1186 * and never look at it. */
1189 read_x (st_parameter_dt
*dtp
, int n
)
1193 if ((dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
|| is_internal_unit (dtp
))
1194 && dtp
->u
.p
.current_unit
->bytes_left
< n
)
1195 n
= dtp
->u
.p
.current_unit
->bytes_left
;
1202 if (is_internal_unit (dtp
))
1204 mem_alloc_r (dtp
->u
.p
.current_unit
->s
, &length
);
1205 if (unlikely (length
< n
))
1210 if (dtp
->u
.p
.sf_seen_eor
)
1216 q
= fbuf_getc (dtp
->u
.p
.current_unit
);
1219 else if (q
== '\n' || q
== '\r')
1221 /* Unexpected end of line. Set the position. */
1222 dtp
->u
.p
.sf_seen_eor
= 1;
1224 /* If we see an EOR during non-advancing I/O, we need to skip
1225 the rest of the I/O statement. Set the corresponding flag. */
1226 if (dtp
->u
.p
.advance_status
== ADVANCE_NO
|| dtp
->u
.p
.seen_dollar
)
1227 dtp
->u
.p
.eor_condition
= 1;
1229 /* If we encounter a CR, it might be a CRLF. */
1230 if (q
== '\r') /* Probably a CRLF */
1232 /* See if there is an LF. */
1233 q2
= fbuf_getc (dtp
->u
.p
.current_unit
);
1235 dtp
->u
.p
.sf_seen_eor
= 2;
1236 else if (q2
!= EOF
) /* Oops, seek back. */
1237 fbuf_seek (dtp
->u
.p
.current_unit
, -1, SEEK_CUR
);
1245 if ((dtp
->common
.flags
& IOPARM_DT_HAS_SIZE
) != 0)
1246 dtp
->u
.p
.size_used
+= (GFC_IO_INT
) n
;
1247 dtp
->u
.p
.current_unit
->bytes_left
-= n
;
1248 dtp
->u
.p
.current_unit
->strm_pos
+= (gfc_offset
) n
;