Daily bump.
[gcc.git] / libgfortran / io / format.c
1 /* Copyright (C) 2002-2021 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
4
5 This file is part of the GNU Fortran runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28 interpretation during I/O statements. */
29
30 #include "io.h"
31 #include "format.h"
32 #include <ctype.h>
33 #include <string.h>
34
35
36 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
37 NULL };
38
39 /* Error messages. */
40
41 static const char posint_required[] = "Positive integer required in format",
42 period_required[] = "Period required in format",
43 nonneg_required[] = "Nonnegative width required in format",
44 unexpected_element[] = "Unexpected element '%c' in format\n",
45 unexpected_end[] = "Unexpected end of format string",
46 bad_string[] = "Unterminated character constant in format",
47 bad_hollerith[] = "Hollerith constant extends past the end of the format",
48 reversion_error[] = "Exhausted data descriptors in format",
49 zero_width[] = "Zero width in format descriptor";
50
51 /* The following routines support caching format data from parsed format strings
52 into a hash table. This avoids repeatedly parsing duplicate format strings
53 or format strings in I/O statements that are repeated in loops. */
54
55
56 /* Traverse the table and free all data. */
57
58 void
59 free_format_hash_table (gfc_unit *u)
60 {
61 size_t i;
62
63 /* free_format_data handles any NULL pointers. */
64 for (i = 0; i < FORMAT_HASH_SIZE; i++)
65 {
66 if (u->format_hash_table[i].hashed_fmt != NULL)
67 {
68 free_format_data (u->format_hash_table[i].hashed_fmt);
69 free (u->format_hash_table[i].key);
70 }
71 u->format_hash_table[i].key = NULL;
72 u->format_hash_table[i].key_len = 0;
73 u->format_hash_table[i].hashed_fmt = NULL;
74 }
75 }
76
77 /* Traverse the format_data structure and reset the fnode counters. */
78
79 static void
80 reset_node (fnode *fn)
81 {
82 fnode *f;
83
84 fn->count = 0;
85 fn->current = NULL;
86
87 if (fn->format != FMT_LPAREN)
88 return;
89
90 for (f = fn->u.child; f; f = f->next)
91 {
92 if (f->format == FMT_RPAREN)
93 break;
94 reset_node (f);
95 }
96 }
97
98 static void
99 reset_fnode_counters (st_parameter_dt *dtp)
100 {
101 fnode *f;
102 format_data *fmt;
103
104 fmt = dtp->u.p.fmt;
105
106 /* Clear this pointer at the head so things start at the right place. */
107 fmt->array.array[0].current = NULL;
108
109 for (f = fmt->array.array[0].u.child; f; f = f->next)
110 reset_node (f);
111 }
112
113
114 /* A simple hashing function to generate an index into the hash table. */
115
116 static uint32_t
117 format_hash (st_parameter_dt *dtp)
118 {
119 char *key;
120 gfc_charlen_type key_len;
121 uint32_t hash = 0;
122 gfc_charlen_type i;
123
124 /* Hash the format string. Super simple, but what the heck! */
125 key = dtp->format;
126 key_len = dtp->format_len;
127 for (i = 0; i < key_len; i++)
128 hash ^= key[i];
129 hash &= (FORMAT_HASH_SIZE - 1);
130 return hash;
131 }
132
133
134 static void
135 save_parsed_format (st_parameter_dt *dtp)
136 {
137 uint32_t hash;
138 gfc_unit *u;
139
140 hash = format_hash (dtp);
141 u = dtp->u.p.current_unit;
142
143 /* Index into the hash table. We are simply replacing whatever is there
144 relying on probability. */
145 if (u->format_hash_table[hash].hashed_fmt != NULL)
146 free_format_data (u->format_hash_table[hash].hashed_fmt);
147 u->format_hash_table[hash].hashed_fmt = NULL;
148
149 free (u->format_hash_table[hash].key);
150 u->format_hash_table[hash].key = dtp->format;
151
152 u->format_hash_table[hash].key_len = dtp->format_len;
153 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
154 }
155
156
157 static format_data *
158 find_parsed_format (st_parameter_dt *dtp)
159 {
160 uint32_t hash;
161 gfc_unit *u;
162
163 hash = format_hash (dtp);
164 u = dtp->u.p.current_unit;
165
166 if (u->format_hash_table[hash].key != NULL)
167 {
168 /* See if it matches. */
169 if (u->format_hash_table[hash].key_len == dtp->format_len)
170 {
171 /* So far so good. */
172 if (strncmp (u->format_hash_table[hash].key,
173 dtp->format, dtp->format_len) == 0)
174 return u->format_hash_table[hash].hashed_fmt;
175 }
176 }
177 return NULL;
178 }
179
180
181 /* next_char()-- Return the next character in the format string.
182 Returns -1 when the string is done. If the literal flag is set,
183 spaces are significant, otherwise they are not. */
184
185 static int
186 next_char (format_data *fmt, int literal)
187 {
188 int c;
189
190 do
191 {
192 if (fmt->format_string_len == 0)
193 return -1;
194
195 fmt->format_string_len--;
196 c = toupper (*fmt->format_string++);
197 fmt->error_element = c;
198 }
199 while ((c == ' ' || c == '\t') && !literal);
200
201 return c;
202 }
203
204
205 /* unget_char()-- Back up one character position. */
206
207 #define unget_char(fmt) \
208 { fmt->format_string--; fmt->format_string_len++; }
209
210
211 /* get_fnode()-- Allocate a new format node, inserting it into the
212 current singly linked list. These are initially allocated from the
213 static buffer. */
214
215 static fnode *
216 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
217 {
218 fnode *f;
219
220 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
221 {
222 fmt->last->next = xmalloc (sizeof (fnode_array));
223 fmt->last = fmt->last->next;
224 fmt->last->next = NULL;
225 fmt->avail = &fmt->last->array[0];
226 }
227 f = fmt->avail++;
228 memset (f, '\0', sizeof (fnode));
229
230 if (*head == NULL)
231 *head = *tail = f;
232 else
233 {
234 (*tail)->next = f;
235 *tail = f;
236 }
237
238 f->format = t;
239 f->repeat = -1;
240 f->source = fmt->format_string;
241 return f;
242 }
243
244
245 /* free_format()-- Free allocated format string. */
246 void
247 free_format (st_parameter_dt *dtp)
248 {
249 if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format)
250 {
251 free (dtp->format);
252 dtp->format = NULL;
253 }
254 }
255
256
257 /* free_format_data()-- Free all allocated format data. */
258
259 void
260 free_format_data (format_data *fmt)
261 {
262 fnode_array *fa, *fa_next;
263 fnode *fnp;
264
265 if (fmt == NULL)
266 return;
267
268 /* Free vlist descriptors in the fnode_array if one was allocated. */
269 for (fnp = fmt->array.array; fnp < &fmt->array.array[FARRAY_SIZE] &&
270 fnp->format != FMT_NONE; fnp++)
271 if (fnp->format == FMT_DT)
272 {
273 if (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist))
274 free (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist));
275 free (fnp->u.udf.vlist);
276 }
277
278 for (fa = fmt->array.next; fa; fa = fa_next)
279 {
280 fa_next = fa->next;
281 free (fa);
282 }
283
284 free (fmt);
285 fmt = NULL;
286 }
287
288
289 /* format_lex()-- Simple lexical analyzer for getting the next token
290 in a FORMAT string. We support a one-level token pushback in the
291 fmt->saved_token variable. */
292
293 static format_token
294 format_lex (format_data *fmt)
295 {
296 format_token token;
297 int negative_flag;
298 int c;
299 char delim;
300
301 if (fmt->saved_token != FMT_NONE)
302 {
303 token = fmt->saved_token;
304 fmt->saved_token = FMT_NONE;
305 return token;
306 }
307
308 negative_flag = 0;
309 c = next_char (fmt, 0);
310
311 switch (c)
312 {
313 case '*':
314 token = FMT_STAR;
315 break;
316
317 case '(':
318 token = FMT_LPAREN;
319 break;
320
321 case ')':
322 token = FMT_RPAREN;
323 break;
324
325 case '-':
326 negative_flag = 1;
327 /* Fall Through */
328
329 case '+':
330 c = next_char (fmt, 0);
331 if (!isdigit (c))
332 {
333 token = FMT_UNKNOWN;
334 break;
335 }
336
337 fmt->value = c - '0';
338
339 for (;;)
340 {
341 c = next_char (fmt, 0);
342 if (!isdigit (c))
343 break;
344
345 fmt->value = 10 * fmt->value + c - '0';
346 }
347
348 unget_char (fmt);
349
350 if (negative_flag)
351 fmt->value = -fmt->value;
352 token = FMT_SIGNED_INT;
353 break;
354
355 case '0':
356 case '1':
357 case '2':
358 case '3':
359 case '4':
360 case '5':
361 case '6':
362 case '7':
363 case '8':
364 case '9':
365 fmt->value = c - '0';
366
367 for (;;)
368 {
369 c = next_char (fmt, 0);
370 if (!isdigit (c))
371 break;
372
373 fmt->value = 10 * fmt->value + c - '0';
374 }
375
376 unget_char (fmt);
377 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
378 break;
379
380 case '.':
381 token = FMT_PERIOD;
382 break;
383
384 case ',':
385 token = FMT_COMMA;
386 break;
387
388 case ':':
389 token = FMT_COLON;
390 break;
391
392 case '/':
393 token = FMT_SLASH;
394 break;
395
396 case '$':
397 token = FMT_DOLLAR;
398 break;
399
400 case 'T':
401 switch (next_char (fmt, 0))
402 {
403 case 'L':
404 token = FMT_TL;
405 break;
406 case 'R':
407 token = FMT_TR;
408 break;
409 default:
410 token = FMT_T;
411 unget_char (fmt);
412 break;
413 }
414
415 break;
416
417 case 'X':
418 token = FMT_X;
419 break;
420
421 case 'S':
422 switch (next_char (fmt, 0))
423 {
424 case 'S':
425 token = FMT_SS;
426 break;
427 case 'P':
428 token = FMT_SP;
429 break;
430 default:
431 token = FMT_S;
432 unget_char (fmt);
433 break;
434 }
435
436 break;
437
438 case 'B':
439 switch (next_char (fmt, 0))
440 {
441 case 'N':
442 token = FMT_BN;
443 break;
444 case 'Z':
445 token = FMT_BZ;
446 break;
447 default:
448 token = FMT_B;
449 unget_char (fmt);
450 break;
451 }
452
453 break;
454
455 case '\'':
456 case '"':
457 delim = c;
458
459 fmt->string = fmt->format_string;
460 fmt->value = 0; /* This is the length of the string */
461
462 for (;;)
463 {
464 c = next_char (fmt, 1);
465 if (c == -1)
466 {
467 token = FMT_BADSTRING;
468 fmt->error = bad_string;
469 break;
470 }
471
472 if (c == delim)
473 {
474 c = next_char (fmt, 1);
475
476 if (c == -1)
477 {
478 token = FMT_BADSTRING;
479 fmt->error = bad_string;
480 break;
481 }
482
483 if (c != delim)
484 {
485 unget_char (fmt);
486 token = FMT_STRING;
487 break;
488 }
489 }
490
491 fmt->value++;
492 }
493
494 break;
495
496 case 'P':
497 token = FMT_P;
498 break;
499
500 case 'I':
501 token = FMT_I;
502 break;
503
504 case 'O':
505 token = FMT_O;
506 break;
507
508 case 'Z':
509 token = FMT_Z;
510 break;
511
512 case 'F':
513 token = FMT_F;
514 break;
515
516 case 'E':
517 switch (next_char (fmt, 0))
518 {
519 case 'N':
520 token = FMT_EN;
521 break;
522 case 'S':
523 token = FMT_ES;
524 break;
525 default:
526 token = FMT_E;
527 unget_char (fmt);
528 break;
529 }
530 break;
531
532 case 'G':
533 token = FMT_G;
534 break;
535
536 case 'H':
537 token = FMT_H;
538 break;
539
540 case 'L':
541 token = FMT_L;
542 break;
543
544 case 'A':
545 token = FMT_A;
546 break;
547
548 case 'D':
549 switch (next_char (fmt, 0))
550 {
551 case 'P':
552 token = FMT_DP;
553 break;
554 case 'C':
555 token = FMT_DC;
556 break;
557 case 'T':
558 token = FMT_DT;
559 break;
560 default:
561 token = FMT_D;
562 unget_char (fmt);
563 break;
564 }
565 break;
566
567 case 'R':
568 switch (next_char (fmt, 0))
569 {
570 case 'C':
571 token = FMT_RC;
572 break;
573 case 'D':
574 token = FMT_RD;
575 break;
576 case 'N':
577 token = FMT_RN;
578 break;
579 case 'P':
580 token = FMT_RP;
581 break;
582 case 'U':
583 token = FMT_RU;
584 break;
585 case 'Z':
586 token = FMT_RZ;
587 break;
588 default:
589 unget_char (fmt);
590 token = FMT_UNKNOWN;
591 break;
592 }
593 break;
594
595 case -1:
596 token = FMT_END;
597 break;
598
599 default:
600 token = FMT_UNKNOWN;
601 break;
602 }
603
604 return token;
605 }
606
607
608 /* parse_format_list()-- Parse a format list. Assumes that a left
609 paren has already been seen. Returns a list representing the
610 parenthesis node which contains the rest of the list. */
611
612 static fnode *
613 parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
614 {
615 fnode *head, *tail;
616 format_token t, u, t2;
617 int repeat;
618 format_data *fmt = dtp->u.p.fmt;
619 bool seen_data_desc = false;
620 int standard;
621
622 head = tail = NULL;
623
624 /* Get the next format item */
625 format_item:
626 t = format_lex (fmt);
627 format_item_1:
628 switch (t)
629 {
630 case FMT_STAR:
631 t = format_lex (fmt);
632 if (t != FMT_LPAREN)
633 {
634 fmt->error = "Left parenthesis required after '*'";
635 goto finished;
636 }
637 get_fnode (fmt, &head, &tail, FMT_LPAREN);
638 tail->repeat = -2; /* Signifies unlimited format. */
639 tail->u.child = parse_format_list (dtp, &seen_data_desc);
640 *seen_dd = seen_data_desc;
641 if (fmt->error != NULL)
642 goto finished;
643 if (!seen_data_desc)
644 {
645 fmt->error = "'*' requires at least one associated data descriptor";
646 goto finished;
647 }
648 goto between_desc;
649
650 case FMT_POSINT:
651 repeat = fmt->value;
652
653 t = format_lex (fmt);
654 switch (t)
655 {
656 case FMT_LPAREN:
657 get_fnode (fmt, &head, &tail, FMT_LPAREN);
658 tail->repeat = repeat;
659 tail->u.child = parse_format_list (dtp, &seen_data_desc);
660 *seen_dd = seen_data_desc;
661 if (fmt->error != NULL)
662 goto finished;
663
664 goto between_desc;
665
666 case FMT_SLASH:
667 get_fnode (fmt, &head, &tail, FMT_SLASH);
668 tail->repeat = repeat;
669 goto optional_comma;
670
671 case FMT_X:
672 get_fnode (fmt, &head, &tail, FMT_X);
673 tail->repeat = 1;
674 tail->u.k = fmt->value;
675 goto between_desc;
676
677 case FMT_P:
678 goto p_descriptor;
679
680 default:
681 goto data_desc;
682 }
683
684 case FMT_LPAREN:
685 get_fnode (fmt, &head, &tail, FMT_LPAREN);
686 tail->repeat = 1;
687 tail->u.child = parse_format_list (dtp, &seen_data_desc);
688 *seen_dd = seen_data_desc;
689 if (fmt->error != NULL)
690 goto finished;
691
692 goto between_desc;
693
694 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
695 case FMT_ZERO: /* Same for zero. */
696 t = format_lex (fmt);
697 if (t != FMT_P)
698 {
699 fmt->error = "Expected P edit descriptor in format";
700 goto finished;
701 }
702
703 p_descriptor:
704 get_fnode (fmt, &head, &tail, FMT_P);
705 tail->u.k = fmt->value;
706 tail->repeat = 1;
707
708 t = format_lex (fmt);
709 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
710 || t == FMT_G || t == FMT_E)
711 {
712 repeat = 1;
713 goto data_desc;
714 }
715
716 if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
717 && t != FMT_POSINT)
718 {
719 fmt->error = "Comma required after P descriptor";
720 goto finished;
721 }
722
723 fmt->saved_token = t;
724 goto optional_comma;
725
726 case FMT_P: /* P and X require a prior number */
727 fmt->error = "P descriptor requires leading scale factor";
728 goto finished;
729
730 case FMT_X:
731 /*
732 EXTENSION!
733
734 If we would be pedantic in the library, we would have to reject
735 an X descriptor without an integer prefix:
736
737 fmt->error = "X descriptor requires leading space count";
738 goto finished;
739
740 However, this is an extension supported by many Fortran compilers,
741 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
742 runtime library, and make the front end reject it if the compiler
743 is in pedantic mode. The interpretation of 'X' is '1X'.
744 */
745 get_fnode (fmt, &head, &tail, FMT_X);
746 tail->repeat = 1;
747 tail->u.k = 1;
748 goto between_desc;
749
750 case FMT_STRING:
751 get_fnode (fmt, &head, &tail, FMT_STRING);
752 tail->u.string.p = fmt->string;
753 tail->u.string.length = fmt->value;
754 tail->repeat = 1;
755 goto optional_comma;
756
757 case FMT_RC:
758 case FMT_RD:
759 case FMT_RN:
760 case FMT_RP:
761 case FMT_RU:
762 case FMT_RZ:
763 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
764 "descriptor not allowed");
765 get_fnode (fmt, &head, &tail, t);
766 tail->repeat = 1;
767 goto between_desc;
768
769 case FMT_DC:
770 case FMT_DP:
771 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
772 "descriptor not allowed");
773 /* Fall through. */
774 case FMT_S:
775 case FMT_SS:
776 case FMT_SP:
777 case FMT_BN:
778 case FMT_BZ:
779 get_fnode (fmt, &head, &tail, t);
780 tail->repeat = 1;
781 goto between_desc;
782
783 case FMT_COLON:
784 get_fnode (fmt, &head, &tail, FMT_COLON);
785 tail->repeat = 1;
786 goto optional_comma;
787
788 case FMT_SLASH:
789 get_fnode (fmt, &head, &tail, FMT_SLASH);
790 tail->repeat = 1;
791 tail->u.r = 1;
792 goto optional_comma;
793
794 case FMT_DOLLAR:
795 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
796 tail->repeat = 1;
797 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
798 goto between_desc;
799
800 case FMT_T:
801 case FMT_TL:
802 case FMT_TR:
803 t2 = format_lex (fmt);
804 if (t2 != FMT_POSINT)
805 {
806 fmt->error = posint_required;
807 goto finished;
808 }
809 get_fnode (fmt, &head, &tail, t);
810 tail->u.n = fmt->value;
811 tail->repeat = 1;
812 goto between_desc;
813
814 case FMT_I:
815 case FMT_B:
816 case FMT_O:
817 case FMT_Z:
818 case FMT_E:
819 case FMT_EN:
820 case FMT_ES:
821 case FMT_D:
822 case FMT_DT:
823 case FMT_L:
824 case FMT_A:
825 case FMT_F:
826 case FMT_G:
827 repeat = 1;
828 *seen_dd = true;
829 goto data_desc;
830
831 case FMT_H:
832 get_fnode (fmt, &head, &tail, FMT_STRING);
833 if (fmt->format_string_len < 1)
834 {
835 fmt->error = bad_hollerith;
836 goto finished;
837 }
838
839 tail->u.string.p = fmt->format_string;
840 tail->u.string.length = 1;
841 tail->repeat = 1;
842
843 fmt->format_string++;
844 fmt->format_string_len--;
845
846 goto between_desc;
847
848 case FMT_END:
849 fmt->error = unexpected_end;
850 goto finished;
851
852 case FMT_BADSTRING:
853 goto finished;
854
855 case FMT_RPAREN:
856 goto finished;
857
858 default:
859 fmt->error = unexpected_element;
860 goto finished;
861 }
862
863 /* In this state, t must currently be a data descriptor. Deal with
864 things that can/must follow the descriptor */
865 data_desc:
866
867 switch (t)
868 {
869 case FMT_L:
870 *seen_dd = true;
871 t = format_lex (fmt);
872 if (t != FMT_POSINT)
873 {
874 if (t == FMT_ZERO)
875 {
876 if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
877 {
878 fmt->error = "Extension: Zero width after L descriptor";
879 goto finished;
880 }
881 else
882 notify_std (&dtp->common, GFC_STD_GNU,
883 "Zero width after L descriptor");
884 }
885 else
886 {
887 fmt->saved_token = t;
888 notify_std (&dtp->common, GFC_STD_GNU,
889 "Positive width required with L descriptor");
890 }
891 fmt->value = 1; /* Default width */
892 }
893 get_fnode (fmt, &head, &tail, FMT_L);
894 tail->u.n = fmt->value;
895 tail->repeat = repeat;
896 break;
897
898 case FMT_A:
899 *seen_dd = true;
900 t = format_lex (fmt);
901 if (t == FMT_ZERO)
902 {
903 fmt->error = zero_width;
904 goto finished;
905 }
906
907 if (t != FMT_POSINT)
908 {
909 fmt->saved_token = t;
910 fmt->value = -1; /* Width not present */
911 }
912
913 get_fnode (fmt, &head, &tail, FMT_A);
914 tail->repeat = repeat;
915 tail->u.n = fmt->value;
916 break;
917
918 case FMT_D:
919 case FMT_E:
920 case FMT_F:
921 case FMT_G:
922 case FMT_EN:
923 case FMT_ES:
924 *seen_dd = true;
925 get_fnode (fmt, &head, &tail, t);
926 tail->repeat = repeat;
927
928 u = format_lex (fmt);
929
930 /* Processing for zero width formats. */
931 if (u == FMT_ZERO)
932 {
933 if (t == FMT_F)
934 standard = GFC_STD_F95;
935 else if (t == FMT_G)
936 standard = GFC_STD_F2008;
937 else
938 standard = GFC_STD_F2018;
939
940 if (notification_std (standard) == NOTIFICATION_ERROR
941 || dtp->u.p.mode == READING)
942 {
943 fmt->error = zero_width;
944 goto finished;
945 }
946 tail->u.real.w = 0;
947
948 /* Look for the dot seperator. */
949 u = format_lex (fmt);
950 if (u != FMT_PERIOD)
951 {
952 fmt->saved_token = u;
953 break;
954 }
955
956 /* Look for the precision. */
957 u = format_lex (fmt);
958 if (u != FMT_ZERO && u != FMT_POSINT)
959 {
960 fmt->error = nonneg_required;
961 goto finished;
962 }
963 tail->u.real.d = fmt->value;
964
965 /* Look for optional exponent, not allowed for FMT_D */
966 if (t == FMT_D)
967 break;
968 u = format_lex (fmt);
969 if (u != FMT_E)
970 fmt->saved_token = u;
971 else
972 {
973 u = format_lex (fmt);
974 if (u != FMT_POSINT)
975 {
976 if (u == FMT_ZERO)
977 {
978 notify_std (&dtp->common, GFC_STD_F2018,
979 "Positive exponent width required");
980 }
981 else
982 {
983 fmt->error = "Positive exponent width required in "
984 "format string at %L";
985 goto finished;
986 }
987 }
988 tail->u.real.e = fmt->value;
989 }
990 break;
991 }
992
993 /* Processing for positive width formats. */
994 if (u == FMT_POSINT)
995 {
996 tail->u.real.w = fmt->value;
997
998 /* Look for the dot separator. Because of legacy behaviors
999 we do some look ahead for missing things. */
1000 t2 = t;
1001 t = format_lex (fmt);
1002 if (t != FMT_PERIOD)
1003 {
1004 /* We treat a missing decimal descriptor as 0. Note: This is only
1005 allowed if -std=legacy, otherwise an error occurs. */
1006 if (compile_options.warn_std != 0)
1007 {
1008 fmt->error = period_required;
1009 goto finished;
1010 }
1011 fmt->saved_token = t;
1012 tail->u.real.d = 0;
1013 tail->u.real.e = -1;
1014 break;
1015 }
1016
1017 /* If we made it here, we should have the dot so look for the
1018 precision. */
1019 t = format_lex (fmt);
1020 if (t != FMT_ZERO && t != FMT_POSINT)
1021 {
1022 fmt->error = nonneg_required;
1023 goto finished;
1024 }
1025 tail->u.real.d = fmt->value;
1026 tail->u.real.e = -1;
1027
1028 /* Done with D and F formats. */
1029 if (t2 == FMT_D || t2 == FMT_F)
1030 {
1031 *seen_dd = true;
1032 break;
1033 }
1034
1035 /* Look for optional exponent */
1036 u = format_lex (fmt);
1037 if (u != FMT_E)
1038 fmt->saved_token = u;
1039 else
1040 {
1041 u = format_lex (fmt);
1042 if (u != FMT_POSINT)
1043 {
1044 if (u == FMT_ZERO)
1045 {
1046 notify_std (&dtp->common, GFC_STD_F2018,
1047 "Positive exponent width required");
1048 }
1049 else
1050 {
1051 fmt->error = "Positive exponent width required in "
1052 "format string at %L";
1053 goto finished;
1054 }
1055 }
1056 tail->u.real.e = fmt->value;
1057 }
1058 break;
1059 }
1060
1061 /* Old DEC codes may not have width or precision specified. */
1062 if (dtp->u.p.mode == WRITING && (dtp->common.flags & IOPARM_DT_DEC_EXT))
1063 {
1064 tail->u.real.w = DEFAULT_WIDTH;
1065 tail->u.real.d = 0;
1066 tail->u.real.e = -1;
1067 fmt->saved_token = u;
1068 }
1069 break;
1070
1071 case FMT_DT:
1072 *seen_dd = true;
1073 get_fnode (fmt, &head, &tail, t);
1074 tail->repeat = repeat;
1075
1076 t = format_lex (fmt);
1077
1078 /* Initialize the vlist to a zero size, rank-one array. */
1079 tail->u.udf.vlist= xmalloc (sizeof(gfc_array_i4)
1080 + sizeof (descriptor_dimension));
1081 GFC_DESCRIPTOR_DATA(tail->u.udf.vlist) = NULL;
1082 GFC_DIMENSION_SET(tail->u.udf.vlist->dim[0],1, 0, 0);
1083
1084 if (t == FMT_STRING)
1085 {
1086 /* Get pointer to the optional format string. */
1087 tail->u.udf.string = fmt->string;
1088 tail->u.udf.string_len = fmt->value;
1089 t = format_lex (fmt);
1090 }
1091 if (t == FMT_LPAREN)
1092 {
1093 /* Temporary buffer to hold the vlist values. */
1094 GFC_INTEGER_4 temp[FARRAY_SIZE];
1095 int i = 0;
1096 loop:
1097 t = format_lex (fmt);
1098 if (t != FMT_POSINT)
1099 {
1100 fmt->error = posint_required;
1101 goto finished;
1102 }
1103 /* Save the positive integer value. */
1104 temp[i++] = fmt->value;
1105 t = format_lex (fmt);
1106 if (t == FMT_COMMA)
1107 goto loop;
1108 if (t == FMT_RPAREN)
1109 {
1110 /* We have parsed the complete vlist so initialize the
1111 array descriptor and save it in the format node. */
1112 gfc_full_array_i4 *vp = tail->u.udf.vlist;
1113 GFC_DESCRIPTOR_DATA(vp) = xmalloc (i * sizeof(GFC_INTEGER_4));
1114 GFC_DIMENSION_SET(vp->dim[0],1, i, 1);
1115 memcpy (GFC_DESCRIPTOR_DATA(vp), temp, i * sizeof(GFC_INTEGER_4));
1116 break;
1117 }
1118 fmt->error = unexpected_element;
1119 goto finished;
1120 }
1121 fmt->saved_token = t;
1122 break;
1123 case FMT_H:
1124 if (repeat > fmt->format_string_len)
1125 {
1126 fmt->error = bad_hollerith;
1127 goto finished;
1128 }
1129
1130 get_fnode (fmt, &head, &tail, FMT_STRING);
1131 tail->u.string.p = fmt->format_string;
1132 tail->u.string.length = repeat;
1133 tail->repeat = 1;
1134
1135 fmt->format_string += fmt->value;
1136 fmt->format_string_len -= repeat;
1137
1138 break;
1139
1140 case FMT_I:
1141 case FMT_B:
1142 case FMT_O:
1143 case FMT_Z:
1144 *seen_dd = true;
1145 get_fnode (fmt, &head, &tail, t);
1146 tail->repeat = repeat;
1147
1148 t = format_lex (fmt);
1149
1150 if (dtp->u.p.mode == READING)
1151 {
1152 if (t != FMT_POSINT)
1153 {
1154 if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1155 {
1156 tail->u.integer.w = DEFAULT_WIDTH;
1157 tail->u.integer.m = -1;
1158 fmt->saved_token = t;
1159 break;
1160 }
1161 fmt->error = posint_required;
1162 goto finished;
1163 }
1164 }
1165 else
1166 {
1167 if (t != FMT_ZERO && t != FMT_POSINT)
1168 {
1169 if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1170 {
1171 tail->u.integer.w = DEFAULT_WIDTH;
1172 tail->u.integer.m = -1;
1173 fmt->saved_token = t;
1174 break;
1175 }
1176 fmt->error = nonneg_required;
1177 goto finished;
1178 }
1179 }
1180
1181 tail->u.integer.w = fmt->value;
1182 tail->u.integer.m = -1;
1183
1184 t = format_lex (fmt);
1185 if (t != FMT_PERIOD)
1186 {
1187 fmt->saved_token = t;
1188 }
1189 else
1190 {
1191 t = format_lex (fmt);
1192 if (t != FMT_ZERO && t != FMT_POSINT)
1193 {
1194 fmt->error = nonneg_required;
1195 goto finished;
1196 }
1197
1198 tail->u.integer.m = fmt->value;
1199 }
1200
1201 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1202 {
1203 fmt->error = "Minimum digits exceeds field width";
1204 goto finished;
1205 }
1206
1207 break;
1208
1209 default:
1210 fmt->error = unexpected_element;
1211 goto finished;
1212 }
1213
1214 /* Between a descriptor and what comes next */
1215 between_desc:
1216 t = format_lex (fmt);
1217 switch (t)
1218 {
1219 case FMT_COMMA:
1220 goto format_item;
1221
1222 case FMT_RPAREN:
1223 goto finished;
1224
1225 case FMT_SLASH:
1226 case FMT_COLON:
1227 get_fnode (fmt, &head, &tail, t);
1228 tail->repeat = 1;
1229 goto optional_comma;
1230
1231 case FMT_END:
1232 fmt->error = unexpected_end;
1233 goto finished;
1234
1235 default:
1236 /* Assume a missing comma, this is a GNU extension */
1237 goto format_item_1;
1238 }
1239
1240 /* Optional comma is a weird between state where we've just finished
1241 reading a colon, slash or P descriptor. */
1242 optional_comma:
1243 t = format_lex (fmt);
1244 switch (t)
1245 {
1246 case FMT_COMMA:
1247 break;
1248
1249 case FMT_RPAREN:
1250 goto finished;
1251
1252 default: /* Assume that we have another format item */
1253 fmt->saved_token = t;
1254 break;
1255 }
1256
1257 goto format_item;
1258
1259 finished:
1260
1261 return head;
1262 }
1263
1264
1265 /* format_error()-- Generate an error message for a format statement.
1266 If the node that gives the location of the error is NULL, the error
1267 is assumed to happen at parse time, and the current location of the
1268 parser is shown.
1269
1270 We generate a message showing where the problem is. We take extra
1271 care to print only the relevant part of the format if it is longer
1272 than a standard 80 column display. */
1273
1274 void
1275 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1276 {
1277 int width, i, offset;
1278 #define BUFLEN 300
1279 char *p, buffer[BUFLEN];
1280 format_data *fmt = dtp->u.p.fmt;
1281
1282 if (f != NULL)
1283 p = f->source;
1284 else /* This should not happen. */
1285 p = dtp->format;
1286
1287 if (message == unexpected_element)
1288 snprintf (buffer, BUFLEN, message, fmt->error_element);
1289 else
1290 snprintf (buffer, BUFLEN, "%s\n", message);
1291
1292 /* Get the offset into the format string where the error occurred. */
1293 offset = dtp->format_len - (fmt->reversion_ok ?
1294 (int) strlen(p) : fmt->format_string_len);
1295
1296 width = dtp->format_len;
1297
1298 if (width > 80)
1299 width = 80;
1300
1301 /* Show the format */
1302
1303 p = strchr (buffer, '\0');
1304
1305 if (dtp->format)
1306 memcpy (p, dtp->format, width);
1307
1308 p += width;
1309 *p++ = '\n';
1310
1311 /* Show where the problem is */
1312
1313 for (i = 1; i < offset; i++)
1314 *p++ = ' ';
1315
1316 *p++ = '^';
1317 *p = '\0';
1318
1319 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1320 }
1321
1322
1323 /* revert()-- Do reversion of the format. Control reverts to the left
1324 parenthesis that matches the rightmost right parenthesis. From our
1325 tree structure, we are looking for the rightmost parenthesis node
1326 at the second level, the first level always being a single
1327 parenthesis node. If this node doesn't exit, we use the top
1328 level. */
1329
1330 static void
1331 revert (st_parameter_dt *dtp)
1332 {
1333 fnode *f, *r;
1334 format_data *fmt = dtp->u.p.fmt;
1335
1336 dtp->u.p.reversion_flag = 1;
1337
1338 r = NULL;
1339
1340 for (f = fmt->array.array[0].u.child; f; f = f->next)
1341 if (f->format == FMT_LPAREN)
1342 r = f;
1343
1344 /* If r is NULL because no node was found, the whole tree will be used */
1345
1346 fmt->array.array[0].current = r;
1347 fmt->array.array[0].count = 0;
1348 }
1349
1350 /* parse_format()-- Parse a format string. */
1351
1352 void
1353 parse_format (st_parameter_dt *dtp)
1354 {
1355 format_data *fmt;
1356 bool format_cache_ok, seen_data_desc = false;
1357
1358 /* Don't cache for internal units and set an arbitrary limit on the
1359 size of format strings we will cache. (Avoids memory issues.)
1360 Also, the format_hash_table resides in the current_unit, so
1361 child_dtio procedures would overwrite the parent table */
1362 format_cache_ok = !is_internal_unit (dtp)
1363 && (dtp->u.p.current_unit->child_dtio == 0);
1364
1365 /* Lookup format string to see if it has already been parsed. */
1366 if (format_cache_ok)
1367 {
1368 dtp->u.p.fmt = find_parsed_format (dtp);
1369
1370 if (dtp->u.p.fmt != NULL)
1371 {
1372 dtp->u.p.fmt->reversion_ok = 0;
1373 dtp->u.p.fmt->saved_token = FMT_NONE;
1374 dtp->u.p.fmt->saved_format = NULL;
1375 reset_fnode_counters (dtp);
1376 return;
1377 }
1378 }
1379
1380 /* Not found so proceed as follows. */
1381
1382 char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len);
1383 dtp->format = fmt_string;
1384
1385 dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1386 fmt->format_string = dtp->format;
1387 fmt->format_string_len = dtp->format_len;
1388
1389 fmt->string = NULL;
1390 fmt->saved_token = FMT_NONE;
1391 fmt->error = NULL;
1392 fmt->value = 0;
1393
1394 /* Initialize variables used during traversal of the tree. */
1395
1396 fmt->reversion_ok = 0;
1397 fmt->saved_format = NULL;
1398
1399 /* Initialize the fnode_array. */
1400
1401 memset (&(fmt->array), 0, sizeof(fmt->array));
1402
1403 /* Allocate the first format node as the root of the tree. */
1404
1405 fmt->last = &fmt->array;
1406 fmt->last->next = NULL;
1407 fmt->avail = &fmt->array.array[0];
1408
1409 memset (fmt->avail, 0, sizeof (*fmt->avail));
1410 fmt->avail->format = FMT_LPAREN;
1411 fmt->avail->repeat = 1;
1412 fmt->avail++;
1413
1414 if (format_lex (fmt) == FMT_LPAREN)
1415 fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1416 else
1417 fmt->error = "Missing initial left parenthesis in format";
1418
1419 if (format_cache_ok)
1420 save_parsed_format (dtp);
1421 else
1422 dtp->u.p.format_not_saved = 1;
1423
1424 if (fmt->error)
1425 format_error (dtp, NULL, fmt->error);
1426 }
1427
1428
1429 /* next_format0()-- Get the next format node without worrying about
1430 reversion. Returns NULL when we hit the end of the list.
1431 Parenthesis nodes are incremented after the list has been
1432 exhausted, other nodes are incremented before they are returned. */
1433
1434 static const fnode *
1435 next_format0 (fnode *f)
1436 {
1437 const fnode *r;
1438
1439 if (f == NULL)
1440 return NULL;
1441
1442 if (f->format != FMT_LPAREN)
1443 {
1444 f->count++;
1445 if (f->count <= f->repeat)
1446 return f;
1447
1448 f->count = 0;
1449 return NULL;
1450 }
1451
1452 /* Deal with a parenthesis node with unlimited format. */
1453
1454 if (f->repeat == -2) /* -2 signifies unlimited. */
1455 for (;;)
1456 {
1457 if (f->current == NULL)
1458 f->current = f->u.child;
1459
1460 for (; f->current != NULL; f->current = f->current->next)
1461 {
1462 r = next_format0 (f->current);
1463 if (r != NULL)
1464 return r;
1465 }
1466 }
1467
1468 /* Deal with a parenthesis node with specific repeat count. */
1469 for (; f->count < f->repeat; f->count++)
1470 {
1471 if (f->current == NULL)
1472 f->current = f->u.child;
1473
1474 for (; f->current != NULL; f->current = f->current->next)
1475 {
1476 r = next_format0 (f->current);
1477 if (r != NULL)
1478 return r;
1479 }
1480 }
1481
1482 f->count = 0;
1483 return NULL;
1484 }
1485
1486
1487 /* next_format()-- Return the next format node. If the format list
1488 ends up being exhausted, we do reversion. Reversion is only
1489 allowed if we've seen a data descriptor since the
1490 initialization or the last reversion. We return NULL if there
1491 are no more data descriptors to return (which is an error
1492 condition). */
1493
1494 const fnode *
1495 next_format (st_parameter_dt *dtp)
1496 {
1497 format_token t;
1498 const fnode *f;
1499 format_data *fmt = dtp->u.p.fmt;
1500
1501 if (fmt->saved_format != NULL)
1502 { /* Deal with a pushed-back format node */
1503 f = fmt->saved_format;
1504 fmt->saved_format = NULL;
1505 goto done;
1506 }
1507
1508 f = next_format0 (&fmt->array.array[0]);
1509 if (f == NULL)
1510 {
1511 if (!fmt->reversion_ok)
1512 return NULL;
1513
1514 fmt->reversion_ok = 0;
1515 revert (dtp);
1516
1517 f = next_format0 (&fmt->array.array[0]);
1518 if (f == NULL)
1519 {
1520 format_error (dtp, NULL, reversion_error);
1521 return NULL;
1522 }
1523
1524 /* Push the first reverted token and return a colon node in case
1525 there are no more data items. */
1526
1527 fmt->saved_format = f;
1528 return &colon_node;
1529 }
1530
1531 /* If this is a data edit descriptor, then reversion has become OK. */
1532 done:
1533 t = f->format;
1534
1535 if (!fmt->reversion_ok &&
1536 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1537 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1538 t == FMT_A || t == FMT_D || t == FMT_DT))
1539 fmt->reversion_ok = 1;
1540 return f;
1541 }
1542
1543
1544 /* unget_format()-- Push the given format back so that it will be
1545 returned on the next call to next_format() without affecting
1546 counts. This is necessary when we've encountered a data
1547 descriptor, but don't know what the data item is yet. The format
1548 node is pushed back, and we return control to the main program,
1549 which calls the library back with the data item (or not). */
1550
1551 void
1552 unget_format (st_parameter_dt *dtp, const fnode *f)
1553 {
1554 dtp->u.p.fmt->saved_format = f;
1555 }
1556