re PR fortran/61632 (Improve error locus on large format strings)
[gcc.git] / libgfortran / io / format.c
1 /* Copyright (C) 2002-2014 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
4
5 This file is part of the GNU Fortran runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28 * interpretation during I/O statements */
29
30 #include "io.h"
31 #include "format.h"
32 #include <ctype.h>
33 #include <string.h>
34 #include <stdlib.h>
35
36
37 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
38 NULL };
39
40 /* Error messages. */
41
42 static const char posint_required[] = "Positive width required in format",
43 period_required[] = "Period required in format",
44 nonneg_required[] = "Nonnegative width required in format",
45 unexpected_element[] = "Unexpected element '%c' in format\n",
46 unexpected_end[] = "Unexpected end of format string",
47 bad_string[] = "Unterminated character constant in format",
48 bad_hollerith[] = "Hollerith constant extends past the end of the format",
49 reversion_error[] = "Exhausted data descriptors in format",
50 zero_width[] = "Zero width in format descriptor";
51
52 /* The following routines support caching format data from parsed format strings
53 into a hash table. This avoids repeatedly parsing duplicate format strings
54 or format strings in I/O statements that are repeated in loops. */
55
56
57 /* Traverse the table and free all data. */
58
59 void
60 free_format_hash_table (gfc_unit *u)
61 {
62 size_t i;
63
64 /* free_format_data handles any NULL pointers. */
65 for (i = 0; i < FORMAT_HASH_SIZE; i++)
66 {
67 if (u->format_hash_table[i].hashed_fmt != NULL)
68 {
69 free_format_data (u->format_hash_table[i].hashed_fmt);
70 free (u->format_hash_table[i].key);
71 }
72 u->format_hash_table[i].key = NULL;
73 u->format_hash_table[i].key_len = 0;
74 u->format_hash_table[i].hashed_fmt = NULL;
75 }
76 }
77
78 /* Traverse the format_data structure and reset the fnode counters. */
79
80 static void
81 reset_node (fnode *fn)
82 {
83 fnode *f;
84
85 fn->count = 0;
86 fn->current = NULL;
87
88 if (fn->format != FMT_LPAREN)
89 return;
90
91 for (f = fn->u.child; f; f = f->next)
92 {
93 if (f->format == FMT_RPAREN)
94 break;
95 reset_node (f);
96 }
97 }
98
99 static void
100 reset_fnode_counters (st_parameter_dt *dtp)
101 {
102 fnode *f;
103 format_data *fmt;
104
105 fmt = dtp->u.p.fmt;
106
107 /* Clear this pointer at the head so things start at the right place. */
108 fmt->array.array[0].current = NULL;
109
110 for (f = fmt->array.array[0].u.child; f; f = f->next)
111 reset_node (f);
112 }
113
114
115 /* A simple hashing function to generate an index into the hash table. */
116
117 static uint32_t
118 format_hash (st_parameter_dt *dtp)
119 {
120 char *key;
121 gfc_charlen_type key_len;
122 uint32_t hash = 0;
123 gfc_charlen_type i;
124
125 /* Hash the format string. Super simple, but what the heck! */
126 key = dtp->format;
127 key_len = dtp->format_len;
128 for (i = 0; i < key_len; i++)
129 hash ^= key[i];
130 hash &= (FORMAT_HASH_SIZE - 1);
131 return hash;
132 }
133
134
135 static void
136 save_parsed_format (st_parameter_dt *dtp)
137 {
138 uint32_t hash;
139 gfc_unit *u;
140
141 hash = format_hash (dtp);
142 u = dtp->u.p.current_unit;
143
144 /* Index into the hash table. We are simply replacing whatever is there
145 relying on probability. */
146 if (u->format_hash_table[hash].hashed_fmt != NULL)
147 free_format_data (u->format_hash_table[hash].hashed_fmt);
148 u->format_hash_table[hash].hashed_fmt = NULL;
149
150 free (u->format_hash_table[hash].key);
151 u->format_hash_table[hash].key = dtp->format;
152
153 u->format_hash_table[hash].key_len = dtp->format_len;
154 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
155 }
156
157
158 static format_data *
159 find_parsed_format (st_parameter_dt *dtp)
160 {
161 uint32_t hash;
162 gfc_unit *u;
163
164 hash = format_hash (dtp);
165 u = dtp->u.p.current_unit;
166
167 if (u->format_hash_table[hash].key != NULL)
168 {
169 /* See if it matches. */
170 if (u->format_hash_table[hash].key_len == dtp->format_len)
171 {
172 /* So far so good. */
173 if (strncmp (u->format_hash_table[hash].key,
174 dtp->format, dtp->format_len) == 0)
175 return u->format_hash_table[hash].hashed_fmt;
176 }
177 }
178 return NULL;
179 }
180
181
182 /* next_char()-- Return the next character in the format string.
183 * Returns -1 when the string is done. If the literal flag is set,
184 * spaces are significant, otherwise they are not. */
185
186 static int
187 next_char (format_data *fmt, int literal)
188 {
189 int c;
190
191 do
192 {
193 if (fmt->format_string_len == 0)
194 return -1;
195
196 fmt->format_string_len--;
197 c = toupper (*fmt->format_string++);
198 fmt->error_element = c;
199 }
200 while ((c == ' ' || c == '\t') && !literal);
201
202 return c;
203 }
204
205
206 /* unget_char()-- Back up one character position. */
207
208 #define unget_char(fmt) \
209 { fmt->format_string--; fmt->format_string_len++; }
210
211
212 /* get_fnode()-- Allocate a new format node, inserting it into the
213 * current singly linked list. These are initially allocated from the
214 * static buffer. */
215
216 static fnode *
217 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
218 {
219 fnode *f;
220
221 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
222 {
223 fmt->last->next = xmalloc (sizeof (fnode_array));
224 fmt->last = fmt->last->next;
225 fmt->last->next = NULL;
226 fmt->avail = &fmt->last->array[0];
227 }
228 f = fmt->avail++;
229 memset (f, '\0', sizeof (fnode));
230
231 if (*head == NULL)
232 *head = *tail = f;
233 else
234 {
235 (*tail)->next = f;
236 *tail = f;
237 }
238
239 f->format = t;
240 f->repeat = -1;
241 f->source = fmt->format_string;
242 return f;
243 }
244
245
246 /* free_format_data()-- Free all allocated format data. */
247
248 void
249 free_format_data (format_data *fmt)
250 {
251 fnode_array *fa, *fa_next;
252
253
254 if (fmt == NULL)
255 return;
256
257 for (fa = fmt->array.next; fa; fa = fa_next)
258 {
259 fa_next = fa->next;
260 free (fa);
261 }
262
263 free (fmt);
264 fmt = NULL;
265 }
266
267
268 /* format_lex()-- Simple lexical analyzer for getting the next token
269 * in a FORMAT string. We support a one-level token pushback in the
270 * fmt->saved_token variable. */
271
272 static format_token
273 format_lex (format_data *fmt)
274 {
275 format_token token;
276 int negative_flag;
277 int c;
278 char delim;
279
280 if (fmt->saved_token != FMT_NONE)
281 {
282 token = fmt->saved_token;
283 fmt->saved_token = FMT_NONE;
284 return token;
285 }
286
287 negative_flag = 0;
288 c = next_char (fmt, 0);
289
290 switch (c)
291 {
292 case '*':
293 token = FMT_STAR;
294 break;
295
296 case '(':
297 token = FMT_LPAREN;
298 break;
299
300 case ')':
301 token = FMT_RPAREN;
302 break;
303
304 case '-':
305 negative_flag = 1;
306 /* Fall Through */
307
308 case '+':
309 c = next_char (fmt, 0);
310 if (!isdigit (c))
311 {
312 token = FMT_UNKNOWN;
313 break;
314 }
315
316 fmt->value = c - '0';
317
318 for (;;)
319 {
320 c = next_char (fmt, 0);
321 if (!isdigit (c))
322 break;
323
324 fmt->value = 10 * fmt->value + c - '0';
325 }
326
327 unget_char (fmt);
328
329 if (negative_flag)
330 fmt->value = -fmt->value;
331 token = FMT_SIGNED_INT;
332 break;
333
334 case '0':
335 case '1':
336 case '2':
337 case '3':
338 case '4':
339 case '5':
340 case '6':
341 case '7':
342 case '8':
343 case '9':
344 fmt->value = c - '0';
345
346 for (;;)
347 {
348 c = next_char (fmt, 0);
349 if (!isdigit (c))
350 break;
351
352 fmt->value = 10 * fmt->value + c - '0';
353 }
354
355 unget_char (fmt);
356 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
357 break;
358
359 case '.':
360 token = FMT_PERIOD;
361 break;
362
363 case ',':
364 token = FMT_COMMA;
365 break;
366
367 case ':':
368 token = FMT_COLON;
369 break;
370
371 case '/':
372 token = FMT_SLASH;
373 break;
374
375 case '$':
376 token = FMT_DOLLAR;
377 break;
378
379 case 'T':
380 switch (next_char (fmt, 0))
381 {
382 case 'L':
383 token = FMT_TL;
384 break;
385 case 'R':
386 token = FMT_TR;
387 break;
388 default:
389 token = FMT_T;
390 unget_char (fmt);
391 break;
392 }
393
394 break;
395
396 case 'X':
397 token = FMT_X;
398 break;
399
400 case 'S':
401 switch (next_char (fmt, 0))
402 {
403 case 'S':
404 token = FMT_SS;
405 break;
406 case 'P':
407 token = FMT_SP;
408 break;
409 default:
410 token = FMT_S;
411 unget_char (fmt);
412 break;
413 }
414
415 break;
416
417 case 'B':
418 switch (next_char (fmt, 0))
419 {
420 case 'N':
421 token = FMT_BN;
422 break;
423 case 'Z':
424 token = FMT_BZ;
425 break;
426 default:
427 token = FMT_B;
428 unget_char (fmt);
429 break;
430 }
431
432 break;
433
434 case '\'':
435 case '"':
436 delim = c;
437
438 fmt->string = fmt->format_string;
439 fmt->value = 0; /* This is the length of the string */
440
441 for (;;)
442 {
443 c = next_char (fmt, 1);
444 if (c == -1)
445 {
446 token = FMT_BADSTRING;
447 fmt->error = bad_string;
448 break;
449 }
450
451 if (c == delim)
452 {
453 c = next_char (fmt, 1);
454
455 if (c == -1)
456 {
457 token = FMT_BADSTRING;
458 fmt->error = bad_string;
459 break;
460 }
461
462 if (c != delim)
463 {
464 unget_char (fmt);
465 token = FMT_STRING;
466 break;
467 }
468 }
469
470 fmt->value++;
471 }
472
473 break;
474
475 case 'P':
476 token = FMT_P;
477 break;
478
479 case 'I':
480 token = FMT_I;
481 break;
482
483 case 'O':
484 token = FMT_O;
485 break;
486
487 case 'Z':
488 token = FMT_Z;
489 break;
490
491 case 'F':
492 token = FMT_F;
493 break;
494
495 case 'E':
496 switch (next_char (fmt, 0))
497 {
498 case 'N':
499 token = FMT_EN;
500 break;
501 case 'S':
502 token = FMT_ES;
503 break;
504 default:
505 token = FMT_E;
506 unget_char (fmt);
507 break;
508 }
509 break;
510
511 case 'G':
512 token = FMT_G;
513 break;
514
515 case 'H':
516 token = FMT_H;
517 break;
518
519 case 'L':
520 token = FMT_L;
521 break;
522
523 case 'A':
524 token = FMT_A;
525 break;
526
527 case 'D':
528 switch (next_char (fmt, 0))
529 {
530 case 'P':
531 token = FMT_DP;
532 break;
533 case 'C':
534 token = FMT_DC;
535 break;
536 default:
537 token = FMT_D;
538 unget_char (fmt);
539 break;
540 }
541 break;
542
543 case 'R':
544 switch (next_char (fmt, 0))
545 {
546 case 'C':
547 token = FMT_RC;
548 break;
549 case 'D':
550 token = FMT_RD;
551 break;
552 case 'N':
553 token = FMT_RN;
554 break;
555 case 'P':
556 token = FMT_RP;
557 break;
558 case 'U':
559 token = FMT_RU;
560 break;
561 case 'Z':
562 token = FMT_RZ;
563 break;
564 default:
565 unget_char (fmt);
566 token = FMT_UNKNOWN;
567 break;
568 }
569 break;
570
571 case -1:
572 token = FMT_END;
573 break;
574
575 default:
576 token = FMT_UNKNOWN;
577 break;
578 }
579
580 return token;
581 }
582
583
584 /* parse_format_list()-- Parse a format list. Assumes that a left
585 * paren has already been seen. Returns a list representing the
586 * parenthesis node which contains the rest of the list. */
587
588 static fnode *
589 parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
590 {
591 fnode *head, *tail;
592 format_token t, u, t2;
593 int repeat;
594 format_data *fmt = dtp->u.p.fmt;
595 bool seen_data_desc = false;
596
597 head = tail = NULL;
598
599 /* Get the next format item */
600 format_item:
601 t = format_lex (fmt);
602 format_item_1:
603 switch (t)
604 {
605 case FMT_STAR:
606 t = format_lex (fmt);
607 if (t != FMT_LPAREN)
608 {
609 fmt->error = "Left parenthesis required after '*'";
610 goto finished;
611 }
612 get_fnode (fmt, &head, &tail, FMT_LPAREN);
613 tail->repeat = -2; /* Signifies unlimited format. */
614 tail->u.child = parse_format_list (dtp, &seen_data_desc);
615 if (fmt->error != NULL)
616 goto finished;
617 if (!seen_data_desc)
618 {
619 fmt->error = "'*' requires at least one associated data descriptor";
620 goto finished;
621 }
622 goto between_desc;
623
624 case FMT_POSINT:
625 repeat = fmt->value;
626
627 t = format_lex (fmt);
628 switch (t)
629 {
630 case FMT_LPAREN:
631 get_fnode (fmt, &head, &tail, FMT_LPAREN);
632 tail->repeat = repeat;
633 tail->u.child = parse_format_list (dtp, &seen_data_desc);
634 *seen_dd = seen_data_desc;
635 if (fmt->error != NULL)
636 goto finished;
637
638 goto between_desc;
639
640 case FMT_SLASH:
641 get_fnode (fmt, &head, &tail, FMT_SLASH);
642 tail->repeat = repeat;
643 goto optional_comma;
644
645 case FMT_X:
646 get_fnode (fmt, &head, &tail, FMT_X);
647 tail->repeat = 1;
648 tail->u.k = fmt->value;
649 goto between_desc;
650
651 case FMT_P:
652 goto p_descriptor;
653
654 default:
655 goto data_desc;
656 }
657
658 case FMT_LPAREN:
659 get_fnode (fmt, &head, &tail, FMT_LPAREN);
660 tail->repeat = 1;
661 tail->u.child = parse_format_list (dtp, &seen_data_desc);
662 *seen_dd = seen_data_desc;
663 if (fmt->error != NULL)
664 goto finished;
665
666 goto between_desc;
667
668 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
669 case FMT_ZERO: /* Same for zero. */
670 t = format_lex (fmt);
671 if (t != FMT_P)
672 {
673 fmt->error = "Expected P edit descriptor in format";
674 goto finished;
675 }
676
677 p_descriptor:
678 get_fnode (fmt, &head, &tail, FMT_P);
679 tail->u.k = fmt->value;
680 tail->repeat = 1;
681
682 t = format_lex (fmt);
683 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
684 || t == FMT_G || t == FMT_E)
685 {
686 repeat = 1;
687 goto data_desc;
688 }
689
690 if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
691 && t != FMT_POSINT)
692 {
693 fmt->error = "Comma required after P descriptor";
694 goto finished;
695 }
696
697 fmt->saved_token = t;
698 goto optional_comma;
699
700 case FMT_P: /* P and X require a prior number */
701 fmt->error = "P descriptor requires leading scale factor";
702 goto finished;
703
704 case FMT_X:
705 /*
706 EXTENSION!
707
708 If we would be pedantic in the library, we would have to reject
709 an X descriptor without an integer prefix:
710
711 fmt->error = "X descriptor requires leading space count";
712 goto finished;
713
714 However, this is an extension supported by many Fortran compilers,
715 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
716 runtime library, and make the front end reject it if the compiler
717 is in pedantic mode. The interpretation of 'X' is '1X'.
718 */
719 get_fnode (fmt, &head, &tail, FMT_X);
720 tail->repeat = 1;
721 tail->u.k = 1;
722 goto between_desc;
723
724 case FMT_STRING:
725 get_fnode (fmt, &head, &tail, FMT_STRING);
726 tail->u.string.p = fmt->string;
727 tail->u.string.length = fmt->value;
728 tail->repeat = 1;
729 goto optional_comma;
730
731 case FMT_RC:
732 case FMT_RD:
733 case FMT_RN:
734 case FMT_RP:
735 case FMT_RU:
736 case FMT_RZ:
737 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
738 "descriptor not allowed");
739 get_fnode (fmt, &head, &tail, t);
740 tail->repeat = 1;
741 goto between_desc;
742
743 case FMT_DC:
744 case FMT_DP:
745 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
746 "descriptor not allowed");
747 /* Fall through. */
748 case FMT_S:
749 case FMT_SS:
750 case FMT_SP:
751 case FMT_BN:
752 case FMT_BZ:
753 get_fnode (fmt, &head, &tail, t);
754 tail->repeat = 1;
755 goto between_desc;
756
757 case FMT_COLON:
758 get_fnode (fmt, &head, &tail, FMT_COLON);
759 tail->repeat = 1;
760 goto optional_comma;
761
762 case FMT_SLASH:
763 get_fnode (fmt, &head, &tail, FMT_SLASH);
764 tail->repeat = 1;
765 tail->u.r = 1;
766 goto optional_comma;
767
768 case FMT_DOLLAR:
769 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
770 tail->repeat = 1;
771 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
772 goto between_desc;
773
774 case FMT_T:
775 case FMT_TL:
776 case FMT_TR:
777 t2 = format_lex (fmt);
778 if (t2 != FMT_POSINT)
779 {
780 fmt->error = posint_required;
781 goto finished;
782 }
783 get_fnode (fmt, &head, &tail, t);
784 tail->u.n = fmt->value;
785 tail->repeat = 1;
786 goto between_desc;
787
788 case FMT_I:
789 case FMT_B:
790 case FMT_O:
791 case FMT_Z:
792 case FMT_E:
793 case FMT_EN:
794 case FMT_ES:
795 case FMT_D:
796 case FMT_L:
797 case FMT_A:
798 case FMT_F:
799 case FMT_G:
800 repeat = 1;
801 *seen_dd = true;
802 goto data_desc;
803
804 case FMT_H:
805 get_fnode (fmt, &head, &tail, FMT_STRING);
806 if (fmt->format_string_len < 1)
807 {
808 fmt->error = bad_hollerith;
809 goto finished;
810 }
811
812 tail->u.string.p = fmt->format_string;
813 tail->u.string.length = 1;
814 tail->repeat = 1;
815
816 fmt->format_string++;
817 fmt->format_string_len--;
818
819 goto between_desc;
820
821 case FMT_END:
822 fmt->error = unexpected_end;
823 goto finished;
824
825 case FMT_BADSTRING:
826 goto finished;
827
828 case FMT_RPAREN:
829 goto finished;
830
831 default:
832 fmt->error = unexpected_element;
833 goto finished;
834 }
835
836 /* In this state, t must currently be a data descriptor. Deal with
837 things that can/must follow the descriptor */
838 data_desc:
839 switch (t)
840 {
841 case FMT_L:
842 t = format_lex (fmt);
843 if (t != FMT_POSINT)
844 {
845 if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
846 {
847 fmt->error = posint_required;
848 goto finished;
849 }
850 else
851 {
852 fmt->saved_token = t;
853 fmt->value = 1; /* Default width */
854 notify_std (&dtp->common, GFC_STD_GNU, posint_required);
855 }
856 }
857
858 get_fnode (fmt, &head, &tail, FMT_L);
859 tail->u.n = fmt->value;
860 tail->repeat = repeat;
861 break;
862
863 case FMT_A:
864 t = format_lex (fmt);
865 if (t == FMT_ZERO)
866 {
867 fmt->error = zero_width;
868 goto finished;
869 }
870
871 if (t != FMT_POSINT)
872 {
873 fmt->saved_token = t;
874 fmt->value = -1; /* Width not present */
875 }
876
877 get_fnode (fmt, &head, &tail, FMT_A);
878 tail->repeat = repeat;
879 tail->u.n = fmt->value;
880 break;
881
882 case FMT_D:
883 case FMT_E:
884 case FMT_F:
885 case FMT_G:
886 case FMT_EN:
887 case FMT_ES:
888 get_fnode (fmt, &head, &tail, t);
889 tail->repeat = repeat;
890
891 u = format_lex (fmt);
892 if (t == FMT_G && u == FMT_ZERO)
893 {
894 if (notification_std (GFC_STD_F2008) == NOTIFICATION_ERROR
895 || dtp->u.p.mode == READING)
896 {
897 fmt->error = zero_width;
898 goto finished;
899 }
900 tail->u.real.w = 0;
901 u = format_lex (fmt);
902 if (u != FMT_PERIOD)
903 {
904 fmt->saved_token = u;
905 break;
906 }
907
908 u = format_lex (fmt);
909 if (u != FMT_POSINT)
910 {
911 fmt->error = posint_required;
912 goto finished;
913 }
914 tail->u.real.d = fmt->value;
915 break;
916 }
917 if (t == FMT_F && dtp->u.p.mode == WRITING)
918 {
919 if (u != FMT_POSINT && u != FMT_ZERO)
920 {
921 fmt->error = nonneg_required;
922 goto finished;
923 }
924 }
925 else if (u != FMT_POSINT)
926 {
927 fmt->error = posint_required;
928 goto finished;
929 }
930
931 tail->u.real.w = fmt->value;
932 t2 = t;
933 t = format_lex (fmt);
934 if (t != FMT_PERIOD)
935 {
936 /* We treat a missing decimal descriptor as 0. Note: This is only
937 allowed if -std=legacy, otherwise an error occurs. */
938 if (compile_options.warn_std != 0)
939 {
940 fmt->error = period_required;
941 goto finished;
942 }
943 fmt->saved_token = t;
944 tail->u.real.d = 0;
945 tail->u.real.e = -1;
946 break;
947 }
948
949 t = format_lex (fmt);
950 if (t != FMT_ZERO && t != FMT_POSINT)
951 {
952 fmt->error = nonneg_required;
953 goto finished;
954 }
955
956 tail->u.real.d = fmt->value;
957 tail->u.real.e = -1;
958
959 if (t2 == FMT_D || t2 == FMT_F)
960 break;
961
962
963 /* Look for optional exponent */
964 t = format_lex (fmt);
965 if (t != FMT_E)
966 fmt->saved_token = t;
967 else
968 {
969 t = format_lex (fmt);
970 if (t != FMT_POSINT)
971 {
972 fmt->error = "Positive exponent width required in format";
973 goto finished;
974 }
975
976 tail->u.real.e = fmt->value;
977 }
978
979 break;
980
981 case FMT_H:
982 if (repeat > fmt->format_string_len)
983 {
984 fmt->error = bad_hollerith;
985 goto finished;
986 }
987
988 get_fnode (fmt, &head, &tail, FMT_STRING);
989 tail->u.string.p = fmt->format_string;
990 tail->u.string.length = repeat;
991 tail->repeat = 1;
992
993 fmt->format_string += fmt->value;
994 fmt->format_string_len -= repeat;
995
996 break;
997
998 case FMT_I:
999 case FMT_B:
1000 case FMT_O:
1001 case FMT_Z:
1002 get_fnode (fmt, &head, &tail, t);
1003 tail->repeat = repeat;
1004
1005 t = format_lex (fmt);
1006
1007 if (dtp->u.p.mode == READING)
1008 {
1009 if (t != FMT_POSINT)
1010 {
1011 fmt->error = posint_required;
1012 goto finished;
1013 }
1014 }
1015 else
1016 {
1017 if (t != FMT_ZERO && t != FMT_POSINT)
1018 {
1019 fmt->error = nonneg_required;
1020 goto finished;
1021 }
1022 }
1023
1024 tail->u.integer.w = fmt->value;
1025 tail->u.integer.m = -1;
1026
1027 t = format_lex (fmt);
1028 if (t != FMT_PERIOD)
1029 {
1030 fmt->saved_token = t;
1031 }
1032 else
1033 {
1034 t = format_lex (fmt);
1035 if (t != FMT_ZERO && t != FMT_POSINT)
1036 {
1037 fmt->error = nonneg_required;
1038 goto finished;
1039 }
1040
1041 tail->u.integer.m = fmt->value;
1042 }
1043
1044 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1045 {
1046 fmt->error = "Minimum digits exceeds field width";
1047 goto finished;
1048 }
1049
1050 break;
1051
1052 default:
1053 fmt->error = unexpected_element;
1054 goto finished;
1055 }
1056
1057 /* Between a descriptor and what comes next */
1058 between_desc:
1059 t = format_lex (fmt);
1060 switch (t)
1061 {
1062 case FMT_COMMA:
1063 goto format_item;
1064
1065 case FMT_RPAREN:
1066 goto finished;
1067
1068 case FMT_SLASH:
1069 case FMT_COLON:
1070 get_fnode (fmt, &head, &tail, t);
1071 tail->repeat = 1;
1072 goto optional_comma;
1073
1074 case FMT_END:
1075 fmt->error = unexpected_end;
1076 goto finished;
1077
1078 default:
1079 /* Assume a missing comma, this is a GNU extension */
1080 goto format_item_1;
1081 }
1082
1083 /* Optional comma is a weird between state where we've just finished
1084 reading a colon, slash or P descriptor. */
1085 optional_comma:
1086 t = format_lex (fmt);
1087 switch (t)
1088 {
1089 case FMT_COMMA:
1090 break;
1091
1092 case FMT_RPAREN:
1093 goto finished;
1094
1095 default: /* Assume that we have another format item */
1096 fmt->saved_token = t;
1097 break;
1098 }
1099
1100 goto format_item;
1101
1102 finished:
1103
1104 return head;
1105 }
1106
1107
1108 /* format_error()-- Generate an error message for a format statement.
1109 * If the node that gives the location of the error is NULL, the error
1110 * is assumed to happen at parse time, and the current location of the
1111 * parser is shown.
1112 *
1113 * We generate a message showing where the problem is. We take extra
1114 * care to print only the relevant part of the format if it is longer
1115 * than a standard 80 column display. */
1116
1117 void
1118 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1119 {
1120 int width, i, offset;
1121 #define BUFLEN 300
1122 char *p, buffer[BUFLEN];
1123 format_data *fmt = dtp->u.p.fmt;
1124
1125 if (f != NULL)
1126 p = f->source;
1127 else /* This should not happen. */
1128 p = dtp->format;
1129
1130 if (message == unexpected_element)
1131 snprintf (buffer, BUFLEN, message, fmt->error_element);
1132 else
1133 snprintf (buffer, BUFLEN, "%s\n", message);
1134
1135 /* Get the offset into the format string where the error occurred. */
1136 offset = dtp->format_len - (fmt->reversion_ok ?
1137 (int) strlen(p) : fmt->format_string_len);
1138
1139 width = dtp->format_len;
1140
1141 if (width > 80)
1142 width = 80;
1143
1144 /* Show the format */
1145
1146 p = strchr (buffer, '\0');
1147
1148 memcpy (p, dtp->format, width);
1149
1150 p += width;
1151 *p++ = '\n';
1152
1153 /* Show where the problem is */
1154
1155 for (i = 1; i < offset; i++)
1156 *p++ = ' ';
1157
1158 *p++ = '^';
1159 *p = '\0';
1160
1161 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1162 }
1163
1164
1165 /* revert()-- Do reversion of the format. Control reverts to the left
1166 * parenthesis that matches the rightmost right parenthesis. From our
1167 * tree structure, we are looking for the rightmost parenthesis node
1168 * at the second level, the first level always being a single
1169 * parenthesis node. If this node doesn't exit, we use the top
1170 * level. */
1171
1172 static void
1173 revert (st_parameter_dt *dtp)
1174 {
1175 fnode *f, *r;
1176 format_data *fmt = dtp->u.p.fmt;
1177
1178 dtp->u.p.reversion_flag = 1;
1179
1180 r = NULL;
1181
1182 for (f = fmt->array.array[0].u.child; f; f = f->next)
1183 if (f->format == FMT_LPAREN)
1184 r = f;
1185
1186 /* If r is NULL because no node was found, the whole tree will be used */
1187
1188 fmt->array.array[0].current = r;
1189 fmt->array.array[0].count = 0;
1190 }
1191
1192 /* parse_format()-- Parse a format string. */
1193
1194 void
1195 parse_format (st_parameter_dt *dtp)
1196 {
1197 format_data *fmt;
1198 bool format_cache_ok, seen_data_desc = false;
1199
1200 /* Don't cache for internal units and set an arbitrary limit on the size of
1201 format strings we will cache. (Avoids memory issues.) */
1202 format_cache_ok = !is_internal_unit (dtp);
1203
1204 /* Lookup format string to see if it has already been parsed. */
1205 if (format_cache_ok)
1206 {
1207 dtp->u.p.fmt = find_parsed_format (dtp);
1208
1209 if (dtp->u.p.fmt != NULL)
1210 {
1211 dtp->u.p.fmt->reversion_ok = 0;
1212 dtp->u.p.fmt->saved_token = FMT_NONE;
1213 dtp->u.p.fmt->saved_format = NULL;
1214 reset_fnode_counters (dtp);
1215 return;
1216 }
1217 }
1218
1219 /* Not found so proceed as follows. */
1220
1221 if (format_cache_ok)
1222 {
1223 char *fmt_string = xmalloc (dtp->format_len + 1);
1224 memcpy (fmt_string, dtp->format, dtp->format_len);
1225 dtp->format = fmt_string;
1226 dtp->format[dtp->format_len] = '\0';
1227 }
1228
1229 dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1230 fmt->format_string = dtp->format;
1231 fmt->format_string_len = dtp->format_len;
1232
1233 fmt->string = NULL;
1234 fmt->saved_token = FMT_NONE;
1235 fmt->error = NULL;
1236 fmt->value = 0;
1237
1238 /* Initialize variables used during traversal of the tree. */
1239
1240 fmt->reversion_ok = 0;
1241 fmt->saved_format = NULL;
1242
1243 /* Allocate the first format node as the root of the tree. */
1244
1245 fmt->last = &fmt->array;
1246 fmt->last->next = NULL;
1247 fmt->avail = &fmt->array.array[0];
1248
1249 memset (fmt->avail, 0, sizeof (*fmt->avail));
1250 fmt->avail->format = FMT_LPAREN;
1251 fmt->avail->repeat = 1;
1252 fmt->avail++;
1253
1254 if (format_lex (fmt) == FMT_LPAREN)
1255 fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1256 else
1257 fmt->error = "Missing initial left parenthesis in format";
1258
1259 if (fmt->error)
1260 {
1261 format_error (dtp, NULL, fmt->error);
1262 if (format_cache_ok)
1263 free (dtp->format);
1264 free_format_hash_table (dtp->u.p.current_unit);
1265 return;
1266 }
1267
1268 if (format_cache_ok)
1269 save_parsed_format (dtp);
1270 else
1271 dtp->u.p.format_not_saved = 1;
1272 }
1273
1274
1275 /* next_format0()-- Get the next format node without worrying about
1276 * reversion. Returns NULL when we hit the end of the list.
1277 * Parenthesis nodes are incremented after the list has been
1278 * exhausted, other nodes are incremented before they are returned. */
1279
1280 static const fnode *
1281 next_format0 (fnode * f)
1282 {
1283 const fnode *r;
1284
1285 if (f == NULL)
1286 return NULL;
1287
1288 if (f->format != FMT_LPAREN)
1289 {
1290 f->count++;
1291 if (f->count <= f->repeat)
1292 return f;
1293
1294 f->count = 0;
1295 return NULL;
1296 }
1297
1298 /* Deal with a parenthesis node with unlimited format. */
1299
1300 if (f->repeat == -2) /* -2 signifies unlimited. */
1301 for (;;)
1302 {
1303 if (f->current == NULL)
1304 f->current = f->u.child;
1305
1306 for (; f->current != NULL; f->current = f->current->next)
1307 {
1308 r = next_format0 (f->current);
1309 if (r != NULL)
1310 return r;
1311 }
1312 }
1313
1314 /* Deal with a parenthesis node with specific repeat count. */
1315 for (; f->count < f->repeat; f->count++)
1316 {
1317 if (f->current == NULL)
1318 f->current = f->u.child;
1319
1320 for (; f->current != NULL; f->current = f->current->next)
1321 {
1322 r = next_format0 (f->current);
1323 if (r != NULL)
1324 return r;
1325 }
1326 }
1327
1328 f->count = 0;
1329 return NULL;
1330 }
1331
1332
1333 /* next_format()-- Return the next format node. If the format list
1334 * ends up being exhausted, we do reversion. Reversion is only
1335 * allowed if we've seen a data descriptor since the
1336 * initialization or the last reversion. We return NULL if there
1337 * are no more data descriptors to return (which is an error
1338 * condition). */
1339
1340 const fnode *
1341 next_format (st_parameter_dt *dtp)
1342 {
1343 format_token t;
1344 const fnode *f;
1345 format_data *fmt = dtp->u.p.fmt;
1346
1347 if (fmt->saved_format != NULL)
1348 { /* Deal with a pushed-back format node */
1349 f = fmt->saved_format;
1350 fmt->saved_format = NULL;
1351 goto done;
1352 }
1353
1354 f = next_format0 (&fmt->array.array[0]);
1355 if (f == NULL)
1356 {
1357 if (!fmt->reversion_ok)
1358 return NULL;
1359
1360 fmt->reversion_ok = 0;
1361 revert (dtp);
1362
1363 f = next_format0 (&fmt->array.array[0]);
1364 if (f == NULL)
1365 {
1366 format_error (dtp, NULL, reversion_error);
1367 return NULL;
1368 }
1369
1370 /* Push the first reverted token and return a colon node in case
1371 * there are no more data items. */
1372
1373 fmt->saved_format = f;
1374 return &colon_node;
1375 }
1376
1377 /* If this is a data edit descriptor, then reversion has become OK. */
1378 done:
1379 t = f->format;
1380
1381 if (!fmt->reversion_ok &&
1382 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1383 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1384 t == FMT_A || t == FMT_D))
1385 fmt->reversion_ok = 1;
1386 return f;
1387 }
1388
1389
1390 /* unget_format()-- Push the given format back so that it will be
1391 * returned on the next call to next_format() without affecting
1392 * counts. This is necessary when we've encountered a data
1393 * descriptor, but don't know what the data item is yet. The format
1394 * node is pushed back, and we return control to the main program,
1395 * which calls the library back with the data item (or not). */
1396
1397 void
1398 unget_format (st_parameter_dt *dtp, const fnode *f)
1399 {
1400 dtp->u.p.fmt->saved_format = f;
1401 }
1402