Update copyright years in libgfortran.
[gcc.git] / libgfortran / io / format.c
1 /* Copyright (C) 2002-2013 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
4
5 This file is part of the GNU Fortran runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28 * interpretation during I/O statements */
29
30 #include "io.h"
31 #include "format.h"
32 #include <ctype.h>
33 #include <string.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36
37
38 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
39 NULL };
40
41 /* Error messages. */
42
43 static const char posint_required[] = "Positive width required in format",
44 period_required[] = "Period required in format",
45 nonneg_required[] = "Nonnegative width required in format",
46 unexpected_element[] = "Unexpected element '%c' in format\n",
47 unexpected_end[] = "Unexpected end of format string",
48 bad_string[] = "Unterminated character constant in format",
49 bad_hollerith[] = "Hollerith constant extends past the end of the format",
50 reversion_error[] = "Exhausted data descriptors in format",
51 zero_width[] = "Zero width in format descriptor";
52
53 /* The following routines support caching format data from parsed format strings
54 into a hash table. This avoids repeatedly parsing duplicate format strings
55 or format strings in I/O statements that are repeated in loops. */
56
57
58 /* Traverse the table and free all data. */
59
60 void
61 free_format_hash_table (gfc_unit *u)
62 {
63 size_t i;
64
65 /* free_format_data handles any NULL pointers. */
66 for (i = 0; i < FORMAT_HASH_SIZE; i++)
67 {
68 if (u->format_hash_table[i].hashed_fmt != NULL)
69 {
70 free_format_data (u->format_hash_table[i].hashed_fmt);
71 free (u->format_hash_table[i].key);
72 }
73 u->format_hash_table[i].key = NULL;
74 u->format_hash_table[i].key_len = 0;
75 u->format_hash_table[i].hashed_fmt = NULL;
76 }
77 }
78
79 /* Traverse the format_data structure and reset the fnode counters. */
80
81 static void
82 reset_node (fnode *fn)
83 {
84 fnode *f;
85
86 fn->count = 0;
87 fn->current = NULL;
88
89 if (fn->format != FMT_LPAREN)
90 return;
91
92 for (f = fn->u.child; f; f = f->next)
93 {
94 if (f->format == FMT_RPAREN)
95 break;
96 reset_node (f);
97 }
98 }
99
100 static void
101 reset_fnode_counters (st_parameter_dt *dtp)
102 {
103 fnode *f;
104 format_data *fmt;
105
106 fmt = dtp->u.p.fmt;
107
108 /* Clear this pointer at the head so things start at the right place. */
109 fmt->array.array[0].current = NULL;
110
111 for (f = fmt->array.array[0].u.child; f; f = f->next)
112 reset_node (f);
113 }
114
115
116 /* A simple hashing function to generate an index into the hash table. */
117
118 static uint32_t
119 format_hash (st_parameter_dt *dtp)
120 {
121 char *key;
122 gfc_charlen_type key_len;
123 uint32_t hash = 0;
124 gfc_charlen_type i;
125
126 /* Hash the format string. Super simple, but what the heck! */
127 key = dtp->format;
128 key_len = dtp->format_len;
129 for (i = 0; i < key_len; i++)
130 hash ^= key[i];
131 hash &= (FORMAT_HASH_SIZE - 1);
132 return hash;
133 }
134
135
136 static void
137 save_parsed_format (st_parameter_dt *dtp)
138 {
139 uint32_t hash;
140 gfc_unit *u;
141
142 hash = format_hash (dtp);
143 u = dtp->u.p.current_unit;
144
145 /* Index into the hash table. We are simply replacing whatever is there
146 relying on probability. */
147 if (u->format_hash_table[hash].hashed_fmt != NULL)
148 free_format_data (u->format_hash_table[hash].hashed_fmt);
149 u->format_hash_table[hash].hashed_fmt = NULL;
150
151 free (u->format_hash_table[hash].key);
152 u->format_hash_table[hash].key = xmalloc (dtp->format_len);
153 memcpy (u->format_hash_table[hash].key, dtp->format, dtp->format_len);
154
155 u->format_hash_table[hash].key_len = dtp->format_len;
156 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
157 }
158
159
160 static format_data *
161 find_parsed_format (st_parameter_dt *dtp)
162 {
163 uint32_t hash;
164 gfc_unit *u;
165
166 hash = format_hash (dtp);
167 u = dtp->u.p.current_unit;
168
169 if (u->format_hash_table[hash].key != NULL)
170 {
171 /* See if it matches. */
172 if (u->format_hash_table[hash].key_len == dtp->format_len)
173 {
174 /* So far so good. */
175 if (strncmp (u->format_hash_table[hash].key,
176 dtp->format, dtp->format_len) == 0)
177 return u->format_hash_table[hash].hashed_fmt;
178 }
179 }
180 return NULL;
181 }
182
183
184 /* next_char()-- Return the next character in the format string.
185 * Returns -1 when the string is done. If the literal flag is set,
186 * spaces are significant, otherwise they are not. */
187
188 static int
189 next_char (format_data *fmt, int literal)
190 {
191 int c;
192
193 do
194 {
195 if (fmt->format_string_len == 0)
196 return -1;
197
198 fmt->format_string_len--;
199 c = toupper (*fmt->format_string++);
200 fmt->error_element = c;
201 }
202 while ((c == ' ' || c == '\t') && !literal);
203
204 return c;
205 }
206
207
208 /* unget_char()-- Back up one character position. */
209
210 #define unget_char(fmt) \
211 { fmt->format_string--; fmt->format_string_len++; }
212
213
214 /* get_fnode()-- Allocate a new format node, inserting it into the
215 * current singly linked list. These are initially allocated from the
216 * static buffer. */
217
218 static fnode *
219 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
220 {
221 fnode *f;
222
223 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
224 {
225 fmt->last->next = xmalloc (sizeof (fnode_array));
226 fmt->last = fmt->last->next;
227 fmt->last->next = NULL;
228 fmt->avail = &fmt->last->array[0];
229 }
230 f = fmt->avail++;
231 memset (f, '\0', sizeof (fnode));
232
233 if (*head == NULL)
234 *head = *tail = f;
235 else
236 {
237 (*tail)->next = f;
238 *tail = f;
239 }
240
241 f->format = t;
242 f->repeat = -1;
243 f->source = fmt->format_string;
244 return f;
245 }
246
247
248 /* free_format_data()-- Free all allocated format data. */
249
250 void
251 free_format_data (format_data *fmt)
252 {
253 fnode_array *fa, *fa_next;
254
255
256 if (fmt == NULL)
257 return;
258
259 for (fa = fmt->array.next; fa; fa = fa_next)
260 {
261 fa_next = fa->next;
262 free (fa);
263 }
264
265 free (fmt);
266 fmt = NULL;
267 }
268
269
270 /* format_lex()-- Simple lexical analyzer for getting the next token
271 * in a FORMAT string. We support a one-level token pushback in the
272 * fmt->saved_token variable. */
273
274 static format_token
275 format_lex (format_data *fmt)
276 {
277 format_token token;
278 int negative_flag;
279 int c;
280 char delim;
281
282 if (fmt->saved_token != FMT_NONE)
283 {
284 token = fmt->saved_token;
285 fmt->saved_token = FMT_NONE;
286 return token;
287 }
288
289 negative_flag = 0;
290 c = next_char (fmt, 0);
291
292 switch (c)
293 {
294 case '*':
295 token = FMT_STAR;
296 break;
297
298 case '(':
299 token = FMT_LPAREN;
300 break;
301
302 case ')':
303 token = FMT_RPAREN;
304 break;
305
306 case '-':
307 negative_flag = 1;
308 /* Fall Through */
309
310 case '+':
311 c = next_char (fmt, 0);
312 if (!isdigit (c))
313 {
314 token = FMT_UNKNOWN;
315 break;
316 }
317
318 fmt->value = c - '0';
319
320 for (;;)
321 {
322 c = next_char (fmt, 0);
323 if (!isdigit (c))
324 break;
325
326 fmt->value = 10 * fmt->value + c - '0';
327 }
328
329 unget_char (fmt);
330
331 if (negative_flag)
332 fmt->value = -fmt->value;
333 token = FMT_SIGNED_INT;
334 break;
335
336 case '0':
337 case '1':
338 case '2':
339 case '3':
340 case '4':
341 case '5':
342 case '6':
343 case '7':
344 case '8':
345 case '9':
346 fmt->value = c - '0';
347
348 for (;;)
349 {
350 c = next_char (fmt, 0);
351 if (!isdigit (c))
352 break;
353
354 fmt->value = 10 * fmt->value + c - '0';
355 }
356
357 unget_char (fmt);
358 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
359 break;
360
361 case '.':
362 token = FMT_PERIOD;
363 break;
364
365 case ',':
366 token = FMT_COMMA;
367 break;
368
369 case ':':
370 token = FMT_COLON;
371 break;
372
373 case '/':
374 token = FMT_SLASH;
375 break;
376
377 case '$':
378 token = FMT_DOLLAR;
379 break;
380
381 case 'T':
382 switch (next_char (fmt, 0))
383 {
384 case 'L':
385 token = FMT_TL;
386 break;
387 case 'R':
388 token = FMT_TR;
389 break;
390 default:
391 token = FMT_T;
392 unget_char (fmt);
393 break;
394 }
395
396 break;
397
398 case 'X':
399 token = FMT_X;
400 break;
401
402 case 'S':
403 switch (next_char (fmt, 0))
404 {
405 case 'S':
406 token = FMT_SS;
407 break;
408 case 'P':
409 token = FMT_SP;
410 break;
411 default:
412 token = FMT_S;
413 unget_char (fmt);
414 break;
415 }
416
417 break;
418
419 case 'B':
420 switch (next_char (fmt, 0))
421 {
422 case 'N':
423 token = FMT_BN;
424 break;
425 case 'Z':
426 token = FMT_BZ;
427 break;
428 default:
429 token = FMT_B;
430 unget_char (fmt);
431 break;
432 }
433
434 break;
435
436 case '\'':
437 case '"':
438 delim = c;
439
440 fmt->string = fmt->format_string;
441 fmt->value = 0; /* This is the length of the string */
442
443 for (;;)
444 {
445 c = next_char (fmt, 1);
446 if (c == -1)
447 {
448 token = FMT_BADSTRING;
449 fmt->error = bad_string;
450 break;
451 }
452
453 if (c == delim)
454 {
455 c = next_char (fmt, 1);
456
457 if (c == -1)
458 {
459 token = FMT_BADSTRING;
460 fmt->error = bad_string;
461 break;
462 }
463
464 if (c != delim)
465 {
466 unget_char (fmt);
467 token = FMT_STRING;
468 break;
469 }
470 }
471
472 fmt->value++;
473 }
474
475 break;
476
477 case 'P':
478 token = FMT_P;
479 break;
480
481 case 'I':
482 token = FMT_I;
483 break;
484
485 case 'O':
486 token = FMT_O;
487 break;
488
489 case 'Z':
490 token = FMT_Z;
491 break;
492
493 case 'F':
494 token = FMT_F;
495 break;
496
497 case 'E':
498 switch (next_char (fmt, 0))
499 {
500 case 'N':
501 token = FMT_EN;
502 break;
503 case 'S':
504 token = FMT_ES;
505 break;
506 default:
507 token = FMT_E;
508 unget_char (fmt);
509 break;
510 }
511 break;
512
513 case 'G':
514 token = FMT_G;
515 break;
516
517 case 'H':
518 token = FMT_H;
519 break;
520
521 case 'L':
522 token = FMT_L;
523 break;
524
525 case 'A':
526 token = FMT_A;
527 break;
528
529 case 'D':
530 switch (next_char (fmt, 0))
531 {
532 case 'P':
533 token = FMT_DP;
534 break;
535 case 'C':
536 token = FMT_DC;
537 break;
538 default:
539 token = FMT_D;
540 unget_char (fmt);
541 break;
542 }
543 break;
544
545 case 'R':
546 switch (next_char (fmt, 0))
547 {
548 case 'C':
549 token = FMT_RC;
550 break;
551 case 'D':
552 token = FMT_RD;
553 break;
554 case 'N':
555 token = FMT_RN;
556 break;
557 case 'P':
558 token = FMT_RP;
559 break;
560 case 'U':
561 token = FMT_RU;
562 break;
563 case 'Z':
564 token = FMT_RZ;
565 break;
566 default:
567 unget_char (fmt);
568 token = FMT_UNKNOWN;
569 break;
570 }
571 break;
572
573 case -1:
574 token = FMT_END;
575 break;
576
577 default:
578 token = FMT_UNKNOWN;
579 break;
580 }
581
582 return token;
583 }
584
585
586 /* parse_format_list()-- Parse a format list. Assumes that a left
587 * paren has already been seen. Returns a list representing the
588 * parenthesis node which contains the rest of the list. */
589
590 static fnode *
591 parse_format_list (st_parameter_dt *dtp, bool *save_ok, bool *seen_dd)
592 {
593 fnode *head, *tail;
594 format_token t, u, t2;
595 int repeat;
596 format_data *fmt = dtp->u.p.fmt;
597 bool saveit, seen_data_desc = false;
598
599 head = tail = NULL;
600 saveit = *save_ok;
601
602 /* Get the next format item */
603 format_item:
604 t = format_lex (fmt);
605 format_item_1:
606 switch (t)
607 {
608 case FMT_STAR:
609 t = format_lex (fmt);
610 if (t != FMT_LPAREN)
611 {
612 fmt->error = "Left parenthesis required after '*'";
613 goto finished;
614 }
615 get_fnode (fmt, &head, &tail, FMT_LPAREN);
616 tail->repeat = -2; /* Signifies unlimited format. */
617 tail->u.child = parse_format_list (dtp, &saveit, &seen_data_desc);
618 if (fmt->error != NULL)
619 goto finished;
620 if (!seen_data_desc)
621 {
622 fmt->error = "'*' requires at least one associated data descriptor";
623 goto finished;
624 }
625 goto between_desc;
626
627 case FMT_POSINT:
628 repeat = fmt->value;
629
630 t = format_lex (fmt);
631 switch (t)
632 {
633 case FMT_LPAREN:
634 get_fnode (fmt, &head, &tail, FMT_LPAREN);
635 tail->repeat = repeat;
636 tail->u.child = parse_format_list (dtp, &saveit, &seen_data_desc);
637 *seen_dd = seen_data_desc;
638 if (fmt->error != NULL)
639 goto finished;
640
641 goto between_desc;
642
643 case FMT_SLASH:
644 get_fnode (fmt, &head, &tail, FMT_SLASH);
645 tail->repeat = repeat;
646 goto optional_comma;
647
648 case FMT_X:
649 get_fnode (fmt, &head, &tail, FMT_X);
650 tail->repeat = 1;
651 tail->u.k = fmt->value;
652 goto between_desc;
653
654 case FMT_P:
655 goto p_descriptor;
656
657 default:
658 goto data_desc;
659 }
660
661 case FMT_LPAREN:
662 get_fnode (fmt, &head, &tail, FMT_LPAREN);
663 tail->repeat = 1;
664 tail->u.child = parse_format_list (dtp, &saveit, &seen_data_desc);
665 *seen_dd = seen_data_desc;
666 if (fmt->error != NULL)
667 goto finished;
668
669 goto between_desc;
670
671 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
672 case FMT_ZERO: /* Same for zero. */
673 t = format_lex (fmt);
674 if (t != FMT_P)
675 {
676 fmt->error = "Expected P edit descriptor in format";
677 goto finished;
678 }
679
680 p_descriptor:
681 get_fnode (fmt, &head, &tail, FMT_P);
682 tail->u.k = fmt->value;
683 tail->repeat = 1;
684
685 t = format_lex (fmt);
686 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
687 || t == FMT_G || t == FMT_E)
688 {
689 repeat = 1;
690 goto data_desc;
691 }
692
693 if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
694 && t != FMT_POSINT)
695 {
696 fmt->error = "Comma required after P descriptor";
697 goto finished;
698 }
699
700 fmt->saved_token = t;
701 goto optional_comma;
702
703 case FMT_P: /* P and X require a prior number */
704 fmt->error = "P descriptor requires leading scale factor";
705 goto finished;
706
707 case FMT_X:
708 /*
709 EXTENSION!
710
711 If we would be pedantic in the library, we would have to reject
712 an X descriptor without an integer prefix:
713
714 fmt->error = "X descriptor requires leading space count";
715 goto finished;
716
717 However, this is an extension supported by many Fortran compilers,
718 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
719 runtime library, and make the front end reject it if the compiler
720 is in pedantic mode. The interpretation of 'X' is '1X'.
721 */
722 get_fnode (fmt, &head, &tail, FMT_X);
723 tail->repeat = 1;
724 tail->u.k = 1;
725 goto between_desc;
726
727 case FMT_STRING:
728 /* TODO: Find out why it is necessary to turn off format caching. */
729 saveit = false;
730 get_fnode (fmt, &head, &tail, FMT_STRING);
731 tail->u.string.p = fmt->string;
732 tail->u.string.length = fmt->value;
733 tail->repeat = 1;
734 goto optional_comma;
735
736 case FMT_RC:
737 case FMT_RD:
738 case FMT_RN:
739 case FMT_RP:
740 case FMT_RU:
741 case FMT_RZ:
742 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
743 "descriptor not allowed");
744 get_fnode (fmt, &head, &tail, t);
745 tail->repeat = 1;
746 goto between_desc;
747
748 case FMT_DC:
749 case FMT_DP:
750 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
751 "descriptor not allowed");
752 /* Fall through. */
753 case FMT_S:
754 case FMT_SS:
755 case FMT_SP:
756 case FMT_BN:
757 case FMT_BZ:
758 get_fnode (fmt, &head, &tail, t);
759 tail->repeat = 1;
760 goto between_desc;
761
762 case FMT_COLON:
763 get_fnode (fmt, &head, &tail, FMT_COLON);
764 tail->repeat = 1;
765 goto optional_comma;
766
767 case FMT_SLASH:
768 get_fnode (fmt, &head, &tail, FMT_SLASH);
769 tail->repeat = 1;
770 tail->u.r = 1;
771 goto optional_comma;
772
773 case FMT_DOLLAR:
774 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
775 tail->repeat = 1;
776 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
777 goto between_desc;
778
779 case FMT_T:
780 case FMT_TL:
781 case FMT_TR:
782 t2 = format_lex (fmt);
783 if (t2 != FMT_POSINT)
784 {
785 fmt->error = posint_required;
786 goto finished;
787 }
788 get_fnode (fmt, &head, &tail, t);
789 tail->u.n = fmt->value;
790 tail->repeat = 1;
791 goto between_desc;
792
793 case FMT_I:
794 case FMT_B:
795 case FMT_O:
796 case FMT_Z:
797 case FMT_E:
798 case FMT_EN:
799 case FMT_ES:
800 case FMT_D:
801 case FMT_L:
802 case FMT_A:
803 case FMT_F:
804 case FMT_G:
805 repeat = 1;
806 *seen_dd = true;
807 goto data_desc;
808
809 case FMT_H:
810 get_fnode (fmt, &head, &tail, FMT_STRING);
811 if (fmt->format_string_len < 1)
812 {
813 fmt->error = bad_hollerith;
814 goto finished;
815 }
816
817 tail->u.string.p = fmt->format_string;
818 tail->u.string.length = 1;
819 tail->repeat = 1;
820
821 fmt->format_string++;
822 fmt->format_string_len--;
823
824 goto between_desc;
825
826 case FMT_END:
827 fmt->error = unexpected_end;
828 goto finished;
829
830 case FMT_BADSTRING:
831 goto finished;
832
833 case FMT_RPAREN:
834 goto finished;
835
836 default:
837 fmt->error = unexpected_element;
838 goto finished;
839 }
840
841 /* In this state, t must currently be a data descriptor. Deal with
842 things that can/must follow the descriptor */
843 data_desc:
844 switch (t)
845 {
846 case FMT_L:
847 t = format_lex (fmt);
848 if (t != FMT_POSINT)
849 {
850 if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
851 {
852 fmt->error = posint_required;
853 goto finished;
854 }
855 else
856 {
857 fmt->saved_token = t;
858 fmt->value = 1; /* Default width */
859 notify_std (&dtp->common, GFC_STD_GNU, posint_required);
860 }
861 }
862
863 get_fnode (fmt, &head, &tail, FMT_L);
864 tail->u.n = fmt->value;
865 tail->repeat = repeat;
866 break;
867
868 case FMT_A:
869 t = format_lex (fmt);
870 if (t == FMT_ZERO)
871 {
872 fmt->error = zero_width;
873 goto finished;
874 }
875
876 if (t != FMT_POSINT)
877 {
878 fmt->saved_token = t;
879 fmt->value = -1; /* Width not present */
880 }
881
882 get_fnode (fmt, &head, &tail, FMT_A);
883 tail->repeat = repeat;
884 tail->u.n = fmt->value;
885 break;
886
887 case FMT_D:
888 case FMT_E:
889 case FMT_F:
890 case FMT_G:
891 case FMT_EN:
892 case FMT_ES:
893 get_fnode (fmt, &head, &tail, t);
894 tail->repeat = repeat;
895
896 u = format_lex (fmt);
897 if (t == FMT_G && u == FMT_ZERO)
898 {
899 if (notification_std (GFC_STD_F2008) == NOTIFICATION_ERROR
900 || dtp->u.p.mode == READING)
901 {
902 fmt->error = zero_width;
903 goto finished;
904 }
905 tail->u.real.w = 0;
906 u = format_lex (fmt);
907 if (u != FMT_PERIOD)
908 {
909 fmt->saved_token = u;
910 break;
911 }
912
913 u = format_lex (fmt);
914 if (u != FMT_POSINT)
915 {
916 fmt->error = posint_required;
917 goto finished;
918 }
919 tail->u.real.d = fmt->value;
920 break;
921 }
922 if (t == FMT_F && dtp->u.p.mode == WRITING)
923 {
924 if (u != FMT_POSINT && u != FMT_ZERO)
925 {
926 fmt->error = nonneg_required;
927 goto finished;
928 }
929 }
930 else if (u != FMT_POSINT)
931 {
932 fmt->error = posint_required;
933 goto finished;
934 }
935
936 tail->u.real.w = fmt->value;
937 t2 = t;
938 t = format_lex (fmt);
939 if (t != FMT_PERIOD)
940 {
941 /* We treat a missing decimal descriptor as 0. Note: This is only
942 allowed if -std=legacy, otherwise an error occurs. */
943 if (compile_options.warn_std != 0)
944 {
945 fmt->error = period_required;
946 goto finished;
947 }
948 fmt->saved_token = t;
949 tail->u.real.d = 0;
950 tail->u.real.e = -1;
951 break;
952 }
953
954 t = format_lex (fmt);
955 if (t != FMT_ZERO && t != FMT_POSINT)
956 {
957 fmt->error = nonneg_required;
958 goto finished;
959 }
960
961 tail->u.real.d = fmt->value;
962 tail->u.real.e = -1;
963
964 if (t2 == FMT_D || t2 == FMT_F)
965 break;
966
967
968 /* Look for optional exponent */
969 t = format_lex (fmt);
970 if (t != FMT_E)
971 fmt->saved_token = t;
972 else
973 {
974 t = format_lex (fmt);
975 if (t != FMT_POSINT)
976 {
977 fmt->error = "Positive exponent width required in format";
978 goto finished;
979 }
980
981 tail->u.real.e = fmt->value;
982 }
983
984 break;
985
986 case FMT_H:
987 if (repeat > fmt->format_string_len)
988 {
989 fmt->error = bad_hollerith;
990 goto finished;
991 }
992
993 get_fnode (fmt, &head, &tail, FMT_STRING);
994 tail->u.string.p = fmt->format_string;
995 tail->u.string.length = repeat;
996 tail->repeat = 1;
997
998 fmt->format_string += fmt->value;
999 fmt->format_string_len -= repeat;
1000
1001 break;
1002
1003 case FMT_I:
1004 case FMT_B:
1005 case FMT_O:
1006 case FMT_Z:
1007 get_fnode (fmt, &head, &tail, t);
1008 tail->repeat = repeat;
1009
1010 t = format_lex (fmt);
1011
1012 if (dtp->u.p.mode == READING)
1013 {
1014 if (t != FMT_POSINT)
1015 {
1016 fmt->error = posint_required;
1017 goto finished;
1018 }
1019 }
1020 else
1021 {
1022 if (t != FMT_ZERO && t != FMT_POSINT)
1023 {
1024 fmt->error = nonneg_required;
1025 goto finished;
1026 }
1027 }
1028
1029 tail->u.integer.w = fmt->value;
1030 tail->u.integer.m = -1;
1031
1032 t = format_lex (fmt);
1033 if (t != FMT_PERIOD)
1034 {
1035 fmt->saved_token = t;
1036 }
1037 else
1038 {
1039 t = format_lex (fmt);
1040 if (t != FMT_ZERO && t != FMT_POSINT)
1041 {
1042 fmt->error = nonneg_required;
1043 goto finished;
1044 }
1045
1046 tail->u.integer.m = fmt->value;
1047 }
1048
1049 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1050 {
1051 fmt->error = "Minimum digits exceeds field width";
1052 goto finished;
1053 }
1054
1055 break;
1056
1057 default:
1058 fmt->error = unexpected_element;
1059 goto finished;
1060 }
1061
1062 /* Between a descriptor and what comes next */
1063 between_desc:
1064 t = format_lex (fmt);
1065 switch (t)
1066 {
1067 case FMT_COMMA:
1068 goto format_item;
1069
1070 case FMT_RPAREN:
1071 goto finished;
1072
1073 case FMT_SLASH:
1074 case FMT_COLON:
1075 get_fnode (fmt, &head, &tail, t);
1076 tail->repeat = 1;
1077 goto optional_comma;
1078
1079 case FMT_END:
1080 fmt->error = unexpected_end;
1081 goto finished;
1082
1083 default:
1084 /* Assume a missing comma, this is a GNU extension */
1085 goto format_item_1;
1086 }
1087
1088 /* Optional comma is a weird between state where we've just finished
1089 reading a colon, slash or P descriptor. */
1090 optional_comma:
1091 t = format_lex (fmt);
1092 switch (t)
1093 {
1094 case FMT_COMMA:
1095 break;
1096
1097 case FMT_RPAREN:
1098 goto finished;
1099
1100 default: /* Assume that we have another format item */
1101 fmt->saved_token = t;
1102 break;
1103 }
1104
1105 goto format_item;
1106
1107 finished:
1108
1109 *save_ok = saveit;
1110
1111 return head;
1112 }
1113
1114
1115 /* format_error()-- Generate an error message for a format statement.
1116 * If the node that gives the location of the error is NULL, the error
1117 * is assumed to happen at parse time, and the current location of the
1118 * parser is shown.
1119 *
1120 * We generate a message showing where the problem is. We take extra
1121 * care to print only the relevant part of the format if it is longer
1122 * than a standard 80 column display. */
1123
1124 void
1125 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1126 {
1127 int width, i, j, offset;
1128 #define BUFLEN 300
1129 char *p, buffer[BUFLEN];
1130 format_data *fmt = dtp->u.p.fmt;
1131
1132 if (f != NULL)
1133 fmt->format_string = f->source;
1134
1135 if (message == unexpected_element)
1136 snprintf (buffer, BUFLEN, message, fmt->error_element);
1137 else
1138 snprintf (buffer, BUFLEN, "%s\n", message);
1139
1140 j = fmt->format_string - dtp->format;
1141
1142 offset = (j > 60) ? j - 40 : 0;
1143
1144 j -= offset;
1145 width = dtp->format_len - offset;
1146
1147 if (width > 80)
1148 width = 80;
1149
1150 /* Show the format */
1151
1152 p = strchr (buffer, '\0');
1153
1154 memcpy (p, dtp->format + offset, width);
1155
1156 p += width;
1157 *p++ = '\n';
1158
1159 /* Show where the problem is */
1160
1161 for (i = 1; i < j; i++)
1162 *p++ = ' ';
1163
1164 *p++ = '^';
1165 *p = '\0';
1166
1167 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1168 }
1169
1170
1171 /* revert()-- Do reversion of the format. Control reverts to the left
1172 * parenthesis that matches the rightmost right parenthesis. From our
1173 * tree structure, we are looking for the rightmost parenthesis node
1174 * at the second level, the first level always being a single
1175 * parenthesis node. If this node doesn't exit, we use the top
1176 * level. */
1177
1178 static void
1179 revert (st_parameter_dt *dtp)
1180 {
1181 fnode *f, *r;
1182 format_data *fmt = dtp->u.p.fmt;
1183
1184 dtp->u.p.reversion_flag = 1;
1185
1186 r = NULL;
1187
1188 for (f = fmt->array.array[0].u.child; f; f = f->next)
1189 if (f->format == FMT_LPAREN)
1190 r = f;
1191
1192 /* If r is NULL because no node was found, the whole tree will be used */
1193
1194 fmt->array.array[0].current = r;
1195 fmt->array.array[0].count = 0;
1196 }
1197
1198 /* parse_format()-- Parse a format string. */
1199
1200 void
1201 parse_format (st_parameter_dt *dtp)
1202 {
1203 format_data *fmt;
1204 bool format_cache_ok, seen_data_desc = false;
1205
1206 /* Don't cache for internal units and set an arbitrary limit on the size of
1207 format strings we will cache. (Avoids memory issues.) */
1208 format_cache_ok = !is_internal_unit (dtp);
1209
1210 /* Lookup format string to see if it has already been parsed. */
1211 if (format_cache_ok)
1212 {
1213 dtp->u.p.fmt = find_parsed_format (dtp);
1214
1215 if (dtp->u.p.fmt != NULL)
1216 {
1217 dtp->u.p.fmt->reversion_ok = 0;
1218 dtp->u.p.fmt->saved_token = FMT_NONE;
1219 dtp->u.p.fmt->saved_format = NULL;
1220 reset_fnode_counters (dtp);
1221 return;
1222 }
1223 }
1224
1225 /* Not found so proceed as follows. */
1226
1227 dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1228 fmt->format_string = dtp->format;
1229 fmt->format_string_len = dtp->format_len;
1230
1231 fmt->string = NULL;
1232 fmt->saved_token = FMT_NONE;
1233 fmt->error = NULL;
1234 fmt->value = 0;
1235
1236 /* Initialize variables used during traversal of the tree. */
1237
1238 fmt->reversion_ok = 0;
1239 fmt->saved_format = NULL;
1240
1241 /* Allocate the first format node as the root of the tree. */
1242
1243 fmt->last = &fmt->array;
1244 fmt->last->next = NULL;
1245 fmt->avail = &fmt->array.array[0];
1246
1247 memset (fmt->avail, 0, sizeof (*fmt->avail));
1248 fmt->avail->format = FMT_LPAREN;
1249 fmt->avail->repeat = 1;
1250 fmt->avail++;
1251
1252 if (format_lex (fmt) == FMT_LPAREN)
1253 fmt->array.array[0].u.child = parse_format_list (dtp, &format_cache_ok,
1254 &seen_data_desc);
1255 else
1256 fmt->error = "Missing initial left parenthesis in format";
1257
1258 if (fmt->error)
1259 {
1260 format_error (dtp, NULL, fmt->error);
1261 free_format_hash_table (dtp->u.p.current_unit);
1262 return;
1263 }
1264
1265 if (format_cache_ok)
1266 save_parsed_format (dtp);
1267 else
1268 dtp->u.p.format_not_saved = 1;
1269 }
1270
1271
1272 /* next_format0()-- Get the next format node without worrying about
1273 * reversion. Returns NULL when we hit the end of the list.
1274 * Parenthesis nodes are incremented after the list has been
1275 * exhausted, other nodes are incremented before they are returned. */
1276
1277 static const fnode *
1278 next_format0 (fnode * f)
1279 {
1280 const fnode *r;
1281
1282 if (f == NULL)
1283 return NULL;
1284
1285 if (f->format != FMT_LPAREN)
1286 {
1287 f->count++;
1288 if (f->count <= f->repeat)
1289 return f;
1290
1291 f->count = 0;
1292 return NULL;
1293 }
1294
1295 /* Deal with a parenthesis node with unlimited format. */
1296
1297 if (f->repeat == -2) /* -2 signifies unlimited. */
1298 for (;;)
1299 {
1300 if (f->current == NULL)
1301 f->current = f->u.child;
1302
1303 for (; f->current != NULL; f->current = f->current->next)
1304 {
1305 r = next_format0 (f->current);
1306 if (r != NULL)
1307 return r;
1308 }
1309 }
1310
1311 /* Deal with a parenthesis node with specific repeat count. */
1312 for (; f->count < f->repeat; f->count++)
1313 {
1314 if (f->current == NULL)
1315 f->current = f->u.child;
1316
1317 for (; f->current != NULL; f->current = f->current->next)
1318 {
1319 r = next_format0 (f->current);
1320 if (r != NULL)
1321 return r;
1322 }
1323 }
1324
1325 f->count = 0;
1326 return NULL;
1327 }
1328
1329
1330 /* next_format()-- Return the next format node. If the format list
1331 * ends up being exhausted, we do reversion. Reversion is only
1332 * allowed if we've seen a data descriptor since the
1333 * initialization or the last reversion. We return NULL if there
1334 * are no more data descriptors to return (which is an error
1335 * condition). */
1336
1337 const fnode *
1338 next_format (st_parameter_dt *dtp)
1339 {
1340 format_token t;
1341 const fnode *f;
1342 format_data *fmt = dtp->u.p.fmt;
1343
1344 if (fmt->saved_format != NULL)
1345 { /* Deal with a pushed-back format node */
1346 f = fmt->saved_format;
1347 fmt->saved_format = NULL;
1348 goto done;
1349 }
1350
1351 f = next_format0 (&fmt->array.array[0]);
1352 if (f == NULL)
1353 {
1354 if (!fmt->reversion_ok)
1355 return NULL;
1356
1357 fmt->reversion_ok = 0;
1358 revert (dtp);
1359
1360 f = next_format0 (&fmt->array.array[0]);
1361 if (f == NULL)
1362 {
1363 format_error (dtp, NULL, reversion_error);
1364 return NULL;
1365 }
1366
1367 /* Push the first reverted token and return a colon node in case
1368 * there are no more data items. */
1369
1370 fmt->saved_format = f;
1371 return &colon_node;
1372 }
1373
1374 /* If this is a data edit descriptor, then reversion has become OK. */
1375 done:
1376 t = f->format;
1377
1378 if (!fmt->reversion_ok &&
1379 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1380 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1381 t == FMT_A || t == FMT_D))
1382 fmt->reversion_ok = 1;
1383 return f;
1384 }
1385
1386
1387 /* unget_format()-- Push the given format back so that it will be
1388 * returned on the next call to next_format() without affecting
1389 * counts. This is necessary when we've encountered a data
1390 * descriptor, but don't know what the data item is yet. The format
1391 * node is pushed back, and we return control to the main program,
1392 * which calls the library back with the data item (or not). */
1393
1394 void
1395 unget_format (st_parameter_dt *dtp, const fnode *f)
1396 {
1397 dtp->u.p.fmt->saved_format = f;
1398 }
1399