Implement real literal extension for Ada
[binutils-gdb.git] / gdb / ada-lex.l
1 /* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
2 Copyright (C) 1994-2022 Free Software Foundation, Inc.
3
4 This file is part of GDB.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18
19 /*----------------------------------------------------------------------*/
20
21 /* The converted version of this file is to be included in ada-exp.y, */
22 /* the Ada parser for gdb. The function yylex obtains characters from */
23 /* the global pointer lexptr. It returns a syntactic category for */
24 /* each successive token and places a semantic value into yylval */
25 /* (ada-lval), defined by the parser. */
26
27 DIG [0-9]
28 NUM10 ({DIG}({DIG}|_)*)
29 HEXDIG [0-9a-f]
30 NUM16 ({HEXDIG}({HEXDIG}|_)*)
31 OCTDIG [0-7]
32 LETTER [a-z_]
33 ID ({LETTER}({LETTER}|{DIG}|[\x80-\xff])*|"<"{LETTER}({LETTER}|{DIG})*">")
34 WHITE [ \t\n]
35 TICK ("'"{WHITE}*)
36 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
37 OPER ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
38
39 EXP (e[+-]{NUM10})
40 POSEXP (e"+"?{NUM10})
41
42 %{
43
44 #include "diagnostics.h"
45
46 /* Some old versions of flex generate code that uses the "register" keyword,
47 which clang warns about. This was observed for example with flex 2.5.35,
48 as shipped with macOS 10.12. The same happens with flex 2.5.37 and g++ 11
49 which defaults to ISO C++17, that does not allow register storage class
50 specifiers. */
51 DIAGNOSTIC_PUSH
52 DIAGNOSTIC_IGNORE_DEPRECATED_REGISTER
53
54 #define NUMERAL_WIDTH 256
55 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
56
57 /* Temporary staging for numeric literals. */
58 static char numbuf[NUMERAL_WIDTH];
59 static void canonicalizeNumeral (char *s1, const char *);
60 static struct stoken processString (const char*, int);
61 static int processInt (struct parser_state *, const char *, const char *,
62 const char *);
63 static int processReal (struct parser_state *, const char *);
64 static struct stoken processId (const char *, int);
65 static int processAttribute (const char *);
66 static int find_dot_all (const char *);
67 static void rewind_to_char (int);
68
69 #undef YY_DECL
70 #define YY_DECL static int yylex ( void )
71
72 /* Flex generates a static function "input" which is not used.
73 Defining YY_NO_INPUT comments it out. */
74 #define YY_NO_INPUT
75
76 #undef YY_INPUT
77 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
78 if ( *pstate->lexptr == '\000' ) \
79 (RESULT) = YY_NULL; \
80 else \
81 { \
82 *(BUF) = *pstate->lexptr; \
83 (RESULT) = 1; \
84 pstate->lexptr += 1; \
85 }
86
87 static int find_dot_all (const char *);
88
89 /* Depth of parentheses. */
90 static int paren_depth;
91
92 %}
93
94 %option case-insensitive interactive nodefault noyywrap
95
96 %s BEFORE_QUAL_QUOTE
97
98 %%
99
100 {WHITE} { }
101
102 "--".* { yyterminate(); }
103
104 {NUM10}{POSEXP} {
105 canonicalizeNumeral (numbuf, yytext);
106 char *e_ptr = strrchr (numbuf, 'e');
107 *e_ptr = '\0';
108 return processInt (pstate, nullptr, numbuf, e_ptr + 1);
109 }
110
111 {NUM10} {
112 canonicalizeNumeral (numbuf, yytext);
113 return processInt (pstate, NULL, numbuf, NULL);
114 }
115
116 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
117 canonicalizeNumeral (numbuf, yytext);
118 char *e_ptr = strrchr (numbuf, 'e');
119 *e_ptr = '\0';
120 return processInt (pstate, numbuf,
121 strchr (numbuf, '#') + 1,
122 e_ptr + 1);
123 }
124
125 /* The "llf" is a gdb extension to allow a floating-point
126 constant to be written in some other base. The
127 floating-point number is formed by reinterpreting the
128 bytes, allowing direct control over the bits. */
129 {NUM10}(l{0,2}f)?"#"{HEXDIG}({HEXDIG}|_)*"#" {
130 canonicalizeNumeral (numbuf, yytext);
131 return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
132 NULL);
133 }
134
135 "0x"{HEXDIG}+ {
136 canonicalizeNumeral (numbuf, yytext+2);
137 return processInt (pstate, "16#", numbuf, NULL);
138 }
139
140
141 {NUM10}"."{NUM10}{EXP} {
142 canonicalizeNumeral (numbuf, yytext);
143 return processReal (pstate, numbuf);
144 }
145
146 {NUM10}"."{NUM10} {
147 canonicalizeNumeral (numbuf, yytext);
148 return processReal (pstate, numbuf);
149 }
150
151 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
152 error (_("Based real literals not implemented yet."));
153 }
154
155 {NUM10}"#"{NUM16}"."{NUM16}"#" {
156 error (_("Based real literals not implemented yet."));
157 }
158
159 <INITIAL>"'"({GRAPHIC}|\")"'" {
160 yylval.typed_val.val = yytext[1];
161 yylval.typed_val.type = type_for_char (pstate, yytext[1]);
162 return CHARLIT;
163 }
164
165 <INITIAL>"'[\""{HEXDIG}{2,}"\"]'" {
166 ULONGEST v = strtoulst (yytext+3, nullptr, 16);
167 yylval.typed_val.val = v;
168 yylval.typed_val.type = type_for_char (pstate, v);
169 return CHARLIT;
170 }
171
172 /* Note that we don't handle bracket sequences of more than 2
173 digits here. Currently there's no support for wide or
174 wide-wide strings. */
175 \"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\" {
176 yylval.sval = processString (yytext+1, yyleng-2);
177 return STRING;
178 }
179
180 \" {
181 error (_("ill-formed or non-terminated string literal"));
182 }
183
184
185 if {
186 rewind_to_char ('i');
187 return 0;
188 }
189
190 task {
191 rewind_to_char ('t');
192 return 0;
193 }
194
195 thread{WHITE}+{DIG} {
196 /* This keyword signals the end of the expression and
197 will be processed separately. */
198 rewind_to_char ('t');
199 return 0;
200 }
201
202 /* ADA KEYWORDS */
203
204 abs { return ABS; }
205 and { return _AND_; }
206 else { return ELSE; }
207 in { return IN; }
208 mod { return MOD; }
209 new { return NEW; }
210 not { return NOT; }
211 null { return NULL_PTR; }
212 or { return OR; }
213 others { return OTHERS; }
214 rem { return REM; }
215 then { return THEN; }
216 xor { return XOR; }
217
218 /* BOOLEAN "KEYWORDS" */
219
220 /* True and False are not keywords in Ada, but rather enumeration constants.
221 However, the boolean type is no longer represented as an enum, so True
222 and False are no longer defined in symbol tables. We compromise by
223 making them keywords (when bare). */
224
225 true { return TRUEKEYWORD; }
226 false { return FALSEKEYWORD; }
227
228 /* ATTRIBUTES */
229
230 {TICK}[a-z][a-z_]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
231
232 /* PUNCTUATION */
233
234 "=>" { return ARROW; }
235 ".." { return DOTDOT; }
236 "**" { return STARSTAR; }
237 ":=" { return ASSIGN; }
238 "/=" { return NOTEQUAL; }
239 "<=" { return LEQ; }
240 ">=" { return GEQ; }
241
242 <BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
243
244 [-&*+./:<>=|;\[\]] { return yytext[0]; }
245
246 "," { if (paren_depth == 0 && pstate->comma_terminates)
247 {
248 rewind_to_char (',');
249 return 0;
250 }
251 else
252 return ',';
253 }
254
255 "(" { paren_depth += 1; return '('; }
256 ")" { if (paren_depth == 0)
257 {
258 rewind_to_char (')');
259 return 0;
260 }
261 else
262 {
263 paren_depth -= 1;
264 return ')';
265 }
266 }
267
268 "."{WHITE}*all { return DOT_ALL; }
269
270 "."{WHITE}*{ID} {
271 yylval.sval = processId (yytext+1, yyleng-1);
272 return DOT_ID;
273 }
274
275 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")? {
276 int all_posn = find_dot_all (yytext);
277
278 if (all_posn == -1 && yytext[yyleng-1] == '\'')
279 {
280 BEGIN BEFORE_QUAL_QUOTE;
281 yyless (yyleng-1);
282 }
283 else if (all_posn >= 0)
284 yyless (all_posn);
285 yylval.sval = processId (yytext, yyleng);
286 return NAME;
287 }
288
289
290 /* GDB EXPRESSION CONSTRUCTS */
291
292 "'"[^']+"'"{WHITE}*:: {
293 yyless (yyleng - 2);
294 yylval.sval = processId (yytext, yyleng);
295 return NAME;
296 }
297
298 "::" { return COLONCOLON; }
299
300 [{}@] { return yytext[0]; }
301
302 /* REGISTERS AND GDB CONVENIENCE VARIABLES */
303
304 "$"({LETTER}|{DIG}|"$")* {
305 yylval.sval.ptr = yytext;
306 yylval.sval.length = yyleng;
307 return DOLLAR_VARIABLE;
308 }
309
310 /* CATCH-ALL ERROR CASE */
311
312 . { error (_("Invalid character '%s' in expression."), yytext); }
313 %%
314
315 #include <ctype.h>
316 /* Initialize the lexer for processing new expression. */
317
318 static void
319 lexer_init (FILE *inp)
320 {
321 BEGIN INITIAL;
322 paren_depth = 0;
323 yyrestart (inp);
324 }
325
326
327 /* Copy S2 to S1, removing all underscores, and downcasing all letters. */
328
329 static void
330 canonicalizeNumeral (char *s1, const char *s2)
331 {
332 for (; *s2 != '\000'; s2 += 1)
333 {
334 if (*s2 != '_')
335 {
336 *s1 = tolower(*s2);
337 s1 += 1;
338 }
339 }
340 s1[0] = '\000';
341 }
342
343 /* Interprets the prefix of NUM that consists of digits of the given BASE
344 as an integer of that BASE, with the string EXP as an exponent.
345 Puts value in yylval, and returns INT, if the string is valid. Causes
346 an error if the number is improperly formated. BASE, if NULL, defaults
347 to "10", and EXP to "1". The EXP does not contain a leading 'e' or 'E'.
348 */
349
350 static int
351 processInt (struct parser_state *par_state, const char *base0,
352 const char *num0, const char *exp0)
353 {
354 long exp;
355 int base;
356 /* For the based literal with an "f" prefix, we'll return a
357 floating-point number. This counts the the number of "l"s seen,
358 to decide the width of the floating-point number to return. -1
359 means no "f". */
360 int floating_point_l_count = -1;
361
362 if (base0 == NULL)
363 base = 10;
364 else
365 {
366 char *end_of_base;
367 base = strtol (base0, &end_of_base, 10);
368 if (base < 2 || base > 16)
369 error (_("Invalid base: %d."), base);
370 while (*end_of_base == 'l')
371 {
372 ++floating_point_l_count;
373 ++end_of_base;
374 }
375 /* This assertion is ensured by the pattern. */
376 gdb_assert (floating_point_l_count == -1 || *end_of_base == 'f');
377 if (*end_of_base == 'f')
378 {
379 ++end_of_base;
380 ++floating_point_l_count;
381 }
382 /* This assertion is ensured by the pattern. */
383 gdb_assert (*end_of_base == '#');
384 }
385
386 if (exp0 == NULL)
387 exp = 0;
388 else
389 exp = strtol(exp0, (char **) NULL, 10);
390
391 gdb_mpz result;
392 while (isxdigit (*num0))
393 {
394 int dig = fromhex (*num0);
395 if (dig >= base)
396 error (_("Invalid digit `%c' in based literal"), *num0);
397 mpz_mul_ui (result.val, result.val, base);
398 mpz_add_ui (result.val, result.val, dig);
399 ++num0;
400 }
401
402 while (exp > 0)
403 {
404 mpz_mul_ui (result.val, result.val, base);
405 exp -= 1;
406 }
407
408 if (floating_point_l_count > -1)
409 {
410 struct type *fp_type;
411 if (floating_point_l_count == 0)
412 fp_type = language_lookup_primitive_type (par_state->language (),
413 par_state->gdbarch (),
414 "float");
415 else if (floating_point_l_count == 1)
416 fp_type = language_lookup_primitive_type (par_state->language (),
417 par_state->gdbarch (),
418 "long_float");
419 else
420 {
421 /* This assertion is ensured by the pattern. */
422 gdb_assert (floating_point_l_count == 2);
423 fp_type = language_lookup_primitive_type (par_state->language (),
424 par_state->gdbarch (),
425 "long_long_float");
426 }
427
428 yylval.typed_val_float.type = fp_type;
429 result.write (gdb::make_array_view (yylval.typed_val_float.val,
430 TYPE_LENGTH (fp_type)),
431 type_byte_order (fp_type),
432 true);
433
434 return FLOAT;
435 }
436
437 gdb_mpz maxval (ULONGEST_MAX / base);
438 if (mpz_cmp (result.val, maxval.val) > 0)
439 error (_("Integer literal out of range"));
440
441 LONGEST value = result.as_integer<LONGEST> ();
442 if ((value >> (gdbarch_int_bit (par_state->gdbarch ())-1)) == 0)
443 yylval.typed_val.type = type_int (par_state);
444 else if ((value >> (gdbarch_long_bit (par_state->gdbarch ())-1)) == 0)
445 yylval.typed_val.type = type_long (par_state);
446 else if (((value >> (gdbarch_long_bit (par_state->gdbarch ())-1)) >> 1) == 0)
447 {
448 /* We have a number representable as an unsigned integer quantity.
449 For consistency with the C treatment, we will treat it as an
450 anonymous modular (unsigned) quantity. Alas, the types are such
451 that we need to store .val as a signed quantity. Sorry
452 for the mess, but C doesn't officially guarantee that a simple
453 assignment does the trick (no, it doesn't; read the reference manual).
454 */
455 yylval.typed_val.type
456 = builtin_type (par_state->gdbarch ())->builtin_unsigned_long;
457 if (value & LONGEST_SIGN)
458 yylval.typed_val.val =
459 (LONGEST) (value & ~LONGEST_SIGN)
460 - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
461 else
462 yylval.typed_val.val = (LONGEST) value;
463 return INT;
464 }
465 else
466 yylval.typed_val.type = type_long_long (par_state);
467
468 yylval.typed_val.val = value;
469 return INT;
470 }
471
472 static int
473 processReal (struct parser_state *par_state, const char *num0)
474 {
475 yylval.typed_val_float.type = type_long_double (par_state);
476
477 bool parsed = parse_float (num0, strlen (num0),
478 yylval.typed_val_float.type,
479 yylval.typed_val_float.val);
480 gdb_assert (parsed);
481 return FLOAT;
482 }
483
484
485 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym. The
486 resulting string is valid until the next call to ada_parse. If
487 NAME0 contains the substring "___", it is assumed to be already
488 encoded and the resulting name is equal to it. Similarly, if the name
489 starts with '<', it is copied verbatim. Otherwise, it differs
490 from NAME0 in that:
491 + Characters between '...' are transfered verbatim to yylval.ssym.
492 + Trailing "'" characters in quoted sequences are removed (a leading quote is
493 preserved to indicate that the name is not to be GNAT-encoded).
494 + Unquoted whitespace is removed.
495 + Unquoted alphabetic characters are mapped to lower case.
496 Result is returned as a struct stoken, but for convenience, the string
497 is also null-terminated. Result string valid until the next call of
498 ada_parse.
499 */
500 static struct stoken
501 processId (const char *name0, int len)
502 {
503 char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
504 int i0, i;
505 struct stoken result;
506
507 result.ptr = name;
508 while (len > 0 && isspace (name0[len-1]))
509 len -= 1;
510
511 if (name0[0] == '<' || strstr (name0, "___") != NULL)
512 {
513 strncpy (name, name0, len);
514 name[len] = '\000';
515 result.length = len;
516 return result;
517 }
518
519 i = i0 = 0;
520 while (i0 < len)
521 {
522 if (isalnum (name0[i0]))
523 {
524 name[i] = tolower (name0[i0]);
525 i += 1; i0 += 1;
526 }
527 else switch (name0[i0])
528 {
529 default:
530 name[i] = name0[i0];
531 i += 1; i0 += 1;
532 break;
533 case ' ': case '\t':
534 i0 += 1;
535 break;
536 case '\'':
537 do
538 {
539 name[i] = name0[i0];
540 i += 1; i0 += 1;
541 }
542 while (i0 < len && name0[i0] != '\'');
543 i0 += 1;
544 break;
545 }
546 }
547 name[i] = '\000';
548
549 result.length = i;
550 return result;
551 }
552
553 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
554 with special hex character notations replaced with characters.
555 Result valid until the next call to ada_parse. */
556
557 static struct stoken
558 processString (const char *text, int len)
559 {
560 const char *p;
561 char *q;
562 const char *lim = text + len;
563 struct stoken result;
564
565 q = (char *) obstack_alloc (&temp_parse_space, len);
566 result.ptr = q;
567 p = text;
568 while (p < lim)
569 {
570 if (p[0] == '[' && p[1] == '"' && p+2 < lim)
571 {
572 if (p[2] == '"') /* "...["""]... */
573 {
574 *q = '"';
575 p += 4;
576 }
577 else
578 {
579 const char *end;
580 ULONGEST chr = strtoulst (p + 2, &end, 16);
581 if (chr > 0xff)
582 error (_("wide strings are not yet supported"));
583 *q = (char) chr;
584 p = end + 1;
585 }
586 }
587 else
588 *q = *p;
589 q += 1;
590 p += 1;
591 }
592 result.length = q - result.ptr;
593 return result;
594 }
595
596 /* Returns the position within STR of the '.' in a
597 '.{WHITE}*all' component of a dotted name, or -1 if there is none.
598 Note: we actually don't need this routine, since 'all' can never be an
599 Ada identifier. Thus, looking up foo.all or foo.all.x as a name
600 must fail, and will eventually be interpreted as (foo).all or
601 (foo).all.x. However, this does avoid an extraneous lookup. */
602
603 static int
604 find_dot_all (const char *str)
605 {
606 int i;
607
608 for (i = 0; str[i] != '\000'; i++)
609 if (str[i] == '.')
610 {
611 int i0 = i;
612
613 do
614 i += 1;
615 while (isspace (str[i]));
616
617 if (strncasecmp (str + i, "all", 3) == 0
618 && !isalnum (str[i + 3]) && str[i + 3] != '_')
619 return i0;
620 }
621 return -1;
622 }
623
624 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
625 case. */
626
627 static int
628 subseqMatch (const char *subseq, const char *str)
629 {
630 if (subseq[0] == '\0')
631 return 1;
632 else if (str[0] == '\0')
633 return 0;
634 else if (tolower (subseq[0]) == tolower (str[0]))
635 return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
636 else
637 return subseqMatch (subseq, str+1);
638 }
639
640
641 static struct { const char *name; int code; }
642 attributes[] = {
643 { "address", TICK_ADDRESS },
644 { "unchecked_access", TICK_ACCESS },
645 { "unrestricted_access", TICK_ACCESS },
646 { "access", TICK_ACCESS },
647 { "first", TICK_FIRST },
648 { "last", TICK_LAST },
649 { "length", TICK_LENGTH },
650 { "max", TICK_MAX },
651 { "min", TICK_MIN },
652 { "modulus", TICK_MODULUS },
653 { "pos", TICK_POS },
654 { "range", TICK_RANGE },
655 { "size", TICK_SIZE },
656 { "tag", TICK_TAG },
657 { "val", TICK_VAL },
658 { NULL, -1 }
659 };
660
661 /* Return the syntactic code corresponding to the attribute name or
662 abbreviation STR. */
663
664 static int
665 processAttribute (const char *str)
666 {
667 int i, k;
668
669 for (i = 0; attributes[i].code != -1; i += 1)
670 if (strcasecmp (str, attributes[i].name) == 0)
671 return attributes[i].code;
672
673 for (i = 0, k = -1; attributes[i].code != -1; i += 1)
674 if (subseqMatch (str, attributes[i].name))
675 {
676 if (k == -1)
677 k = i;
678 else
679 error (_("ambiguous attribute name: `%s'"), str);
680 }
681 if (k == -1)
682 error (_("unrecognized attribute: `%s'"), str);
683
684 return attributes[k].code;
685 }
686
687 /* Back up lexptr by yyleng and then to the rightmost occurrence of
688 character CH, case-folded (there must be one). WARNING: since
689 lexptr points to the next input character that Flex has not yet
690 transferred to its internal buffer, the use of this function
691 depends on the assumption that Flex calls YY_INPUT only when it is
692 logically necessary to do so (thus, there is no reading ahead
693 farther than needed to identify the next token.) */
694
695 static void
696 rewind_to_char (int ch)
697 {
698 pstate->lexptr -= yyleng;
699 while (toupper (*pstate->lexptr) != toupper (ch))
700 pstate->lexptr -= 1;
701 yyrestart (NULL);
702 }
703
704 /* Dummy definition to suppress warnings about unused static definitions. */
705 typedef void (*dummy_function) ();
706 dummy_function ada_flex_use[] =
707 {
708 (dummy_function) yyunput
709 };
710
711 DIAGNOSTIC_POP