Fix Ada integer literals with exponents
[binutils-gdb.git] / gdb / ada-lex.l
1 /* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
2 Copyright (C) 1994-2022 Free Software Foundation, Inc.
3
4 This file is part of GDB.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18
19 /*----------------------------------------------------------------------*/
20
21 /* The converted version of this file is to be included in ada-exp.y, */
22 /* the Ada parser for gdb. The function yylex obtains characters from */
23 /* the global pointer lexptr. It returns a syntactic category for */
24 /* each successive token and places a semantic value into yylval */
25 /* (ada-lval), defined by the parser. */
26
27 DIG [0-9]
28 NUM10 ({DIG}({DIG}|_)*)
29 HEXDIG [0-9a-f]
30 NUM16 ({HEXDIG}({HEXDIG}|_)*)
31 OCTDIG [0-7]
32 LETTER [a-z_]
33 ID ({LETTER}({LETTER}|{DIG}|[\x80-\xff])*|"<"{LETTER}({LETTER}|{DIG})*">")
34 WHITE [ \t\n]
35 TICK ("'"{WHITE}*)
36 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
37 OPER ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
38
39 EXP (e[+-]{NUM10})
40 POSEXP (e"+"?{NUM10})
41
42 %{
43
44 #include "diagnostics.h"
45
46 /* Some old versions of flex generate code that uses the "register" keyword,
47 which clang warns about. This was observed for example with flex 2.5.35,
48 as shipped with macOS 10.12. The same happens with flex 2.5.37 and g++ 11
49 which defaults to ISO C++17, that does not allow register storage class
50 specifiers. */
51 DIAGNOSTIC_PUSH
52 DIAGNOSTIC_IGNORE_DEPRECATED_REGISTER
53
54 #define NUMERAL_WIDTH 256
55 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
56
57 /* Temporary staging for numeric literals. */
58 static char numbuf[NUMERAL_WIDTH];
59 static void canonicalizeNumeral (char *s1, const char *);
60 static struct stoken processString (const char*, int);
61 static int processInt (struct parser_state *, const char *, const char *,
62 const char *);
63 static int processReal (struct parser_state *, const char *);
64 static struct stoken processId (const char *, int);
65 static int processAttribute (const char *);
66 static int find_dot_all (const char *);
67 static void rewind_to_char (int);
68
69 #undef YY_DECL
70 #define YY_DECL static int yylex ( void )
71
72 /* Flex generates a static function "input" which is not used.
73 Defining YY_NO_INPUT comments it out. */
74 #define YY_NO_INPUT
75
76 #undef YY_INPUT
77 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
78 if ( *pstate->lexptr == '\000' ) \
79 (RESULT) = YY_NULL; \
80 else \
81 { \
82 *(BUF) = *pstate->lexptr; \
83 (RESULT) = 1; \
84 pstate->lexptr += 1; \
85 }
86
87 static int find_dot_all (const char *);
88
89 /* Depth of parentheses. */
90 static int paren_depth;
91
92 %}
93
94 %option case-insensitive interactive nodefault noyywrap
95
96 %s BEFORE_QUAL_QUOTE
97
98 %%
99
100 {WHITE} { }
101
102 "--".* { yyterminate(); }
103
104 {NUM10}{POSEXP} {
105 canonicalizeNumeral (numbuf, yytext);
106 char *e_ptr = strrchr (numbuf, 'e');
107 *e_ptr = '\0';
108 return processInt (pstate, nullptr, numbuf, e_ptr + 1);
109 }
110
111 {NUM10} {
112 canonicalizeNumeral (numbuf, yytext);
113 return processInt (pstate, NULL, numbuf, NULL);
114 }
115
116 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
117 canonicalizeNumeral (numbuf, yytext);
118 char *e_ptr = strrchr (numbuf, 'e');
119 *e_ptr = '\0';
120 return processInt (pstate, numbuf,
121 strchr (numbuf, '#') + 1,
122 e_ptr + 1);
123 }
124
125 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
126 canonicalizeNumeral (numbuf, yytext);
127 return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
128 NULL);
129 }
130
131 "0x"{HEXDIG}+ {
132 canonicalizeNumeral (numbuf, yytext+2);
133 return processInt (pstate, "16#", numbuf, NULL);
134 }
135
136
137 {NUM10}"."{NUM10}{EXP} {
138 canonicalizeNumeral (numbuf, yytext);
139 return processReal (pstate, numbuf);
140 }
141
142 {NUM10}"."{NUM10} {
143 canonicalizeNumeral (numbuf, yytext);
144 return processReal (pstate, numbuf);
145 }
146
147 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
148 error (_("Based real literals not implemented yet."));
149 }
150
151 {NUM10}"#"{NUM16}"."{NUM16}"#" {
152 error (_("Based real literals not implemented yet."));
153 }
154
155 <INITIAL>"'"({GRAPHIC}|\")"'" {
156 yylval.typed_val.val = yytext[1];
157 yylval.typed_val.type = type_for_char (pstate, yytext[1]);
158 return CHARLIT;
159 }
160
161 <INITIAL>"'[\""{HEXDIG}{2,}"\"]'" {
162 ULONGEST v = strtoulst (yytext+3, nullptr, 16);
163 yylval.typed_val.val = v;
164 yylval.typed_val.type = type_for_char (pstate, v);
165 return CHARLIT;
166 }
167
168 /* Note that we don't handle bracket sequences of more than 2
169 digits here. Currently there's no support for wide or
170 wide-wide strings. */
171 \"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\" {
172 yylval.sval = processString (yytext+1, yyleng-2);
173 return STRING;
174 }
175
176 \" {
177 error (_("ill-formed or non-terminated string literal"));
178 }
179
180
181 if {
182 rewind_to_char ('i');
183 return 0;
184 }
185
186 task {
187 rewind_to_char ('t');
188 return 0;
189 }
190
191 thread{WHITE}+{DIG} {
192 /* This keyword signals the end of the expression and
193 will be processed separately. */
194 rewind_to_char ('t');
195 return 0;
196 }
197
198 /* ADA KEYWORDS */
199
200 abs { return ABS; }
201 and { return _AND_; }
202 else { return ELSE; }
203 in { return IN; }
204 mod { return MOD; }
205 new { return NEW; }
206 not { return NOT; }
207 null { return NULL_PTR; }
208 or { return OR; }
209 others { return OTHERS; }
210 rem { return REM; }
211 then { return THEN; }
212 xor { return XOR; }
213
214 /* BOOLEAN "KEYWORDS" */
215
216 /* True and False are not keywords in Ada, but rather enumeration constants.
217 However, the boolean type is no longer represented as an enum, so True
218 and False are no longer defined in symbol tables. We compromise by
219 making them keywords (when bare). */
220
221 true { return TRUEKEYWORD; }
222 false { return FALSEKEYWORD; }
223
224 /* ATTRIBUTES */
225
226 {TICK}[a-z][a-z_]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
227
228 /* PUNCTUATION */
229
230 "=>" { return ARROW; }
231 ".." { return DOTDOT; }
232 "**" { return STARSTAR; }
233 ":=" { return ASSIGN; }
234 "/=" { return NOTEQUAL; }
235 "<=" { return LEQ; }
236 ">=" { return GEQ; }
237
238 <BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
239
240 [-&*+./:<>=|;\[\]] { return yytext[0]; }
241
242 "," { if (paren_depth == 0 && pstate->comma_terminates)
243 {
244 rewind_to_char (',');
245 return 0;
246 }
247 else
248 return ',';
249 }
250
251 "(" { paren_depth += 1; return '('; }
252 ")" { if (paren_depth == 0)
253 {
254 rewind_to_char (')');
255 return 0;
256 }
257 else
258 {
259 paren_depth -= 1;
260 return ')';
261 }
262 }
263
264 "."{WHITE}*all { return DOT_ALL; }
265
266 "."{WHITE}*{ID} {
267 yylval.sval = processId (yytext+1, yyleng-1);
268 return DOT_ID;
269 }
270
271 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")? {
272 int all_posn = find_dot_all (yytext);
273
274 if (all_posn == -1 && yytext[yyleng-1] == '\'')
275 {
276 BEGIN BEFORE_QUAL_QUOTE;
277 yyless (yyleng-1);
278 }
279 else if (all_posn >= 0)
280 yyless (all_posn);
281 yylval.sval = processId (yytext, yyleng);
282 return NAME;
283 }
284
285
286 /* GDB EXPRESSION CONSTRUCTS */
287
288 "'"[^']+"'"{WHITE}*:: {
289 yyless (yyleng - 2);
290 yylval.sval = processId (yytext, yyleng);
291 return NAME;
292 }
293
294 "::" { return COLONCOLON; }
295
296 [{}@] { return yytext[0]; }
297
298 /* REGISTERS AND GDB CONVENIENCE VARIABLES */
299
300 "$"({LETTER}|{DIG}|"$")* {
301 yylval.sval.ptr = yytext;
302 yylval.sval.length = yyleng;
303 return DOLLAR_VARIABLE;
304 }
305
306 /* CATCH-ALL ERROR CASE */
307
308 . { error (_("Invalid character '%s' in expression."), yytext); }
309 %%
310
311 #include <ctype.h>
312 /* Initialize the lexer for processing new expression. */
313
314 static void
315 lexer_init (FILE *inp)
316 {
317 BEGIN INITIAL;
318 paren_depth = 0;
319 yyrestart (inp);
320 }
321
322
323 /* Copy S2 to S1, removing all underscores, and downcasing all letters. */
324
325 static void
326 canonicalizeNumeral (char *s1, const char *s2)
327 {
328 for (; *s2 != '\000'; s2 += 1)
329 {
330 if (*s2 != '_')
331 {
332 *s1 = tolower(*s2);
333 s1 += 1;
334 }
335 }
336 s1[0] = '\000';
337 }
338
339 /* Interprets the prefix of NUM that consists of digits of the given BASE
340 as an integer of that BASE, with the string EXP as an exponent.
341 Puts value in yylval, and returns INT, if the string is valid. Causes
342 an error if the number is improperly formated. BASE, if NULL, defaults
343 to "10", and EXP to "1". The EXP does not contain a leading 'e' or 'E'.
344 */
345
346 static int
347 processInt (struct parser_state *par_state, const char *base0,
348 const char *num0, const char *exp0)
349 {
350 ULONGEST result;
351 long exp;
352 int base;
353 const char *trailer;
354
355 if (base0 == NULL)
356 base = 10;
357 else
358 {
359 base = strtol (base0, (char **) NULL, 10);
360 if (base < 2 || base > 16)
361 error (_("Invalid base: %d."), base);
362 }
363
364 if (exp0 == NULL)
365 exp = 0;
366 else
367 exp = strtol(exp0, (char **) NULL, 10);
368
369 errno = 0;
370 result = strtoulst (num0, &trailer, base);
371 if (errno == ERANGE)
372 error (_("Integer literal out of range"));
373 if (isxdigit(*trailer))
374 error (_("Invalid digit `%c' in based literal"), *trailer);
375
376 while (exp > 0)
377 {
378 if (result > (ULONG_MAX / base))
379 error (_("Integer literal out of range"));
380 result *= base;
381 exp -= 1;
382 }
383
384 if ((result >> (gdbarch_int_bit (par_state->gdbarch ())-1)) == 0)
385 yylval.typed_val.type = type_int (par_state);
386 else if ((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) == 0)
387 yylval.typed_val.type = type_long (par_state);
388 else if (((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) >> 1) == 0)
389 {
390 /* We have a number representable as an unsigned integer quantity.
391 For consistency with the C treatment, we will treat it as an
392 anonymous modular (unsigned) quantity. Alas, the types are such
393 that we need to store .val as a signed quantity. Sorry
394 for the mess, but C doesn't officially guarantee that a simple
395 assignment does the trick (no, it doesn't; read the reference manual).
396 */
397 yylval.typed_val.type
398 = builtin_type (par_state->gdbarch ())->builtin_unsigned_long;
399 if (result & LONGEST_SIGN)
400 yylval.typed_val.val =
401 (LONGEST) (result & ~LONGEST_SIGN)
402 - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
403 else
404 yylval.typed_val.val = (LONGEST) result;
405 return INT;
406 }
407 else
408 yylval.typed_val.type = type_long_long (par_state);
409
410 yylval.typed_val.val = (LONGEST) result;
411 return INT;
412 }
413
414 static int
415 processReal (struct parser_state *par_state, const char *num0)
416 {
417 yylval.typed_val_float.type = type_long_double (par_state);
418
419 bool parsed = parse_float (num0, strlen (num0),
420 yylval.typed_val_float.type,
421 yylval.typed_val_float.val);
422 gdb_assert (parsed);
423 return FLOAT;
424 }
425
426
427 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym. The
428 resulting string is valid until the next call to ada_parse. If
429 NAME0 contains the substring "___", it is assumed to be already
430 encoded and the resulting name is equal to it. Similarly, if the name
431 starts with '<', it is copied verbatim. Otherwise, it differs
432 from NAME0 in that:
433 + Characters between '...' are transfered verbatim to yylval.ssym.
434 + Trailing "'" characters in quoted sequences are removed (a leading quote is
435 preserved to indicate that the name is not to be GNAT-encoded).
436 + Unquoted whitespace is removed.
437 + Unquoted alphabetic characters are mapped to lower case.
438 Result is returned as a struct stoken, but for convenience, the string
439 is also null-terminated. Result string valid until the next call of
440 ada_parse.
441 */
442 static struct stoken
443 processId (const char *name0, int len)
444 {
445 char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
446 int i0, i;
447 struct stoken result;
448
449 result.ptr = name;
450 while (len > 0 && isspace (name0[len-1]))
451 len -= 1;
452
453 if (name0[0] == '<' || strstr (name0, "___") != NULL)
454 {
455 strncpy (name, name0, len);
456 name[len] = '\000';
457 result.length = len;
458 return result;
459 }
460
461 i = i0 = 0;
462 while (i0 < len)
463 {
464 if (isalnum (name0[i0]))
465 {
466 name[i] = tolower (name0[i0]);
467 i += 1; i0 += 1;
468 }
469 else switch (name0[i0])
470 {
471 default:
472 name[i] = name0[i0];
473 i += 1; i0 += 1;
474 break;
475 case ' ': case '\t':
476 i0 += 1;
477 break;
478 case '\'':
479 do
480 {
481 name[i] = name0[i0];
482 i += 1; i0 += 1;
483 }
484 while (i0 < len && name0[i0] != '\'');
485 i0 += 1;
486 break;
487 }
488 }
489 name[i] = '\000';
490
491 result.length = i;
492 return result;
493 }
494
495 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
496 with special hex character notations replaced with characters.
497 Result valid until the next call to ada_parse. */
498
499 static struct stoken
500 processString (const char *text, int len)
501 {
502 const char *p;
503 char *q;
504 const char *lim = text + len;
505 struct stoken result;
506
507 q = (char *) obstack_alloc (&temp_parse_space, len);
508 result.ptr = q;
509 p = text;
510 while (p < lim)
511 {
512 if (p[0] == '[' && p[1] == '"' && p+2 < lim)
513 {
514 if (p[2] == '"') /* "...["""]... */
515 {
516 *q = '"';
517 p += 4;
518 }
519 else
520 {
521 const char *end;
522 ULONGEST chr = strtoulst (p + 2, &end, 16);
523 if (chr > 0xff)
524 error (_("wide strings are not yet supported"));
525 *q = (char) chr;
526 p = end + 1;
527 }
528 }
529 else
530 *q = *p;
531 q += 1;
532 p += 1;
533 }
534 result.length = q - result.ptr;
535 return result;
536 }
537
538 /* Returns the position within STR of the '.' in a
539 '.{WHITE}*all' component of a dotted name, or -1 if there is none.
540 Note: we actually don't need this routine, since 'all' can never be an
541 Ada identifier. Thus, looking up foo.all or foo.all.x as a name
542 must fail, and will eventually be interpreted as (foo).all or
543 (foo).all.x. However, this does avoid an extraneous lookup. */
544
545 static int
546 find_dot_all (const char *str)
547 {
548 int i;
549
550 for (i = 0; str[i] != '\000'; i++)
551 if (str[i] == '.')
552 {
553 int i0 = i;
554
555 do
556 i += 1;
557 while (isspace (str[i]));
558
559 if (strncasecmp (str + i, "all", 3) == 0
560 && !isalnum (str[i + 3]) && str[i + 3] != '_')
561 return i0;
562 }
563 return -1;
564 }
565
566 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
567 case. */
568
569 static int
570 subseqMatch (const char *subseq, const char *str)
571 {
572 if (subseq[0] == '\0')
573 return 1;
574 else if (str[0] == '\0')
575 return 0;
576 else if (tolower (subseq[0]) == tolower (str[0]))
577 return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
578 else
579 return subseqMatch (subseq, str+1);
580 }
581
582
583 static struct { const char *name; int code; }
584 attributes[] = {
585 { "address", TICK_ADDRESS },
586 { "unchecked_access", TICK_ACCESS },
587 { "unrestricted_access", TICK_ACCESS },
588 { "access", TICK_ACCESS },
589 { "first", TICK_FIRST },
590 { "last", TICK_LAST },
591 { "length", TICK_LENGTH },
592 { "max", TICK_MAX },
593 { "min", TICK_MIN },
594 { "modulus", TICK_MODULUS },
595 { "pos", TICK_POS },
596 { "range", TICK_RANGE },
597 { "size", TICK_SIZE },
598 { "tag", TICK_TAG },
599 { "val", TICK_VAL },
600 { NULL, -1 }
601 };
602
603 /* Return the syntactic code corresponding to the attribute name or
604 abbreviation STR. */
605
606 static int
607 processAttribute (const char *str)
608 {
609 int i, k;
610
611 for (i = 0; attributes[i].code != -1; i += 1)
612 if (strcasecmp (str, attributes[i].name) == 0)
613 return attributes[i].code;
614
615 for (i = 0, k = -1; attributes[i].code != -1; i += 1)
616 if (subseqMatch (str, attributes[i].name))
617 {
618 if (k == -1)
619 k = i;
620 else
621 error (_("ambiguous attribute name: `%s'"), str);
622 }
623 if (k == -1)
624 error (_("unrecognized attribute: `%s'"), str);
625
626 return attributes[k].code;
627 }
628
629 /* Back up lexptr by yyleng and then to the rightmost occurrence of
630 character CH, case-folded (there must be one). WARNING: since
631 lexptr points to the next input character that Flex has not yet
632 transferred to its internal buffer, the use of this function
633 depends on the assumption that Flex calls YY_INPUT only when it is
634 logically necessary to do so (thus, there is no reading ahead
635 farther than needed to identify the next token.) */
636
637 static void
638 rewind_to_char (int ch)
639 {
640 pstate->lexptr -= yyleng;
641 while (toupper (*pstate->lexptr) != toupper (ch))
642 pstate->lexptr -= 1;
643 yyrestart (NULL);
644 }
645
646 /* Dummy definition to suppress warnings about unused static definitions. */
647 typedef void (*dummy_function) ();
648 dummy_function ada_flex_use[] =
649 {
650 (dummy_function) yyunput
651 };
652
653 DIAGNOSTIC_POP