gdb/ada-lex.l

   1 /* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
   2    Copyright (C) 1994-2022 Free Software Foundation, Inc.
   3
   4    This file is part of GDB.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 /*----------------------------------------------------------------------*/
  20
  21 /* The converted version of this file is to be included in ada-exp.y, */
  22 /* the Ada parser for gdb.  The function yylex obtains characters from */
  23 /* the global pointer lexptr.  It returns a syntactic category for */
  24 /* each successive token and places a semantic value into yylval */
  25 /* (ada-lval), defined by the parser.   */
  26
  27 DIG     [0-9]
  28 NUM10   ({DIG}({DIG}|_)*)
  29 HEXDIG  [0-9a-f]
  30 NUM16   ({HEXDIG}({HEXDIG}|_)*)
  31 OCTDIG  [0-7]
  32 LETTER  [a-z_]
  33 ID      ({LETTER}({LETTER}|{DIG}|[\x80-\xff])*|"<"{LETTER}({LETTER}|{DIG})*">")
  34 WHITE   [ \t\n]
  35 TICK    ("'"{WHITE}*)
  36 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
  37 OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
  38
  39 EXP     (e[+-]{NUM10})
  40 POSEXP  (e"+"?{NUM10})
  41
  42 %{
  43
  44 #include "diagnostics.h"
  45
  46 /* Some old versions of flex generate code that uses the "register" keyword,
  47    which clang warns about.  This was observed for example with flex 2.5.35,
  48    as shipped with macOS 10.12.  The same happens with flex 2.5.37 and g++ 11
  49    which defaults to ISO C++17, that does not allow register storage class
  50    specifiers.  */
  51 DIAGNOSTIC_PUSH
  52 DIAGNOSTIC_IGNORE_DEPRECATED_REGISTER
  53
  54 #define NUMERAL_WIDTH 256
  55 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
  56
  57 /* Temporary staging for numeric literals.  */
  58 static char numbuf[NUMERAL_WIDTH];
  59  static void canonicalizeNumeral (char *s1, const char *);
  60 static struct stoken processString (const char*, int);
  61 static int processInt (struct parser_state *, const char *, const char *,
  62                        const char *);
  63 static int processReal (struct parser_state *, const char *);
  64 static struct stoken processId (const char *, int);
  65 static int processAttribute (const char *);
  66 static int find_dot_all (const char *);
  67 static void rewind_to_char (int);
  68
  69 #undef YY_DECL
  70 #define YY_DECL static int yylex ( void )
  71
  72 /* Flex generates a static function "input" which is not used.
  73    Defining YY_NO_INPUT comments it out.  */
  74 #define YY_NO_INPUT
  75
  76 #undef YY_INPUT
  77 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
  78     if ( *pstate->lexptr == '\000' ) \
  79       (RESULT) = YY_NULL; \
  80     else \
  81       { \
  82         *(BUF) = *pstate->lexptr; \
  83         (RESULT) = 1; \
  84         pstate->lexptr += 1; \
  85       }
  86
  87 static int find_dot_all (const char *);
  88
  89 /* Depth of parentheses.  */
  90 static int paren_depth;
  91
  92 %}
  93
  94 %option case-insensitive interactive nodefault noyywrap
  95
  96 %s BEFORE_QUAL_QUOTE
  97
  98 %%
  99
 100 {WHITE}          { }
 101
 102 "--".*           { yyterminate(); }
 103
 104 {NUM10}{POSEXP}  {
 105                    canonicalizeNumeral (numbuf, yytext);
 106                    char *e_ptr = strrchr (numbuf, 'e');
 107                    *e_ptr = '\0';
 108                    return processInt (pstate, nullptr, numbuf, e_ptr + 1);
 109                  }
 110
 111 {NUM10}          {
 112                    canonicalizeNumeral (numbuf, yytext);
 113                    return processInt (pstate, NULL, numbuf, NULL);
 114                  }
 115
 116 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
 117                    canonicalizeNumeral (numbuf, yytext);
 118                    char *e_ptr = strrchr (numbuf, 'e');
 119                    *e_ptr = '\0';
 120                    return processInt (pstate, numbuf,
 121                                       strchr (numbuf, '#') + 1,
 122                                       e_ptr + 1);
 123                  }
 124
 125 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
 126                    canonicalizeNumeral (numbuf, yytext);
 127                    return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
 128                                       NULL);
 129                  }
 130
 131 "0x"{HEXDIG}+   {
 132                   canonicalizeNumeral (numbuf, yytext+2);
 133                   return processInt (pstate, "16#", numbuf, NULL);
 134                 }
 135
 136
 137 {NUM10}"."{NUM10}{EXP} {
 138                    canonicalizeNumeral (numbuf, yytext);
 139                    return processReal (pstate, numbuf);
 140                 }
 141
 142 {NUM10}"."{NUM10} {
 143                    canonicalizeNumeral (numbuf, yytext);
 144                    return processReal (pstate, numbuf);
 145                 }
 146
 147 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
 148                    error (_("Based real literals not implemented yet."));
 149                 }
 150
 151 {NUM10}"#"{NUM16}"."{NUM16}"#" {
 152                    error (_("Based real literals not implemented yet."));
 153                 }
 154
 155 <INITIAL>"'"({GRAPHIC}|\")"'" {
 156                    yylval.typed_val.val = yytext[1];
 157                    yylval.typed_val.type = type_for_char (pstate, yytext[1]);
 158                    return CHARLIT;
 159                 }
 160
 161 <INITIAL>"'[\""{HEXDIG}{2,}"\"]'"   {
 162                    ULONGEST v = strtoulst (yytext+3, nullptr, 16);
 163                    yylval.typed_val.val = v;
 164                    yylval.typed_val.type = type_for_char (pstate, v);
 165                    return CHARLIT;
 166                 }
 167
 168         /* Note that we don't handle bracket sequences of more than 2
 169            digits here.  Currently there's no support for wide or
 170            wide-wide strings.  */
 171 \"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\"   {
 172                    yylval.sval = processString (yytext+1, yyleng-2);
 173                    return STRING;
 174                 }
 175
 176 \"              {
 177                    error (_("ill-formed or non-terminated string literal"));
 178                 }
 179
 180
 181 if              {
 182                   rewind_to_char ('i');
 183                   return 0;
 184                 }
 185
 186 task            {
 187                   rewind_to_char ('t');
 188                   return 0;
 189                 }
 190
 191 thread{WHITE}+{DIG} {
 192                   /* This keyword signals the end of the expression and
 193                      will be processed separately.  */
 194                   rewind_to_char ('t');
 195                   return 0;
 196                 }
 197
 198         /* ADA KEYWORDS */
 199
 200 abs             { return ABS; }
 201 and             { return _AND_; }
 202 else            { return ELSE; }
 203 in              { return IN; }
 204 mod             { return MOD; }
 205 new             { return NEW; }
 206 not             { return NOT; }
 207 null            { return NULL_PTR; }
 208 or              { return OR; }
 209 others          { return OTHERS; }
 210 rem             { return REM; }
 211 then            { return THEN; }
 212 xor             { return XOR; }
 213
 214         /* BOOLEAN "KEYWORDS" */
 215
 216  /* True and False are not keywords in Ada, but rather enumeration constants.
 217     However, the boolean type is no longer represented as an enum, so True
 218     and False are no longer defined in symbol tables.  We compromise by
 219     making them keywords (when bare). */
 220
 221 true            { return TRUEKEYWORD; }
 222 false           { return FALSEKEYWORD; }
 223
 224         /* ATTRIBUTES */
 225
 226 {TICK}[a-z][a-z_]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
 227
 228         /* PUNCTUATION */
 229
 230 "=>"            { return ARROW; }
 231 ".."            { return DOTDOT; }
 232 "**"            { return STARSTAR; }
 233 ":="            { return ASSIGN; }
 234 "/="            { return NOTEQUAL; }
 235 "<="            { return LEQ; }
 236 ">="            { return GEQ; }
 237
 238 <BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
 239
 240 [-&*+./:<>=|;\[\]] { return yytext[0]; }
 241
 242 ","             { if (paren_depth == 0 && pstate->comma_terminates)
 243                     {
 244                       rewind_to_char (',');
 245                       return 0;
 246                     }
 247                   else
 248                     return ',';
 249                 }
 250
 251 "("             { paren_depth += 1; return '('; }
 252 ")"             { if (paren_depth == 0)
 253                     {
 254                       rewind_to_char (')');
 255                       return 0;
 256                     }
 257                   else
 258                     {
 259                       paren_depth -= 1;
 260                       return ')';
 261                     }
 262                 }
 263
 264 "."{WHITE}*all  { return DOT_ALL; }
 265
 266 "."{WHITE}*{ID} {
 267                   yylval.sval = processId (yytext+1, yyleng-1);
 268                   return DOT_ID;
 269                 }
 270
 271 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
 272                   int all_posn = find_dot_all (yytext);
 273
 274                   if (all_posn == -1 && yytext[yyleng-1] == '\'')
 275                     {
 276                       BEGIN BEFORE_QUAL_QUOTE;
 277                       yyless (yyleng-1);
 278                     }
 279                   else if (all_posn >= 0)
 280                     yyless (all_posn);
 281                   yylval.sval = processId (yytext, yyleng);
 282                   return NAME;
 283                }
 284
 285
 286         /* GDB EXPRESSION CONSTRUCTS  */
 287
 288 "'"[^']+"'"{WHITE}*:: {
 289                   yyless (yyleng - 2);
 290                   yylval.sval = processId (yytext, yyleng);
 291                   return NAME;
 292                 }
 293
 294 "::"            { return COLONCOLON; }
 295
 296 [{}@]           { return yytext[0]; }
 297
 298         /* REGISTERS AND GDB CONVENIENCE VARIABLES */
 299
 300 "$"({LETTER}|{DIG}|"$")*  {
 301                   yylval.sval.ptr = yytext;
 302                   yylval.sval.length = yyleng;
 303                   return DOLLAR_VARIABLE;
 304                 }
 305
 306         /* CATCH-ALL ERROR CASE */
 307
 308 .               { error (_("Invalid character '%s' in expression."), yytext); }
 309 %%
 310
 311 #include <ctype.h>
 312 /* Initialize the lexer for processing new expression. */
 313
 314 static void
 315 lexer_init (FILE *inp)
 316 {
 317   BEGIN INITIAL;
 318   paren_depth = 0;
 319   yyrestart (inp);
 320 }
 321
 322
 323 /* Copy S2 to S1, removing all underscores, and downcasing all letters.  */
 324
 325 static void
 326 canonicalizeNumeral (char *s1, const char *s2)
 327 {
 328   for (; *s2 != '\000'; s2 += 1)
 329     {
 330       if (*s2 != '_')
 331         {
 332           *s1 = tolower(*s2);
 333           s1 += 1;
 334         }
 335     }
 336   s1[0] = '\000';
 337 }
 338
 339 /* Interprets the prefix of NUM that consists of digits of the given BASE
 340    as an integer of that BASE, with the string EXP as an exponent.
 341    Puts value in yylval, and returns INT, if the string is valid.  Causes
 342    an error if the number is improperly formated.   BASE, if NULL, defaults
 343    to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
 344  */
 345
 346 static int
 347 processInt (struct parser_state *par_state, const char *base0,
 348             const char *num0, const char *exp0)
 349 {
 350   ULONGEST result;
 351   long exp;
 352   int base;
 353   const char *trailer;
 354
 355   if (base0 == NULL)
 356     base = 10;
 357   else
 358     {
 359       base = strtol (base0, (char **) NULL, 10);
 360       if (base < 2 || base > 16)
 361         error (_("Invalid base: %d."), base);
 362     }
 363
 364   if (exp0 == NULL)
 365     exp = 0;
 366   else
 367     exp = strtol(exp0, (char **) NULL, 10);
 368
 369   errno = 0;
 370   result = strtoulst (num0, &trailer, base);
 371   if (errno == ERANGE)
 372     error (_("Integer literal out of range"));
 373   if (isxdigit(*trailer))
 374     error (_("Invalid digit `%c' in based literal"), *trailer);
 375
 376   while (exp > 0)
 377     {
 378       if (result > (ULONG_MAX / base))
 379         error (_("Integer literal out of range"));
 380       result *= base;
 381       exp -= 1;
 382     }
 383
 384   if ((result >> (gdbarch_int_bit (par_state->gdbarch ())-1)) == 0)
 385     yylval.typed_val.type = type_int (par_state);
 386   else if ((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) == 0)
 387     yylval.typed_val.type = type_long (par_state);
 388   else if (((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) >> 1) == 0)
 389     {
 390       /* We have a number representable as an unsigned integer quantity.
 391          For consistency with the C treatment, we will treat it as an
 392          anonymous modular (unsigned) quantity.  Alas, the types are such
 393          that we need to store .val as a signed quantity.  Sorry
 394          for the mess, but C doesn't officially guarantee that a simple
 395          assignment does the trick (no, it doesn't; read the reference manual).
 396        */
 397       yylval.typed_val.type
 398         = builtin_type (par_state->gdbarch ())->builtin_unsigned_long;
 399       if (result & LONGEST_SIGN)
 400         yylval.typed_val.val =
 401           (LONGEST) (result & ~LONGEST_SIGN)
 402           - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
 403       else
 404         yylval.typed_val.val = (LONGEST) result;
 405       return INT;
 406     }
 407   else
 408     yylval.typed_val.type = type_long_long (par_state);
 409
 410   yylval.typed_val.val = (LONGEST) result;
 411   return INT;
 412 }
 413
 414 static int
 415 processReal (struct parser_state *par_state, const char *num0)
 416 {
 417   yylval.typed_val_float.type = type_long_double (par_state);
 418
 419   bool parsed = parse_float (num0, strlen (num0),
 420                              yylval.typed_val_float.type,
 421                              yylval.typed_val_float.val);
 422   gdb_assert (parsed);
 423   return FLOAT;
 424 }
 425
 426
 427 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
 428    resulting string is valid until the next call to ada_parse.  If
 429    NAME0 contains the substring "___", it is assumed to be already
 430    encoded and the resulting name is equal to it.  Similarly, if the name
 431    starts with '<', it is copied verbatim.  Otherwise, it differs
 432    from NAME0 in that:
 433     + Characters between '...' are transfered verbatim to yylval.ssym.
 434     + Trailing "'" characters in quoted sequences are removed (a leading quote is
 435       preserved to indicate that the name is not to be GNAT-encoded).
 436     + Unquoted whitespace is removed.
 437     + Unquoted alphabetic characters are mapped to lower case.
 438    Result is returned as a struct stoken, but for convenience, the string
 439    is also null-terminated.  Result string valid until the next call of
 440    ada_parse.
 441  */
 442 static struct stoken
 443 processId (const char *name0, int len)
 444 {
 445   char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
 446   int i0, i;
 447   struct stoken result;
 448
 449   result.ptr = name;
 450   while (len > 0 && isspace (name0[len-1]))
 451     len -= 1;
 452
 453   if (name0[0] == '<' || strstr (name0, "___") != NULL)
 454     {
 455       strncpy (name, name0, len);
 456       name[len] = '\000';
 457       result.length = len;
 458       return result;
 459     }
 460
 461   i = i0 = 0;
 462   while (i0 < len)
 463     {
 464       if (isalnum (name0[i0]))
 465         {
 466           name[i] = tolower (name0[i0]);
 467           i += 1; i0 += 1;
 468         }
 469       else switch (name0[i0])
 470         {
 471         default:
 472           name[i] = name0[i0];
 473           i += 1; i0 += 1;
 474           break;
 475         case ' ': case '\t':
 476           i0 += 1;
 477           break;
 478         case '\'':
 479           do
 480             {
 481               name[i] = name0[i0];
 482               i += 1; i0 += 1;
 483             }
 484           while (i0 < len && name0[i0] != '\'');
 485           i0 += 1;
 486           break;
 487         }
 488     }
 489   name[i] = '\000';
 490
 491   result.length = i;
 492   return result;
 493 }
 494
 495 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
 496    with special hex character notations replaced with characters.
 497    Result valid until the next call to ada_parse.  */
 498
 499 static struct stoken
 500 processString (const char *text, int len)
 501 {
 502   const char *p;
 503   char *q;
 504   const char *lim = text + len;
 505   struct stoken result;
 506
 507   q = (char *) obstack_alloc (&temp_parse_space, len);
 508   result.ptr = q;
 509   p = text;
 510   while (p < lim)
 511     {
 512       if (p[0] == '[' && p[1] == '"' && p+2 < lim)
 513          {
 514            if (p[2] == '"')  /* "...["""]... */
 515              {
 516                *q = '"';
 517                p += 4;
 518              }
 519            else
 520              {
 521                const char *end;
 522                ULONGEST chr = strtoulst (p + 2, &end, 16);
 523                if (chr > 0xff)
 524                  error (_("wide strings are not yet supported"));
 525                *q = (char) chr;
 526                p = end + 1;
 527              }
 528          }
 529        else
 530          *q = *p;
 531        q += 1;
 532        p += 1;
 533      }
 534   result.length = q - result.ptr;
 535   return result;
 536 }
 537
 538 /* Returns the position within STR of the '.' in a
 539    '.{WHITE}*all' component of a dotted name, or -1 if there is none.
 540    Note: we actually don't need this routine, since 'all' can never be an
 541    Ada identifier.  Thus, looking up foo.all or foo.all.x as a name
 542    must fail, and will eventually be interpreted as (foo).all or
 543    (foo).all.x.  However, this does avoid an extraneous lookup. */
 544
 545 static int
 546 find_dot_all (const char *str)
 547 {
 548   int i;
 549
 550   for (i = 0; str[i] != '\000'; i++)
 551     if (str[i] == '.')
 552       {
 553         int i0 = i;
 554
 555         do
 556           i += 1;
 557         while (isspace (str[i]));
 558
 559         if (strncasecmp (str + i, "all", 3) == 0
 560             && !isalnum (str[i + 3]) && str[i + 3] != '_')
 561           return i0;
 562       }
 563   return -1;
 564 }
 565
 566 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
 567    case.  */
 568
 569 static int
 570 subseqMatch (const char *subseq, const char *str)
 571 {
 572   if (subseq[0] == '\0')
 573     return 1;
 574   else if (str[0] == '\0')
 575     return 0;
 576   else if (tolower (subseq[0]) == tolower (str[0]))
 577     return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
 578   else
 579     return subseqMatch (subseq, str+1);
 580 }
 581
 582
 583 static struct { const char *name; int code; }
 584 attributes[] = {
 585   { "address", TICK_ADDRESS },
 586   { "unchecked_access", TICK_ACCESS },
 587   { "unrestricted_access", TICK_ACCESS },
 588   { "access", TICK_ACCESS },
 589   { "first", TICK_FIRST },
 590   { "last", TICK_LAST },
 591   { "length", TICK_LENGTH },
 592   { "max", TICK_MAX },
 593   { "min", TICK_MIN },
 594   { "modulus", TICK_MODULUS },
 595   { "pos", TICK_POS },
 596   { "range", TICK_RANGE },
 597   { "size", TICK_SIZE },
 598   { "tag", TICK_TAG },
 599   { "val", TICK_VAL },
 600   { NULL, -1 }
 601 };
 602
 603 /* Return the syntactic code corresponding to the attribute name or
 604    abbreviation STR.  */
 605
 606 static int
 607 processAttribute (const char *str)
 608 {
 609   int i, k;
 610
 611   for (i = 0; attributes[i].code != -1; i += 1)
 612     if (strcasecmp (str, attributes[i].name) == 0)
 613       return attributes[i].code;
 614
 615   for (i = 0, k = -1; attributes[i].code != -1; i += 1)
 616     if (subseqMatch (str, attributes[i].name))
 617       {
 618         if (k == -1)
 619           k = i;
 620         else
 621           error (_("ambiguous attribute name: `%s'"), str);
 622       }
 623   if (k == -1)
 624     error (_("unrecognized attribute: `%s'"), str);
 625
 626   return attributes[k].code;
 627 }
 628
 629 /* Back up lexptr by yyleng and then to the rightmost occurrence of
 630    character CH, case-folded (there must be one).  WARNING: since
 631    lexptr points to the next input character that Flex has not yet
 632    transferred to its internal buffer, the use of this function
 633    depends on the assumption that Flex calls YY_INPUT only when it is
 634    logically necessary to do so (thus, there is no reading ahead
 635    farther than needed to identify the next token.)  */
 636
 637 static void
 638 rewind_to_char (int ch)
 639 {
 640   pstate->lexptr -= yyleng;
 641   while (toupper (*pstate->lexptr) != toupper (ch))
 642     pstate->lexptr -= 1;
 643   yyrestart (NULL);
 644 }
 645
 646 /* Dummy definition to suppress warnings about unused static definitions. */
 647 typedef void (*dummy_function) ();
 648 dummy_function ada_flex_use[] =
 649 {
 650   (dummy_function) yyunput
 651 };
 652
 653 DIAGNOSTIC_POP