Handle multi-byte bracket sequences in Ada lexer

author Tom Tromey <tromey@adacore.com>

Wed, 26 Jan 2022 14:11:18 +0000 (07:11 -0700)

committer Tom Tromey <tromey@adacore.com>

Mon, 28 Feb 2022 17:49:29 +0000 (10:49 -0700)
author Tom Tromey <tromey@adacore.com>
Wed, 26 Jan 2022 14:11:18 +0000 (07:11 -0700)
committer Tom Tromey <tromey@adacore.com>
Mon, 28 Feb 2022 17:49:29 +0000 (10:49 -0700)
diff --git a/gdb/ada-exp.y b/gdb/ada-exp.y

index 916b8ef94fef99f5a931e9fcf33b0d52236b00c1..d3fce8d05e3ef867adb3e08501bb763981f02495 100644 (file)
--- a/gdb/ada-exp.y
+++ b/gdb/ada-exp.y
@@ -98,7 +98,7 @@ static struct type *type_long_long (struct parser_state *);
  
  static struct type *type_long_double (struct parser_state *);
  
-static struct type *type_char (struct parser_state *);
+static struct type *type_for_char (struct parser_state *, ULONGEST);
  
  static struct type *type_boolean (struct parser_state *);
  
@@ -1727,10 +1727,18 @@ type_long_double (struct parser_state *par_state)
  }
  
  static struct type *
-type_char (struct parser_state *par_state)
+type_for_char (struct parser_state *par_state, ULONGEST value)
  {
-  return language_string_char_type (par_state->language (),
-                                   par_state->gdbarch ());
+  if (value <= 0xff)
+    return language_string_char_type (par_state->language (),
+                                     par_state->gdbarch ());
+  else if (value <= 0xffff)
+    return language_lookup_primitive_type (par_state->language (),
+                                          par_state->gdbarch (),
+                                          "wide_character");
+  return language_lookup_primitive_type (par_state->language (),
+                                        par_state->gdbarch (),
+                                        "wide_wide_character");
  }
  
  static struct type *
diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c

index f1d59d2aadb6364c364ab5ef1b983e99c69fa19c..d44b0906e6d777df0c2fec7a92aa6aff1cf9f67d 100644 (file)
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -10187,7 +10187,7 @@ ada_resolvable::replace (operation_up &&owner,
    return std::move (owner);
  }
  
-/* Convert the character literal whose ASCII value would be VAL to the
+/* Convert the character literal whose value would be VAL to the
     appropriate value of type TYPE, if there is a translation.
     Otherwise return VAL.  Hence, in an enumeration type ('A', 'B'),
     the literal 'A' (VAL == 65), returns 0.  */
@@ -10195,7 +10195,7 @@ ada_resolvable::replace (operation_up &&owner,
  static LONGEST
  convert_char_literal (struct type *type, LONGEST val)
  {
-  char name[7];
+  char name[12];
    int f;
  
    if (type == NULL)
@@ -10206,8 +10206,12 @@ convert_char_literal (struct type *type, LONGEST val)
  
    if ((val >= 'a' && val <= 'z') || (val >= '0' && val <= '9'))
      xsnprintf (name, sizeof (name), "Q%c", (int) val);
+  else if (val >= 0 && val < 256)
+    xsnprintf (name, sizeof (name), "QU%02x", (unsigned) val);
+  else if (val >= 0 && val < 0x10000)
+    xsnprintf (name, sizeof (name), "QW%04x", (unsigned) val);
    else
-    xsnprintf (name, sizeof (name), "QU%02x", (int) val);
+    xsnprintf (name, sizeof (name), "QWW%08lx", (unsigned long) val);
    size_t len = strlen (name);
    for (f = 0; f < type->num_fields (); f += 1)
      {
@@ -13005,9 +13009,11 @@ public:
      add (arch_integer_type (gdbarch, gdbarch_short_bit (gdbarch),
                             0, "short_integer"));
      struct type *char_type = arch_character_type (gdbarch, TARGET_CHAR_BIT,
-                                                 0, "character");
+                                                 1, "character");
      lai->set_string_char_type (char_type);
      add (char_type);
+    add (arch_character_type (gdbarch, 16, 1, "wide_character"));
+    add (arch_character_type (gdbarch, 32, 1, "wide_wide_character"));
      add (arch_float_type (gdbarch, gdbarch_float_bit (gdbarch),
                           "float", gdbarch_float_format (gdbarch)));
      add (arch_float_type (gdbarch, gdbarch_double_bit (gdbarch),
diff --git a/gdb/ada-lex.l b/gdb/ada-lex.l

index d64496a3775ac7a2b94ceaf1fdbf83c156cb0be7..f61efba81a9637bf57e415c934a361214eb901a9 100644 (file)
--- a/gdb/ada-lex.l
+++ b/gdb/ada-lex.l
@@ -1,4 +1,4 @@
-/* FLEX lexer for Ada expressions, for GDB.
+/* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
     Copyright (C) 1994-2022 Free Software Foundation, Inc.
  
     This file is part of GDB.
@@ -150,20 +150,22 @@ static int paren_depth;
                 }
  
  <INITIAL>"'"({GRAPHIC}|\")"'" {
-                  yylval.typed_val.type = type_char (pstate);
                    yylval.typed_val.val = yytext[1];
+                  yylval.typed_val.type = type_for_char (pstate, yytext[1]);
                    return CHARLIT;
                 }
  
-<INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
-                   int v;
-                   yylval.typed_val.type = type_char (pstate);
-                  sscanf (yytext+3, "%2x", &v);
+<INITIAL>"'[\""{HEXDIG}{2,}"\"]'"   {
+                   ULONGEST v = strtoulst (yytext+3, nullptr, 16);
                    yylval.typed_val.val = v;
+                   yylval.typed_val.type = type_for_char (pstate, v);
                    return CHARLIT;
                 }
  
-\"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
+       /* Note that we don't handle bracket sequences of more than 2
+          digits here.  Currently there's no support for wide or
+          wide-wide strings.  */
+\"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\"   {
                    yylval.sval = processString (yytext+1, yyleng-2);
                    return STRING;
                 }
@@ -513,10 +515,12 @@ processString (const char *text, int len)
              }
             else
              {
-               int chr;
-              sscanf (p+2, "%2x", &chr);
+              const char *end;
+              ULONGEST chr = strtoulst (p + 2, &end, 16);
+              if (chr > 0xff)
+                error (_("wide strings are not yet supported"));
                *q = (char) chr;
-              p += 5;
+              p = end + 1;
              }
           }
         else
diff --git a/gdb/ada-valprint.c b/gdb/ada-valprint.c

index a59c392bef4b60ea4f6655dcf339d9d8e6a6adc7..bf95719f0406377b849a4acd66a6613af2082309 100644 (file)
--- a/gdb/ada-valprint.c
+++ b/gdb/ada-valprint.c
@@ -277,7 +277,11 @@ ada_emit_char (int c, struct type *type, struct ui_file *stream,
         fprintf_filtered (stream, "%c", c);
      }
    else
-    fprintf_filtered (stream, "[\"%0*x\"]", type_len * 2, c);
+    {
+      /* Follow GNAT's lead here and only use 6 digits for
+        wide_wide_character.  */
+      fprintf_filtered (stream, "[\"%0*x\"]", std::min (6, type_len * 2), c);
+    }
  }
  
  /* Character #I of STRING, given that TYPE_LEN is the size in bytes
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode.exp b/gdb/testsuite/gdb.ada/char_enum_unicode.exp

index aa8136054e70daf6c2c5691a9ccea09608fe3abc..fad239983a3f22419889cab3b0c309bf0aff4a4c 100644 (file)
--- a/gdb/testsuite/gdb.ada/char_enum_unicode.exp
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
@@ -40,4 +40,13 @@ gdb_test "print Char_Y" " = 2 $y"
  gdb_test "print Char_King" " = 3 $king"
  gdb_test "print Char_Thorn" " = 4 $thorn"
  gdb_test "print Char_Enum_Type'('x')" " = 1 'x'"
+gdb_test "print Char_Enum_Type'('\[\"0178\"\]')" " = 2 $y"
+gdb_test "print Char_Enum_Type'('\[\"1fa00\"\]')" " = 3 $king"
  gdb_test "print Char_Enum_Type'('\[\"de\"\]')" " = 4 $thorn"
+
+gdb_test "print '\[\"0178\"\]'" " = 376 $y"
+gdb_test "print '\[\"01fa00\"\]'" " = 129536 $king"
+gdb_test "print '\[\"de\"\]'" " = 222 $thorn"
+
+gdb_test "print \"\[\"0178\"\]\"" "wide strings are not yet supported"
+gdb_test "print \"\[\"de\"\]\"" " = \"\\\[\"de\"\\\]\""
diff --git a/gdb/testsuite/gdb.ada/widewide.exp b/gdb/testsuite/gdb.ada/widewide.exp

index 6fabb5bb08cc88d5b599f24f1f3c58b4e3709c3e..c0268f9c99b6315bb881e1d8b0e46e1e03f6c488 100644 (file)
--- a/gdb/testsuite/gdb.ada/widewide.exp
+++ b/gdb/testsuite/gdb.ada/widewide.exp
@@ -33,9 +33,9 @@ if ![runto "foo.adb:$bp_location" ] then {
  
  gdb_test "print some_easy" "= 74 'J'"
  
-gdb_test "print some_larger" "= 48879 '\\\[\"0000beef\"\\\]'"
+gdb_test "print some_larger" "= 48879 '\\\[\"00beef\"\\\]'"
  
-gdb_test "print some_big" "= 14335727 '\\\[\"00dabeef\"\\\]'"
+gdb_test "print some_big" "= 14335727 '\\\[\"dabeef\"\\\]'"
  
  gdb_test "print my_wws" "= \" helo\""
author	Tom Tromey <tromey@adacore.com>
	Wed, 26 Jan 2022 14:11:18 +0000 (07:11 -0700)
committer	Tom Tromey <tromey@adacore.com>
	Mon, 28 Feb 2022 17:49:29 +0000 (10:49 -0700)
gdb/ada-exp.y		patch | blob | history
gdb/ada-lang.c		patch | blob | history
gdb/ada-lex.l		patch | blob | history
gdb/ada-valprint.c		patch | blob | history
gdb/testsuite/gdb.ada/char_enum_unicode.exp		patch | blob | history
gdb/testsuite/gdb.ada/widewide.exp		patch | blob | history