Fix PR c++/21323: GDB thinks char16_t and char32_t are signed in C++
author Pedro Alves <palves@redhat.com>
Wed, 12 Apr 2017 13:00:49 +0000 (14:00 +0100)
committer Pedro Alves <palves@redhat.com>
Wed, 12 Apr 2017 13:00:49 +0000 (14:00 +0100)
While the C++ standard says that char16_t and char32_t are unsigned types:

 Types char16_t and char32_t denote distinct types with the same size,
 signedness, and alignment as uint_least16_t and uint_least32_t,
 respectively, in <cstdint>, called the underlying types.

... gdb treats them as signed currently:

 (gdb) p (char16_t)-1
 $1 = -1 u'\xffff'

There are actually two places in gdb that hardcode these types:

- gdbtypes.c:gdbtypes_post_init, when creating the built-in types,
  seemingly used by the "x /s" command (judging from commit 9a22f0d0).

- dwarf2read.c, when reading base types with DW_ATE_UTF encoding
  (which is what is used for these types, when compiling for C++11 and
  up).  Despite the comment, the type created does end up used.

Both places need fixing.  But since I couldn't tell why dwarf2read.c
needs to create a new type, I've made it use the per-arch built-in
types instead, so that the types are only created once per arch
instead of once per objfile.  That seems to work fine.

While writing the test, I noticed that the C++ language parser isn't
actually aware of these built-in types, so if you try to use them
without a program that uses them, you get:

 (gdb) set language c++
 (gdb) ptype char16_t
 No symbol table is loaded.  Use the "file" command.
 (gdb) ptype u"hello"
 No type named char16_t.
 (gdb) p u"hello"
 No type named char16_t.

That's fixed by simply adding a couple entries to C++'s built-in types
array in c-lang.c.  With that, we get the expected:

 (gdb) ptype char16_t
 type = char16_t
 (gdb) ptype u"hello"
 type = char16_t [6]
 (gdb) p u"hello"
 $1 = u"hello"

gdb/ChangeLog:
2017-04-12  Pedro Alves  <palves@redhat.com>

PR c++/21323
* c-lang.c (cplus_primitive_types) <cplus_primitive_type_char16_t,
cplus_primitive_type_char32_t>: New enum values.
(cplus_language_arch_info): Register cplus_primitive_type_char16_t
and cplus_primitive_type_char32_t.
* dwarf2read.c (read_base_type) <DW_ATE_UTF>: If bit size is 16 or
32, use the architecture's built-in type for char16_t and char32_t,
respectively.  Otherwise, fall back to init_integer_type as before,
but make the type unsigned, and issue a complaint.
* gdbtypes.c (gdbtypes_post_init): Make char16_t and char32_t unsigned.

gdb/testsuite/ChangeLog:
2017-04-12  Pedro Alves  <palves@redhat.com>

PR c++/21323
* gdb.cp/wide_char_types.c: New file.
* gdb.cp/wide_char_types.exp: New file.

gdb/ChangeLog
gdb/c-lang.c
gdb/dwarf2read.c
gdb/gdbtypes.c
gdb/testsuite/ChangeLog
gdb/testsuite/gdb.cp/wide_char_types.c [new file with mode: 0644]
gdb/testsuite/gdb.cp/wide_char_types.exp [new file with mode: 0644]

index 4c03fc0a219451c3d44fc6b2abd4370e43870ab1..bdcbd1e2f96586328d84bcc9a205ac3bf2c9e152 100644 (file)
@@ -1,3 +1,16 @@
+2017-04-12  Pedro Alves  <palves@redhat.com>
+
+       PR c++/21323
+       * c-lang.c (cplus_primitive_types) <cplus_primitive_type_char16_t,
+       cplus_primitive_type_char32_t>: New enum values.
+       (cplus_language_arch_info): Register cplus_primitive_type_char16_t
+       and cplus_primitive_type_char32_t.
+       * dwarf2read.c (read_base_type) <DW_ATE_UTF>: If bit size is 16 or
+       32, use the architecture's built-in type for char16_t and char32_t,
+       respectively.  Otherwise, fall back to init_integer_type as before,
+       but make the type unsigned, and issue a complaint.
+       * gdbtypes.c (gdbtypes_post_init): Make char16_t and char32_t unsigned.
+
 2017-04-12  Alan Hayward  <alan.hayward@arm.com>
 
        * m32r-tdep.c (M32R_ARG_REGISTER_SIZE): Added.
index a1001992fcb00cc4cc72b32520d17a30c5884b67..616aa267e84c563098cd9af81b5fd057497a49c9 100644 (file)
@@ -895,6 +895,8 @@ enum cplus_primitive_types {
   cplus_primitive_type_decfloat,
   cplus_primitive_type_decdouble,
   cplus_primitive_type_declong,
+  cplus_primitive_type_char16_t,
+  cplus_primitive_type_char32_t,
   nr_cplus_primitive_types
 };
 
@@ -950,6 +952,10 @@ cplus_language_arch_info (struct gdbarch *gdbarch,
     = builtin->builtin_decdouble;
   lai->primitive_type_vector [cplus_primitive_type_declong]
     = builtin->builtin_declong;
+  lai->primitive_type_vector [cplus_primitive_type_char16_t]
+    = builtin->builtin_char16;
+  lai->primitive_type_vector [cplus_primitive_type_char32_t]
+    = builtin->builtin_char32;
 
   lai->bool_type_symbol = "bool";
   lai->bool_type_default = builtin->builtin_bool;
index 966e1ee81ba99a17f96b095c19a5d14485f84af9..e390b322978accacee4edf84cd82ea49eb79a116 100644 (file)
@@ -15125,9 +15125,22 @@ read_base_type (struct die_info *die, struct dwarf2_cu *cu)
          type = init_integer_type (objfile, bits, 1, name);
        break;
       case DW_ATE_UTF:
-       /* We just treat this as an integer and then recognize the
-          type by name elsewhere.  */
-       type = init_integer_type (objfile, bits, 0, name);
+       {
+         gdbarch *arch = get_objfile_arch (objfile);
+
+         if (bits == 16)
+           type = builtin_type (arch)->builtin_char16;
+         else if (bits == 32)
+           type = builtin_type (arch)->builtin_char32;
+         else
+           {
+             complaint (&symfile_complaints,
+                        _("unsupported DW_ATE_UTF bit size: '%d'"),
+                        bits);
+             type = init_integer_type (objfile, bits, 1, name);
+           }
+         return set_die_type (die, type, cu);
+       }
        break;
 
       default:
index 6f3aeabc173e0bfc07cb6561cf403d979f41038d..c1f76fb539e061dd717f980d60a5c0d64c15fa4b 100644 (file)
@@ -5204,10 +5204,9 @@ gdbtypes_post_init (struct gdbarch *gdbarch)
 
   /* Wide character types.  */
   builtin_type->builtin_char16
-    = arch_integer_type (gdbarch, 16, 0, "char16_t");
+    = arch_integer_type (gdbarch, 16, 1, "char16_t");
   builtin_type->builtin_char32
-    = arch_integer_type (gdbarch, 32, 0, "char32_t");
-       
+    = arch_integer_type (gdbarch, 32, 1, "char32_t");
 
   /* Default data/code pointer types.  */
   builtin_type->builtin_data_ptr
index 83d6018e8ac96b2cb64ce2a6af52260241bb068b..e2555e05c37fe53b56c9abd135b76301deb7d190 100644 (file)
@@ -1,3 +1,9 @@
+2017-04-12  Pedro Alves  <palves@redhat.com>
+
+       PR c++/21323
+       * gdb.cp/wide_char_types.c: New file.
+       * gdb.cp/wide_char_types.exp: New file.
+
 2017-04-05  Sergio Durigan Junior  <sergiodj@redhat.com>
 
        PR gdb/21352
diff --git a/gdb/testsuite/gdb.cp/wide_char_types.c b/gdb/testsuite/gdb.cp/wide_char_types.c
new file mode 100644 (file)
index 0000000..8337cd4
--- /dev/null
@@ -0,0 +1,28 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <uchar.h>
+
+/* Both objects are initialized from -1; the accompanying
+   wide_char_types.exp expects GDB to print them as 65535 and
+   4294967295 respectively, i.e., as unsigned values.  */
+char16_t u16 = -1;
+char32_t u32 = -1;
+
+/* Nothing happens at run time; wide_char_types.exp only runs to main
+   and then inspects the globals above.  */
+int
+main ()
+{
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.cp/wide_char_types.exp b/gdb/testsuite/gdb.cp/wide_char_types.exp
new file mode 100644 (file)
index 0000000..df5c8a8
--- /dev/null
@@ -0,0 +1,143 @@
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2017 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test GDB's awareness of the char16_t, char32_t (C++11+) built-in
+# types.  We also run most tests here in C mode, and check whether the
+# built-ins are disabled (gdb uses the typedefs in the debug info
+# instead.)
+
+standard_testfile
+
+# Test char16_t/char32_t in language LANG, against symbols in
+# a program.  LANG can be "c", "c++03" or "c++11".  In C++11,
+# char16_t/char32_t are built-in types, and the debug information
+# reflects that (see
+# http://wiki.dwarfstd.org/index.php?title=C%2B%2B0x:_New_string_literals).
+#
+# Returns -1 if the program could not be prepared, and 0 if main
+# could not be reached.
+
+proc wide_char_types_program {lang} {
+    global srcfile testfile
+
+    # Pick compile options and a distinct output name for each
+    # language variant.  Any LANG other than "c++03"/"c++11" gets
+    # only the "debug" option and the "-c" output name.
+    set options {debug}
+    if {$lang == "c++03"} {
+       lappend options c++ additional_flags=-std=c++03
+       set out $testfile-cxx03
+    } elseif {$lang == "c++11"} {
+       lappend options c++ additional_flags=-std=c++11
+       set out $testfile-cxx11
+    } else {
+       set out $testfile-c
+    }
+
+    if { [prepare_for_testing "failed to prepare" \
+             ${out} [list $srcfile] $options] } {
+       return -1
+    }
+
+    if ![runto_main] then {
+       fail "can't run to main"
+       return 0
+    }
+    # u16 and u32 are the names passed as the char16_t and char32_t
+    # expressions.
+    do_test_wide_char $lang "u16" "u32"
+}
+
+# Test char16_t/char32_t in language LANG.  Use CHAR16_EXP and
+# CHAR32_EXP as the expressions for each of the corresponding types.
+# (E.g., CHAR16_EXP will be u16 when testing against the program, and
+# "(char16_t) -1" when testing the built-in types without a program
+# loaded.)
+
+proc do_test_wide_char {lang char16_exp char32_exp} {
+    # Check that the fixed-width wide types are distinct built-in
+    # types in C++11+.  In other modes, they're instead typedefs,
+    # found in the debug info.
+    if {$lang == "c++11"} {
+       gdb_test "ptype $char16_exp" "type = char16_t" \
+           "char16_t is distinct"
+       gdb_test "ptype $char32_exp" "type = char32_t" \
+           "char32_t is distinct"
+    } else {
+       gdb_test "ptype $char16_exp" "type = unsigned (long|int|short)" \
+           "char16_t is typedef"
+       gdb_test "ptype $char32_exp" "type = unsigned (long|int|short)" \
+           "char32_t is typedef"
+    }
+
+    # Check that the fixed-width wide char types are unsigned.
+    gdb_test "p $char16_exp" " = 65535 u'\\\\xffff'" \
+       "char16_t is unsigned"
+    gdb_test "p $char32_exp" " = 4294967295 U'\\\\xffffffff'" \
+       "char32_t is unsigned"
+
+    # Check sizeof.  These are fixed-width.  Each test is named after
+    # the expression it actually evaluates.
+    gdb_test "p sizeof($char16_exp)" "= 2" \
+       "sizeof($char16_exp) == 2"
+    gdb_test "p sizeof($char32_exp)" "= 4" \
+       "sizeof($char32_exp) == 4"
+
+    # Test printing wide literal strings.  Note that when testing with
+    # no program started, this relies on GDB's awareness of the
+    # built-in wide char types.
+    gdb_test {p U"hello"} {= U"hello"}
+    gdb_test {p u"hello"} {= u"hello"}
+}
+
+# Make sure that the char16_t/char32_t types are recognized as
+# distinct built-in types in C++ mode, even with no program loaded.
+# Check that in C mode, the types are not recognized.
+
+proc wide_char_types_no_program {} {
+    # Start a fresh GDB session; nothing is loaded into it here.
+    gdb_exit
+    gdb_start
+
+    # These types are not built-in in C.
+    with_test_prefix "c" {
+       gdb_test "set language c"
+
+       gdb_test "p (char16_t) -1" "No symbol table is loaded.*" \
+           "char16_t is not built-in"
+       gdb_test "p (char32_t) -1" "No symbol table is loaded.*" \
+           "char32_t is not built-in"
+
+       gdb_test {p U"hello"} "No type named char32_t\\\."
+       gdb_test {p u"hello"} "No type named char16_t\\\."
+    }
+
+    # Note GDB does not distinguish C++ dialects, so the fixed-width
+    # types are always available in C++ mode, even if they were not
+    # built-in types before C++11.
+    with_test_prefix "c++" {
+       gdb_test "set language c++"
+
+       do_test_wide_char "c++11" "(char16_t) -1" "(char32_t) -1"
+    }
+}
+
+# Check wide char types with no program loaded.
+with_test_prefix "no program" {
+    wide_char_types_no_program
+}
+
+# Check types when a program is loaded.  The program test is invoked
+# once for each entry in the language list below.
+with_test_prefix "with program" {
+    foreach_with_prefix lang {"c" "c++03" "c++11"} {
+       wide_char_types_program $lang
+    }
+}