From 96c07c5b96e970c93ab71a1f351ca669bba78d1a Mon Sep 17 00:00:00 2001 From: Tom Tromey Date: Fri, 25 Sep 2009 21:39:53 +0000 Subject: [PATCH] gdb PR python/10664: * language.h (struct language_defn) <la_get_string>: Add 'char_type' argument. (LA_GET_STRING): Likewise. (default_get_string, c_get_string): Update. * language.c (default_get_string): Add 'char_type' argument. * c-valprint.c (c_textual_element_type): Rename from textual_element_type. No longer static. Update callers. * c-lang.h (c_textual_element_type): Declare. * c-lang.c (c_get_string): Add 'char_type' argument. gdb/testsuite PR python/10664: * gdb.base/charset.exp: Test utf-16 strings with Python. --- gdb/ChangeLog | 13 +++++++++++++ gdb/c-lang.c | 15 +++++++-------- gdb/c-lang.h | 4 ++++ gdb/c-valprint.c | 14 +++++++------- gdb/language.c | 2 +- gdb/language.h | 10 +++++----- gdb/python/py-value.c | 6 ++++-- gdb/testsuite/ChangeLog | 4 ++++ gdb/testsuite/gdb.base/charset.exp | 20 ++++++++++++++++++++ 9 files changed, 65 insertions(+), 23 deletions(-) diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 280b1120937..e2396a260c2 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,16 @@ +2009-09-25 Tom Tromey + + PR python/10664: + * language.h (struct language_defn) <la_get_string>: Add + 'char_type' argument. + (LA_GET_STRING): Likewise. + (default_get_string, c_get_string): Update. + * language.c (default_get_string): Add 'char_type' argument. + * c-valprint.c (c_textual_element_type): Rename from + textual_element_type. No longer static. Update callers. + * c-lang.h (c_textual_element_type): Declare. + * c-lang.c (c_get_string): Add 'char_type' argument. 
+ 2009-09-25 Tom Tromey * charset.c (iconv_open): Use UTF-16 and UTF-32, not UCS-2 and diff --git a/gdb/c-lang.c b/gdb/c-lang.c index 64258de4a6f..405c489fa4f 100644 --- a/gdb/c-lang.c +++ b/gdb/c-lang.c @@ -618,7 +618,7 @@ c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, void c_get_string (struct value *value, gdb_byte **buffer, int *length, - const char **charset) + struct type **char_type, const char **charset) { int err, width; unsigned int fetchlimit; @@ -626,6 +626,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length, struct type *element_type = TYPE_TARGET_TYPE (type); int req_length = *length; enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type)); + enum c_string_type kind; if (element_type == NULL) goto error; @@ -652,13 +653,11 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length, /* We work only with arrays and pointers. */ goto error; - element_type = check_typedef (element_type); - if (TYPE_CODE (element_type) != TYPE_CODE_INT - && TYPE_CODE (element_type) != TYPE_CODE_CHAR) - /* If the elements are not integers or characters, we don't consider it - a string. */ + if (! c_textual_element_type (element_type, 0)) goto error; - + kind = classify_type (element_type, + gdbarch_byte_order (get_type_arch (element_type)), + charset); width = TYPE_LENGTH (element_type); /* If the string lives in GDB's memory instead of the inferior's, then we @@ -717,7 +716,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length, if (*length != 0) *length = *length / width; - *charset = target_charset (); + *char_type = element_type; return; diff --git a/gdb/c-lang.h b/gdb/c-lang.h index ba9d9961a9e..afafc9bd809 100644 --- a/gdb/c-lang.h +++ b/gdb/c-lang.h @@ -106,5 +106,9 @@ extern int cp_is_vtbl_ptr_type (struct type *); extern int cp_is_vtbl_member (struct type *); +/* These are in c-valprint.c. 
*/ + +extern int c_textual_element_type (struct type *, char); + #endif /* !defined (C_LANG_H) */ diff --git a/gdb/c-valprint.c b/gdb/c-valprint.c index dc391ee2244..c20bfa78450 100644 --- a/gdb/c-valprint.c +++ b/gdb/c-valprint.c @@ -54,7 +54,7 @@ print_function_pointer_address (struct gdbarch *gdbarch, CORE_ADDR address, } -/* A helper for textual_element_type. This checks the name of the +/* A helper for c_textual_element_type. This checks the name of the typedef. This is bogus but it isn't apparent that the compiler provides us the help we may need. */ @@ -77,8 +77,8 @@ textual_name (const char *name) vector types is not. The user can override this by using the /s format letter. */ -static int -textual_element_type (struct type *type, char format) +int +c_textual_element_type (struct type *type, char format) { struct type *true_type, *iter_type; @@ -178,7 +178,7 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, } /* Print arrays of textual chars with a string syntax. */ - if (textual_element_type (unresolved_elttype, options->format)) + if (c_textual_element_type (unresolved_elttype, options->format)) { /* If requested, look for the first null char and only print elements up to it. */ @@ -278,7 +278,7 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, /* For a pointer to a textual type, also print the string pointed to, unless pointer is null. */ - if (textual_element_type (unresolved_elttype, options->format) + if (c_textual_element_type (unresolved_elttype, options->format) && addr != 0) { i = val_print_string (unresolved_elttype, addr, -1, stream, @@ -491,7 +491,7 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, Since we don't know whether the value is really intended to be used as an integer or a character, print the character equivalent as well. 
*/ - if (textual_element_type (unresolved_type, options->format)) + if (c_textual_element_type (unresolved_type, options->format)) { fputs_filtered (" ", stream); LA_PRINT_CHAR ((unsigned char) unpack_long (type, valaddr + embedded_offset), @@ -613,7 +613,7 @@ c_value_print (struct value *val, struct ui_file *stream, { /* Hack: remove (char *) for char strings. Their type is indicated by the quoted string anyway. - (Don't use textual_element_type here; quoted strings + (Don't use c_textual_element_type here; quoted strings are always exactly (char *), (wchar_t *), or the like. */ if (TYPE_CODE (val_type) == TYPE_CODE_PTR && TYPE_NAME (val_type) == NULL diff --git a/gdb/language.c b/gdb/language.c index 51a49f8ce32..5dbe81c0032 100644 --- a/gdb/language.c +++ b/gdb/language.c @@ -1045,7 +1045,7 @@ default_print_array_index (struct value *index_value, struct ui_file *stream, void default_get_string (struct value *value, gdb_byte **buffer, int *length, - const char **charset) + struct type **char_type, const char **charset) { error (_("Getting a string is unsupported in this language.")); } diff --git a/gdb/language.h b/gdb/language.h index c650e07d418..241fb010363 100644 --- a/gdb/language.h +++ b/gdb/language.h @@ -294,7 +294,7 @@ struct language_defn Otherwise *LENGTH will include all characters - including any nulls. CHARSET will hold the encoding used in the string. */ void (*la_get_string) (struct value *value, gdb_byte **buffer, int *length, - const char **charset); + struct type **chartype, const char **charset); /* Add fields above this point, so the magic number is always last. 
*/ /* Magic number for compat checking */ @@ -394,8 +394,8 @@ extern enum language set_language (enum language); force_ellipses,options)) #define LA_EMIT_CHAR(ch, type, stream, quoter) \ (current_language->la_emitchar(ch, type, stream, quoter)) -#define LA_GET_STRING(value, buffer, length, encoding) \ - (current_language->la_get_string(value, buffer, length, encoding)) +#define LA_GET_STRING(value, buffer, length, chartype, encoding) \ + (current_language->la_get_string(value, buffer, length, chartype, encoding)) #define LA_PRINT_ARRAY_INDEX(index_value, stream, optins) \ (current_language->la_print_array_index(index_value, stream, options)) @@ -497,9 +497,9 @@ void default_print_typedef (struct type *type, struct symbol *new_symbol, struct ui_file *stream); void default_get_string (struct value *value, gdb_byte **buffer, int *length, - const char **charset); + struct type **char_type, const char **charset); void c_get_string (struct value *value, gdb_byte **buffer, int *length, - const char **charset); + struct type **char_type, const char **charset); #endif /* defined (LANGUAGE_H) */ diff --git a/gdb/python/py-value.c b/gdb/python/py-value.c index 58bcee3217f..e2ae0baddc7 100644 --- a/gdb/python/py-value.c +++ b/gdb/python/py-value.c @@ -238,6 +238,7 @@ valpy_string (PyObject *self, PyObject *args, PyObject *kw) const char *errors = NULL; const char *user_encoding = NULL; const char *la_encoding = NULL; + struct type *char_type; static char *keywords[] = { "encoding", "errors", "length" }; if (!PyArg_ParseTupleAndKeywords (args, kw, "|ssi", keywords, @@ -246,12 +247,13 @@ valpy_string (PyObject *self, PyObject *args, PyObject *kw) TRY_CATCH (except, RETURN_MASK_ALL) { - LA_GET_STRING (value, &buffer, &length, &la_encoding); + LA_GET_STRING (value, &buffer, &length, &char_type, &la_encoding); } GDB_PY_HANDLE_EXCEPTION (except); encoding = (user_encoding && *user_encoding) ? 
user_encoding : la_encoding; - unicode = PyUnicode_Decode (buffer, length, encoding, errors); + unicode = PyUnicode_Decode (buffer, length * TYPE_LENGTH (char_type), + encoding, errors); xfree (buffer); return unicode; diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index 3695a9b1495..54f7963faed 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2009-09-25 Tom Tromey + + * gdb.base/charset.exp: Test utf-16 strings with Python. + 2009-09-25 Tom Tromey * gdb.base/charset.exp: Use UTF-16 and UTF-32, not UCS-2 and diff --git a/gdb/testsuite/gdb.base/charset.exp b/gdb/testsuite/gdb.base/charset.exp index 7a96bb891e3..8ef217ae7d1 100644 --- a/gdb/testsuite/gdb.base/charset.exp +++ b/gdb/testsuite/gdb.base/charset.exp @@ -610,6 +610,26 @@ if {$ucs2_ok && $ucs4_ok} { test_combination u UTF-16 U UTF-32 } +if {$ucs2_ok} { + set go 1 + gdb_test_multiple "python print 'hello, world!'" \ + "verify python support for charset tests" { + -re "not supported.*$gdb_prompt $" { + unsupported "python support is disabled" + set go 0 + } + -re "$gdb_prompt $" {} + } + + if {$go} { + gdb_test "print u\"abcdef\"" " = u\"abcdef\"" \ + "set up for python printing of utf-16 string" + + gdb_test "python print gdb.history(0).string()" "abcdef" \ + "extract utf-16 string using python" + } +} + # Regression test for a cleanup bug in the charset code. gdb_test "print 'a' == 'a' || 'b' == 'b'" \ ".* = 1" \ -- 2.30.2