+2009-09-25 Tom Tromey <tromey@redhat.com>
+
+ PR python/10664:
+ * language.h (struct language_defn) <la_get_string>: Add
+ 'char_type' argument.
+ (LA_GET_STRING): Likewise.
+ (default_get_string, c_get_string): Update.
+ * language.c (default_get_string): Add 'char_type' argument.
+ * c-valprint.c (c_textual_element_type): Rename from
+ textual_element_type. No longer static. Update callers.
+ * c-lang.h (c_textual_element_type): Declare.
+ * c-lang.c (c_get_string): Add 'char_type' argument.
+
2009-09-25 Tom Tromey <tromey@redhat.com>
* charset.c (iconv_open): Use UTF-16 and UTF-32, not UCS-2 and
void
c_get_string (struct value *value, gdb_byte **buffer, int *length,
- const char **charset)
+ struct type **char_type, const char **charset)
{
int err, width;
unsigned int fetchlimit;
struct type *element_type = TYPE_TARGET_TYPE (type);
int req_length = *length;
enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type));
+ enum c_string_type kind;
if (element_type == NULL)
goto error;
/* We work only with arrays and pointers. */
goto error;
- element_type = check_typedef (element_type);
- if (TYPE_CODE (element_type) != TYPE_CODE_INT
- && TYPE_CODE (element_type) != TYPE_CODE_CHAR)
- /* If the elements are not integers or characters, we don't consider it
- a string. */
+ if (! c_textual_element_type (element_type, 0))
goto error;
-
+ kind = classify_type (element_type,
+ gdbarch_byte_order (get_type_arch (element_type)),
+ charset);
width = TYPE_LENGTH (element_type);
/* If the string lives in GDB's memory instead of the inferior's, then we
if (*length != 0)
*length = *length / width;
- *charset = target_charset ();
+ *char_type = element_type;
return;
extern int cp_is_vtbl_member (struct type *);
+/* These are in c-valprint.c. */
+
+extern int c_textual_element_type (struct type *, char);
+
#endif /* !defined (C_LANG_H) */
}
-/* A helper for textual_element_type. This checks the name of the
+/* A helper for c_textual_element_type. This checks the name of the
typedef. This is bogus but it isn't apparent that the compiler
provides us the help we may need. */
vector types is not. The user can override this by using the /s
format letter. */
-static int
-textual_element_type (struct type *type, char format)
+int
+c_textual_element_type (struct type *type, char format)
{
struct type *true_type, *iter_type;
}
/* Print arrays of textual chars with a string syntax. */
- if (textual_element_type (unresolved_elttype, options->format))
+ if (c_textual_element_type (unresolved_elttype, options->format))
{
/* If requested, look for the first null char and only print
elements up to it. */
/* For a pointer to a textual type, also print the string
pointed to, unless pointer is null. */
- if (textual_element_type (unresolved_elttype, options->format)
+ if (c_textual_element_type (unresolved_elttype, options->format)
&& addr != 0)
{
i = val_print_string (unresolved_elttype, addr, -1, stream,
Since we don't know whether the value is really intended to
be used as an integer or a character, print the character
equivalent as well. */
- if (textual_element_type (unresolved_type, options->format))
+ if (c_textual_element_type (unresolved_type, options->format))
{
fputs_filtered (" ", stream);
LA_PRINT_CHAR ((unsigned char) unpack_long (type, valaddr + embedded_offset),
{
/* Hack: remove (char *) for char strings. Their
type is indicated by the quoted string anyway.
- (Don't use textual_element_type here; quoted strings
+ (Don't use c_textual_element_type here; quoted strings
are always exactly (char *), (wchar_t *), or the like.) */
if (TYPE_CODE (val_type) == TYPE_CODE_PTR
&& TYPE_NAME (val_type) == NULL
void
default_get_string (struct value *value, gdb_byte **buffer, int *length,
- const char **charset)
+ struct type **char_type, const char **charset)
{
error (_("Getting a string is unsupported in this language."));
}
Otherwise *LENGTH will include all characters - including any nulls.
CHARSET will hold the encoding used in the string. */
void (*la_get_string) (struct value *value, gdb_byte **buffer, int *length,
- const char **charset);
+ struct type **chartype, const char **charset);
/* Add fields above this point, so the magic number is always last. */
/* Magic number for compat checking */
force_ellipses,options))
#define LA_EMIT_CHAR(ch, type, stream, quoter) \
(current_language->la_emitchar(ch, type, stream, quoter))
-#define LA_GET_STRING(value, buffer, length, encoding) \
- (current_language->la_get_string(value, buffer, length, encoding))
+#define LA_GET_STRING(value, buffer, length, chartype, encoding) \
+ (current_language->la_get_string(value, buffer, length, chartype, encoding))
/* Invoke the current language's la_print_array_index hook with
   INDEX_VALUE, STREAM and OPTIONS.  The third parameter must be
   spelled `options' to match the expansion; otherwise the caller's
   argument is ignored and whatever `options' names in the enclosing
   scope is used instead.  */
#define LA_PRINT_ARRAY_INDEX(index_value, stream, options) \
  (current_language->la_print_array_index(index_value, stream, options))
struct ui_file *stream);
void default_get_string (struct value *value, gdb_byte **buffer, int *length,
- const char **charset);
+ struct type **char_type, const char **charset);
void c_get_string (struct value *value, gdb_byte **buffer, int *length,
- const char **charset);
+ struct type **char_type, const char **charset);
#endif /* defined (LANGUAGE_H) */
const char *errors = NULL;
const char *user_encoding = NULL;
const char *la_encoding = NULL;
+ struct type *char_type;
static char *keywords[] = { "encoding", "errors", "length" };
if (!PyArg_ParseTupleAndKeywords (args, kw, "|ssi", keywords,
TRY_CATCH (except, RETURN_MASK_ALL)
{
- LA_GET_STRING (value, &buffer, &length, &la_encoding);
+ LA_GET_STRING (value, &buffer, &length, &char_type, &la_encoding);
}
GDB_PY_HANDLE_EXCEPTION (except);
encoding = (user_encoding && *user_encoding) ? user_encoding : la_encoding;
- unicode = PyUnicode_Decode (buffer, length, encoding, errors);
+ unicode = PyUnicode_Decode (buffer, length * TYPE_LENGTH (char_type),
+ encoding, errors);
xfree (buffer);
return unicode;
+2009-09-25 Tom Tromey <tromey@redhat.com>
+
+ * gdb.base/charset.exp: Test utf-16 strings with Python.
+
2009-09-25 Tom Tromey <tromey@redhat.com>
* gdb.base/charset.exp: Use UTF-16 and UTF-32, not UCS-2 and
test_combination u UTF-16 U UTF-32
}
+if {$ucs2_ok} {
+ set go 1
+ gdb_test_multiple "python print 'hello, world!'" \
+ "verify python support for charset tests" {
+ -re "not supported.*$gdb_prompt $" {
+ unsupported "python support is disabled"
+ set go 0
+ }
+ -re "$gdb_prompt $" {}
+ }
+
+ if {$go} {
+ gdb_test "print u\"abcdef\"" " = u\"abcdef\"" \
+ "set up for python printing of utf-16 string"
+
+ gdb_test "python print gdb.history(0).string()" "abcdef" \
+ "extract utf-16 string using python"
+ }
+}
+
# Regression test for a cleanup bug in the charset code.
gdb_test "print 'a' == 'a' || 'b' == 'b'" \
".* = 1" \