From dea97812277d4b545fce15c857925c2997f4fd47 Mon Sep 17 00:00:00 2001 From: Kevin Buettner Date: Fri, 20 Sep 2002 00:26:06 +0000 Subject: [PATCH] Add new tests for charset support. --- gdb/testsuite/ChangeLog | 4 + gdb/testsuite/gdb.base/charset.c | 131 ++++++++ gdb/testsuite/gdb.base/charset.exp | 486 +++++++++++++++++++++++++++++ 3 files changed, 621 insertions(+) create mode 100644 gdb/testsuite/gdb.base/charset.c create mode 100644 gdb/testsuite/gdb.base/charset.exp diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index 8c3b82b4efe..37d3d6bd451 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2002-09-19 Jim Blandy + + * gdb.base/charset.exp, gdb.base/charset.c: New files. + 2002-09-19 Andrew Cagney * gdb.gdb/complaints.exp: New file. diff --git a/gdb/testsuite/gdb.base/charset.c b/gdb/testsuite/gdb.base/charset.c new file mode 100644 index 00000000000..ece684a1ef9 --- /dev/null +++ b/gdb/testsuite/gdb.base/charset.c @@ -0,0 +1,131 @@ +/* Test GDB's character set support + Jim Blandy --- December 2001 */ + +#include + + +/* X_string is a null-terminated string in the X charset whose + elements are as follows. X should be the name the `set charset' + command uses for the character set, in lower-case, with any + non-identifier characters replaced with underscores. Where a + character set doesn't have the given character, the string should + contain the character 'x'. + + [0] --- the `alert' character, '\a' + [1] --- the `backspace' character, '\b' + [2] --- the `escape' character, '\e' + [3] --- the `form feed' character, '\f' + [4] --- the `line feed' character, '\n' + [5] --- the `carriage return' character, '\r' + [6] --- the `horizontal tab' character, '\t' + [7] --- the `vertical tab' character, '\v' + [8 .. 33] --- the uppercase letters A-Z + [34 .. 59] --- the lowercase letters a-z + [60 .. 69] --- the digits 0-9 + [70] --- the `cent' character + [71] --- a control character with no defined backslash escape + + Feel free to extend these as you like. */ + +#define NUM_CHARS (72) + +char ascii_string[NUM_CHARS]; +char iso_8859_1_string[NUM_CHARS]; +char ebcdic_us_string[NUM_CHARS]; +char ibm1047_string[NUM_CHARS]; + + +void +init_string (char string[], + char x, + char alert, char backspace, char escape, char form_feed, + char line_feed, char carriage_return, char horizontal_tab, + char vertical_tab, char cent, char misc_ctrl) +{ + memset (string, x, NUM_CHARS); + string[0] = alert; + string[1] = backspace; + string[2] = escape; + string[3] = form_feed; + string[4] = line_feed; + string[5] = carriage_return; + string[6] = horizontal_tab; + string[7] = vertical_tab; + string[70] = cent; + string[71] = misc_ctrl; +} + + +void +fill_run (char string[], int start, int len, int first) +{ + int i; + + for (i = 0; i < len; i++) + string[start + i] = first + i; +} + + +int main () +{ +#ifdef usestubs + set_debug_traps(); + breakpoint(); +#endif + (void) malloc (1); + /* Initialize ascii_string. */ + init_string (ascii_string, + 120, + 7, 8, 27, 12, + 10, 13, 9, + 11, 120, 17); + fill_run (ascii_string, 8, 26, 65); + fill_run (ascii_string, 34, 26, 97); + fill_run (ascii_string, 60, 10, 48); + + /* Initialize iso_8859_1_string. */ + init_string (iso_8859_1_string, + 120, + 7, 8, 27, 12, + 10, 13, 9, + 11, 162, 17); + fill_run (iso_8859_1_string, 8, 26, 65); + fill_run (iso_8859_1_string, 34, 26, 97); + fill_run (iso_8859_1_string, 60, 10, 48); + + /* Initialize ebcdic_us_string. */ + init_string (ebcdic_us_string, + 167, + 47, 22, 39, 12, + 37, 13, 5, + 11, 74, 17); + /* In EBCDIC, the upper-case letters are broken into three separate runs. */ + fill_run (ebcdic_us_string, 8, 9, 193); + fill_run (ebcdic_us_string, 17, 9, 209); + fill_run (ebcdic_us_string, 26, 8, 226); + /* The lower-case letters are, too. */ + fill_run (ebcdic_us_string, 34, 9, 129); + fill_run (ebcdic_us_string, 43, 9, 145); + fill_run (ebcdic_us_string, 52, 8, 162); + /* The digits, at least, are contiguous. */ + fill_run (ebcdic_us_string, 60, 10, 240); + + /* Initialize ibm1047_string. */ + init_string (ibm1047_string, + 167, + 47, 22, 39, 12, + 37, 13, 5, + 11, 74, 17); + /* In EBCDIC, the upper-case letters are broken into three separate runs. */ + fill_run (ibm1047_string, 8, 9, 193); + fill_run (ibm1047_string, 17, 9, 209); + fill_run (ibm1047_string, 26, 8, 226); + /* The lower-case letters are, too. */ + fill_run (ibm1047_string, 34, 9, 129); + fill_run (ibm1047_string, 43, 9, 145); + fill_run (ibm1047_string, 52, 8, 162); + /* The digits, at least, are contiguous. */ + fill_run (ibm1047_string, 60, 10, 240); + + puts ("All set!"); /* all strings initialized */ +} diff --git a/gdb/testsuite/gdb.base/charset.exp b/gdb/testsuite/gdb.base/charset.exp new file mode 100644 index 00000000000..8e765aa3a95 --- /dev/null +++ b/gdb/testsuite/gdb.base/charset.exp @@ -0,0 +1,486 @@ +# Copyright 2001 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# Please email any bugs, comments, and/or additions to this file to: +# bug-gdb@prep.ai.mit.edu + +# Test GDB's character set support. + +if $tracelevel then { + strace $tracelevel +} + +set prms_id 0 +set bug_id 0 + +set testfile "charset" +set srcfile ${testfile}.c +set binfile ${objdir}/${subdir}/${testfile} +if { [gdb_compile "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } { + gdb_suppress_entire_file "Testcase compile failed, so all tests in this file will automatically fail." +} + +# Start with a fresh gdb. +gdb_exit +gdb_start +gdb_reinitialize_dir $srcdir/$subdir +gdb_load ${binfile} + +# Parse the output from a `show charset' command. Return the host +# and target charset as a two-element list. +proc parse_show_charset_output {testname} { + global gdb_prompt + + gdb_expect { + -re "The current host and target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" { + set host_charset $expect_out(1,string) + set target_charset $expect_out(1,string) + pass $testname + } + -re "The current host character set is `(.*)'\\.\[\r\n\]+The current target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" { + set host_charset $expect_out(1,string) + set target_charset $expect_out(2,string) + pass $testname + } + -re ".*$gdb_prompt $" { + fail $testname + } + timeout { + fail "$testname (timeout)" + } + } + + return [list $host_charset $target_charset] +} + + +# Try the various `show charset' commands. These are all aliases of each +# other; `show target-charset' and `show host-charset' actually print +# both the host and target charsets. + +send_gdb "show charset\n" +set show_charset [parse_show_charset_output "show charset"] + +send_gdb "show target-charset\n" +set show_target_charset [parse_show_charset_output "show target-charset"] + +if {! [string compare $show_charset $show_target_charset]} { + pass "check `show target-charset' against `show charset'" +} else { + fail "check `show target-charset' against `show charset'" +} + +send_gdb "show host-charset\n" +set show_host_charset [parse_show_charset_output "show host-charset"] + +if {! [string compare $show_charset $show_host_charset]} { + pass "check `show host-charset' against `show charset'" +} else { + fail "check `show host-charset' against `show charset'" +} + + +# Get the list of supported charsets. +send_gdb "set charset\n" + +# True iff we've seen the "Valid character sets are:" message. +set seen_valid 0 + +# True iff we've seen the "can be used as a host character set" message. +set seen_can_host 0 + +# A Tcl array mapping the names of all the character sets we've seen +# to "1" if the character set can be used as a host character set, or +# "0" otherwise. We can use `array names charsets' just to get a list +# of all character sets. +array set charsets {} + +proc all_charset_names {} { + global charsets + return [array names charsets] +} + +proc charset_exists {charset} { + global charsets + return [info exists charsets($charset)] +} + +proc valid_host_charset {charset} { + global charsets + return $charsets($charset) +} + +gdb_expect { + -re "Valid character sets are:\[\r\n\]+" { + # There's no ^ at the beginning of the pattern above, so that + # expect can skip the echoed `set charset' command. + set seen_valid 1 + exp_continue + } + -re "^ (\[^ \t\n\]*) \\*\[\r\n\]+" { + set charsets($expect_out(1,string)) 1 + exp_continue + } + -re "^ (\[^ \t\n\]*)\[ \t\]*\[\r\n\]+" { + set charsets($expect_out(1,string)) 0 + exp_continue + } + -re "^\\* - can be used as a host character set\[\r\n\]+" { + set seen_can_host 1 + exp_continue + } + -re ".*${gdb_prompt} $" { + # We don't do an exp_continue here. + } + timeout { + fail "get valid character sets (timeout)" + } +} + + +# Check that we've seen all the right pieces of the output, and that +# we can at least use ASCII as a host character set. +if {$seen_valid && $seen_can_host && [charset_exists ascii]} { + # We can't do the below as part of the test above, since all the + # [] substitution takes place before any expression evaluation + # takes place; && doesn't really short circuit things the way + # you'd like. We'd get an "can't read $charsets(ascii)" error + # even when `info exists' had returned zero. + if {[valid_host_charset ascii]} { + pass "get valid character sets" + } else { + fail "get valid character sets" + } +} else { + fail "get valid character sets (no ascii charset)" +} + + +# Try using `set host-charset' on an invalid character set. +gdb_test "set host-charset my_grandma_bonnie" \ + "GDB doesn't know of any character set named `my_grandma_bonnie'." \ + "try `set host-charset' with invalid charset" + + +# Try using `set target-charset' on an invalid character set. +gdb_test "set target-charset my_grandma_bonnie" \ + "GDB doesn't know of any character set named `my_grandma_bonnie'." \ + "try `set target-charset' with invalid charset" + + +# Make sure that GDB supports every host/target charset combination. +foreach host_charset [all_charset_names] { + if {[valid_host_charset $host_charset]} { + + set testname "try `set host-charset $host_charset'" + send_gdb "set host-charset $host_charset\n" + gdb_expect { + -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" { + # How did it get into `charsets' then? + fail "$testname (didn't recognize name)" + } + -re "GDB can't use `.*' as its host character set\\.\[\r\n]+${gdb_prompt} $" { + # Well, then why does its `charsets' entry say it can? + fail $testname + } + -re "${gdb_prompt} $" { + pass $testname + } + timeout { + fail "$testname (timeout)" + } + } + + # Check that the command actually had its intended effect: + # $host_charset should now be the host character set. + send_gdb "show charset\n" + set result [parse_show_charset_output "parse `show charset' after `set host-charset $host_charset'"] + if {! [string compare [lindex $result 0] $host_charset]} { + pass "check effect of `set host-charset $host_charset'" + } else { + fail "check effect of `set host-charset $host_charset'" + } + + # Now try setting every possible target character set, + # given that host charset. + foreach target_charset [all_charset_names] { + set testname "try `set target-charset $target_charset'" + send_gdb "set target-charset $target_charset\n" + gdb_expect { + -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" { + fail "$testname (didn't recognize name)" + } + -re "GDB can't convert from the .* character set to .*\\.\[\r\n\]+${gdb_prompt} $" { + # This is a serious problem. GDB should be able to convert + # between any arbitrary pair of character sets. + fail "$testname (can't convert)" + } + -re "${gdb_prompt} $" { + pass $testname + } + timeout { + fail "$testname (timeout)" + } + } + + # Check that the command actually had its intended effect: + # $target_charset should now be the target charset. + send_gdb "show charset\n" + set result [parse_show_charset_output "parse `show charset' after `set target-charset $target_charset'"] + if {! [string compare $result [list $host_charset $target_charset]]} { + pass "check effect of `set target-charset $target_charset'" + } else { + fail "check effect of `set target-charset $target_charset'" + } + + # Test handling of characters in the host charset which + # can't be translated into the target charset. \xA2 is + # `cent' in ISO-8859-1, which has no equivalent in ASCII. + # + # On some systems, the pseudo-tty through which we + # communicate with GDB insists on stripping the high bit + # from input characters, meaning that `cent' turns into + # `"'. Since ISO-8859-1 and ASCII are identical in the + # lower 128 characters, it's tough to see how we can test + # this behavior on such systems, so we just xfail it. + # + # Note: the \x16 (Control-V) is an escape to allow \xA2 to + # get past readline. + if {! [string compare $host_charset iso-8859-1] && ! [string compare $target_charset ascii]} { + + set testname "untranslatable character in character literal" + send_gdb "print '\x16\xA2'\n" + gdb_expect { + -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" { + pass $testname + } + -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" { + xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)" + } + -re "$gdb_prompt $" { + fail $testname + } + timeout { + fail "$testname (timeout)" + } + } + + set testname "untranslatable character in string literal" + # If the PTTY zeros bit seven, then this turns into + # print """ + # which gets us a syntax error. We don't care. + send_gdb "print \"\x16\xA2\"\n" + gdb_expect { + -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" { + pass $testname + } + -re "Unterminated string in expression.\[\r\n\]+$gdb_prompt $" { + xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)" + } + -re "$gdb_prompt $" { + fail $testname + } + timeout { + fail "$testname (timeout)" + } + } + + set testname "untranslatable characters in backslash escape" + send_gdb "print '\\\x16\xA2'\n" + gdb_expect { + -re "The escape sequence .* is equivalent to plain .*, which has no equivalent\[\r\n\]+in the .* character set\\.\[\r\n\]+$gdb_prompt $" { + pass $testname + } + -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" { + xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)" + } + -re "$gdb_prompt $" { + fail $testname + } + timeout { + fail "$testname (timeout)" + } + } + } + } + } +} + + +# Set the host character set to plain ASCII, and try actually printing +# some strings in various target character sets. We need to run the +# test program to the point at which the strings have been +# initialized. +gdb_test "break [gdb_get_line_number "all strings initialized"]" \ + ".*Breakpoint.* at .*" \ + "set breakpoint after all strings have been initialized" +gdb_run_cmd +gdb_expect { + -re "Breakpoint.*all strings initialized.*$gdb_prompt $" { + pass "run until all strings have been initialized" + } + -re "$gdb_prompt $" { + fail "run until all strings have been initialized" + } + timeout { + fail "run until all strings have been initialized (timeout)" + } +} + + +gdb_test "set host-charset ascii" "" +foreach target_charset [all_charset_names] { + send_gdb "set target-charset $target_charset\n" + gdb_expect { + -re "$gdb_prompt $" { + pass "set target-charset $target_charset" + } + timeout { + fail "set target-charset $target_charset (timeout)" + } + } + + # Try printing the null character. There seems to be a bug in + # gdb_test that requires us to use gdb_expect here. + send_gdb "print '\\0'\n" + gdb_expect { + -re "\\\$${decimal} = 0 '\\\\0'\[\r\n\]+$gdb_prompt $" { + pass "print the null character in ${target_charset}" + } + -re "$gdb_prompt $" { + fail "print the null character in ${target_charset}" + } + timeout { + fail "print the null character in ${target_charset} (timeout)" + } + } + + # Compute the name of the variable in the test program that holds + # a string in $target_charset. The variable's name is the + # character set's name, in lower-case, with all non-identifier + # characters replaced with '_', with "_string" stuck on the end. + set var_name [string tolower "${target_charset}_string"] + regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name + + # Compute a regexp matching the results we expect. This is static, + # but it's easier than writing it out. + regsub -all "." "abefnrtv" "(\\\\&|x)" escapes + set uppercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + set lowercase "abcdefghijklmnopqrstuvwxyz" + set digits "0123456789" + set octal_escape "\\\\\[0-9\]\[0-9\]\[0-9\]" + + send_gdb "print $var_name\n" + # ${escapes}${uppercase}${lowercase}${digits}${octal}${octal} + gdb_expect { + -re ".* = \"(\\\\a|x)(\\\\b|x)(\\\\e|x)(\\\\f|x)(\\\\n|x)(\\\\r|x)(\\\\t|x)(\\\\v|x)${uppercase}${lowercase}${digits}(\\\\\[0-9\]\[0-9\]\[0-9\]|x)(\\\\\[0-9\]\[0-9\]\[0-9\]|x).*\"\[\r\n\]+$gdb_prompt $" { + pass "print string in $target_charset" + } + -re "$gdb_prompt $" { + fail "print string in $target_charset" + } + timeout { + fail "print string in $target_charset (timeout)" + } + } + + # Try entering a character literal, and see if it comes back unchanged. + gdb_test "print 'A'" \ + " = \[0-9-\]+ 'A'" \ + "parse character literal in ${target_charset}" + + # Check that the character literal was encoded correctly. + gdb_test "print 'A' == $var_name\[8\]" \ + " = 1" \ + "check value of parsed character literal in ${target_charset}" + + # Try entering a string literal, and see if it comes back unchanged. + gdb_test "print \"abcdefABCDEF012345\"" \ + " = \"abcdefABCDEF012345\"" \ + "parse string literal in ${target_charset}" + + # Check that the string literal was encoded correctly. + gdb_test "print \"q\"\[0\] == $var_name\[50\]" \ + " = 1" \ + "check value of parsed string literal in ${target_charset}" + + # Test handling of characters in the target charset which + # can't be translated into the host charset. + if {! [string compare $target_charset iso-8859-1]} { + gdb_test "print iso_8859_1_string\[70\]" \ + " = \[0-9-\]+ '\\\\242'" \ + "print character with no equivalent in host character set" + gdb_test "print iso_8859_1_string + 70" \ + " = ${hex} \"\\\\242.*\"" \ + "print string with no equivalent in host character set" + } + + # Make sure that we don't apply the ISO-8859-1 `print_literally' + # function to ASCII. + if {! [string compare $target_charset ascii]} { + gdb_test "print iso_8859_1_string\[70\]" \ + " = \[0-9-\]+ '\\\\242'" \ + "print ASCII unprintable character" + gdb_test "print iso_8859_1_string + 70" \ + " = ${hex} \"\\\\242.*\"" \ + "print ASCII unprintable string" + } + + # Try printing characters with backslash escape equivalents. + set escapees {a b e f n r t v} + for {set i 0} {$i < [llength $escapees]} {incr i} { + set escape [lindex $escapees $i] + send_gdb "print $var_name\[$i\]\n" + set have_escape 1 + gdb_expect { + -re "= \[0-9-\]+ '\\\\${escape}'\[\r\n\]+$gdb_prompt $" { + pass "try printing '\\${escape}' in ${target_charset}" + } + -re "= \[0-9-\]+ 'x'\[\r\n\]+$gdb_prompt $" { + xfail "try printing '\\${escape}' in ${target_charset} (no such escape)" + set have_escape 0 + } + -re "$gdb_prompt $" { + fail "try printing '\\${escape}' in ${target_charset}" + } + timeout { + fail "try printing '\\${escape}' in ${target_charset} (timeout)" + } + } + + if {$have_escape} { + + # Try parsing a backslash escape in a character literal. + gdb_test "print '\\${escape}' == $var_name\[$i\]" \ + " = 1" \ + "check value of '\\${escape}' in ${target_charset}" + + # Try parsing a backslash escape in a string literal. + gdb_test "print \"\\${escape}\"\[0\] == $var_name\[$i\]" \ + " = 1" \ + "check value of \"\\${escape}\" in ${target_charset}" + } + } + + # Try printing a character escape that doesn't exist. We should + # get the unescaped character, in the target character set. + gdb_test "print '\\q'" " = \[0-9-\]+ 'q'" \ + "print escape that doesn't exist in $target_charset" + gdb_test "print '\\q' == $var_name\[50\]" " = 1" \ + "check value of escape that doesn't exist in $target_charset" +} + +gdb_exit -- 2.30.2