From dea97812277d4b545fce15c857925c2997f4fd47 Mon Sep 17 00:00:00 2001
From: Kevin Buettner <kevinb@redhat.com>
Date: Fri, 20 Sep 2002 00:26:06 +0000
Subject: [PATCH] Add new tests for charset support.

---
 gdb/testsuite/ChangeLog            |   4 +
 gdb/testsuite/gdb.base/charset.c   | 131 ++++++++
 gdb/testsuite/gdb.base/charset.exp | 486 +++++++++++++++++++++++++++++
 3 files changed, 621 insertions(+)
 create mode 100644 gdb/testsuite/gdb.base/charset.c
 create mode 100644 gdb/testsuite/gdb.base/charset.exp

diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog
index 8c3b82b4efe..37d3d6bd451 100644
--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2002-09-19  Jim Blandy  <jimb@redhat.com>
+
+	* gdb.base/charset.exp, gdb.base/charset.c: New files.
+
 2002-09-19  Andrew Cagney  <ac131313@redhat.com>
 
 	* gdb.gdb/complaints.exp: New file.
diff --git a/gdb/testsuite/gdb.base/charset.c b/gdb/testsuite/gdb.base/charset.c
new file mode 100644
index 00000000000..ece684a1ef9
--- /dev/null
+++ b/gdb/testsuite/gdb.base/charset.c
@@ -0,0 +1,131 @@
+/* Test GDB's character set support
+   Jim Blandy <jimb@cygnus.com> --- December 2001 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* X_string is a null-terminated string in the X charset whose
+   elements are as follows.  X should be the name the `set charset'
+   command uses for the character set, in lower-case, with any
+   non-identifier characters replaced with underscores.  Where a
+   character set doesn't have the given character, the string should
+   contain the character 'x'.
+
+   [0] --- the `alert' character, '\a'
+   [1] --- the `backspace' character, '\b'
+   [2] --- the `escape' character, '\e'
+   [3] --- the `form feed' character, '\f'
+   [4] --- the `line feed' character, '\n'
+   [5] --- the `carriage return' character, '\r'
+   [6] --- the `horizontal tab' character, '\t'
+   [7] --- the `vertical tab' character, '\v'
+   [8  .. 33] --- the uppercase letters A-Z
+   [34 .. 59] --- the lowercase letters a-z
+   [60 .. 69] --- the digits 0-9
+   [70] --- the `cent' character
+   [71] --- a control character with no defined backslash escape
+
+   Feel free to extend these as you like.  */
+
+#define NUM_CHARS (72)
+
+char ascii_string[NUM_CHARS];
+char iso_8859_1_string[NUM_CHARS];
+char ebcdic_us_string[NUM_CHARS];
+char ibm1047_string[NUM_CHARS];
+
+/* Fill all NUM_CHARS bytes of STRING with X, then set slots 0-7, 70, 71.  */
+void
+init_string (char string[], char x, char alert, char backspace, char escape,
+             char form_feed, char line_feed, char carriage_return,
+             char horizontal_tab, char vertical_tab, char cent, char misc_ctrl)
+{
+  char *slot = string;
+
+  memset (string, x, NUM_CHARS);
+  *slot++ = alert;              /* [0] */
+  *slot++ = backspace;          /* [1] */
+  *slot++ = escape;             /* [2] */
+  *slot++ = form_feed;          /* [3] */
+  *slot++ = line_feed;          /* [4] */
+  *slot++ = carriage_return;    /* [5] */
+  *slot++ = horizontal_tab;     /* [6] */
+  *slot++ = vertical_tab;       /* [7] */
+  string[70] = cent;
+  string[71] = misc_ctrl;
+}
+
+/* Store LEN consecutive codes, starting at FIRST, into STRING[START...].  */
+void
+fill_run (char string[], int start, int len, int first)
+{
+  int stop = start + len;
+
+  for (; start < stop; start++, first++)
+    string[start] = first;
+}
+
+/* Fill in each X_string with its charset's codes, then reach the marked
+   puts line, which charset.exp uses as its breakpoint location.  */
+int main (void)
+{
+#ifdef usestubs
+  set_debug_traps();
+  breakpoint();
+#endif
+  /* NOTE(review): presumably forces malloc to be linked in so GDB can
+     call it in this program; confirm before removing.  */
+  (void) malloc (1);
+  /* Initialize ascii_string.  */
+  init_string (ascii_string, 120,
+               7, 8, 27, 12,
+               10, 13, 9,
+               11, 120, 17);
+  fill_run (ascii_string, 8, 26, 65);
+  fill_run (ascii_string, 34, 26, 97);
+  fill_run (ascii_string, 60, 10, 48);
+
+  /* Initialize iso_8859_1_string.  */
+  init_string (iso_8859_1_string, 120,
+               7, 8, 27, 12,
+               10, 13, 9,
+               11, 162, 17);
+  fill_run (iso_8859_1_string, 8, 26, 65);
+  fill_run (iso_8859_1_string, 34, 26, 97);
+  fill_run (iso_8859_1_string, 60, 10, 48);
+
+  /* Initialize ebcdic_us_string.  */
+  init_string (ebcdic_us_string, 167,
+               47, 22, 39, 12,
+               37, 13, 5,
+               11, 74, 17);
+  /* In EBCDIC, the upper-case letters are broken into three separate runs.  */
+  fill_run (ebcdic_us_string, 8, 9, 193);
+  fill_run (ebcdic_us_string, 17, 9, 209);
+  fill_run (ebcdic_us_string, 26, 8, 226);
+  /* The lower-case letters are, too.  */
+  fill_run (ebcdic_us_string, 34, 9, 129);
+  fill_run (ebcdic_us_string, 43, 9, 145);
+  fill_run (ebcdic_us_string, 52, 8, 162);
+  /* The digits, at least, are contiguous.  */
+  fill_run (ebcdic_us_string, 60, 10, 240);
+
+  /* Initialize ibm1047_string.  */
+  init_string (ibm1047_string, 167,
+               47, 22, 39, 12,
+               37, 13, 5,
+               11, 74, 17);
+  /* In EBCDIC, the upper-case letters are broken into three separate runs.  */
+  fill_run (ibm1047_string, 8, 9, 193);
+  fill_run (ibm1047_string, 17, 9, 209);
+  fill_run (ibm1047_string, 26, 8, 226);
+  /* The lower-case letters are, too.  */
+  fill_run (ibm1047_string, 34, 9, 129);
+  fill_run (ibm1047_string, 43, 9, 145);
+  fill_run (ibm1047_string, 52, 8, 162);
+  /* The digits, at least, are contiguous.  */
+  fill_run (ibm1047_string, 60, 10, 240);
+
+  puts ("All set!");            /* all strings initialized */
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.base/charset.exp b/gdb/testsuite/gdb.base/charset.exp
new file mode 100644
index 00000000000..8e765aa3a95
--- /dev/null
+++ b/gdb/testsuite/gdb.base/charset.exp
@@ -0,0 +1,486 @@
+# Copyright 2001 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
+
+# Please email any bugs, comments, and/or additions to this file to:
+# bug-gdb@prep.ai.mit.edu
+
+# Test GDB's character set support.
+
+if $tracelevel then {
+	strace $tracelevel
+}
+
+set prms_id 0
+set bug_id 0
+
+set testfile "charset"
+set srcfile ${testfile}.c
+set binfile ${objdir}/${subdir}/${testfile}
+if  { [gdb_compile "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+     gdb_suppress_entire_file "Testcase compile failed, so all tests in this file will automatically fail."
+}
+
+# Start with a fresh gdb.
+gdb_exit
+gdb_start
+gdb_reinitialize_dir $srcdir/$subdir
+gdb_load ${binfile}
+
+# Parse the output from a `show charset' command.  Return the host
+# and target charset as a two-element list; both elements are empty
+# strings if the output was not recognized or GDB timed out.
+proc parse_show_charset_output {testname} {
+    global gdb_prompt
+
+    # Default for the failure branches, which record nothing else.
+    set result [list "" ""]
+    gdb_expect {
+        -re "The current host and target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" {
+            set result [list $expect_out(1,string) $expect_out(1,string)]
+            pass $testname
+        }
+        -re "The current host character set is `(.*)'\\.\[\r\n\]+The current target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" {
+            set result [list $expect_out(1,string) $expect_out(2,string)]
+            pass $testname
+        }
+        -re ".*$gdb_prompt $" {
+            fail $testname
+        }
+        timeout {
+            fail "$testname (timeout)"
+        }
+    }
+    return $result
+}
+
+
+# Try the various `show charset' commands.  These are all aliases of each
+# other; `show target-charset' and `show host-charset' actually print
+# both the host and target charsets.
+
+send_gdb "show charset\n"
+set show_charset [parse_show_charset_output "show charset"]
+
+send_gdb "show target-charset\n"
+set show_target_charset [parse_show_charset_output "show target-charset"]
+
+if {! [string compare $show_charset $show_target_charset]} {
+    pass "check `show target-charset' against `show charset'"
+} else {
+    fail "check `show target-charset' against `show charset'"
+}
+
+send_gdb "show host-charset\n"
+set show_host_charset [parse_show_charset_output "show host-charset"]
+
+if {! [string compare $show_charset $show_host_charset]} {
+    pass "check `show host-charset' against `show charset'"
+} else {
+    fail "check `show host-charset' against `show charset'"
+}
+
+
+# Get the list of supported charsets.
+send_gdb "set charset\n"
+
+# True iff we've seen the "Valid character sets are:" message.
+set seen_valid 0
+
+# True iff we've seen the "can be used as a host character set" message.
+set seen_can_host 0
+
+# A Tcl array mapping the names of all the character sets we've seen
+# to "1" if the character set can be used as a host character set, or
+# "0" otherwise.  We can use `array names charsets' just to get a list
+# of all character sets.
+array set charsets {}
+
+proc all_charset_names {} {
+    # Every charset name recorded in the global `charsets' array.
+    return [array names ::charsets]
+}
+
+proc charset_exists {charset} {
+    # 1 if the global `charsets' array has an entry for CHARSET, else 0.
+    return [info exists ::charsets($charset)]
+}
+
+proc valid_host_charset {charset} {
+    # The stored host flag for CHARSET; the entry must already exist.
+    return $::charsets($charset)
+}
+
+gdb_expect {
+    -re "Valid character sets are:\[\r\n\]+" {
+        # There's no ^ at the beginning of the pattern above, so that
+        # expect can skip the echoed `set charset' command.
+        set seen_valid 1
+        exp_continue
+    }
+    -re "^  (\[^ \t\n\]*) \\*\[\r\n\]+" {
+        set charsets($expect_out(1,string)) 1
+        exp_continue
+    }
+    -re "^  (\[^ \t\n\]*)\[ \t\]*\[\r\n\]+" {
+        set charsets($expect_out(1,string)) 0
+        exp_continue
+    }
+    -re "^\\* - can be used as a host character set\[\r\n\]+" {
+        set seen_can_host 1
+        exp_continue
+    }
+    -re ".*${gdb_prompt} $" {
+        # We don't do an exp_continue here.
+    }
+    timeout {
+        fail "get valid character sets (timeout)"
+    }
+}
+
+
+# Check that we've seen all the right pieces of the output, and that
+# we can at least use ASCII as a host character set.
+if {$seen_valid && $seen_can_host && [charset_exists ascii]} {
+    # `valid_host_charset' reads $charsets(ascii), which raises a
+    # "can't read" error if that entry is missing, so it is only
+    # called once `charset_exists' has succeeded.  Note that the
+    # outer `else' is also taken when the "Valid character sets are:"
+    # header or the "* - can be used..." footnote was not seen.
+    if {[valid_host_charset ascii]} {
+        pass "get valid character sets"
+    } else {
+        fail "get valid character sets"
+    }
+} else {
+    fail "get valid character sets (no ascii charset)"
+}
+
+
+# Try using `set host-charset' on an invalid character set.
+gdb_test "set host-charset my_grandma_bonnie" \
+         "GDB doesn't know of any character set named `my_grandma_bonnie'." \
+         "try `set host-charset' with invalid charset"
+
+
+# Try using `set target-charset' on an invalid character set.
+gdb_test "set target-charset my_grandma_bonnie" \
+         "GDB doesn't know of any character set named `my_grandma_bonnie'." \
+         "try `set target-charset' with invalid charset"
+
+
+# Make sure that GDB supports every host/target charset combination.
+foreach host_charset [all_charset_names] {
+    if {[valid_host_charset $host_charset]} {
+
+        set testname "try `set host-charset $host_charset'"
+        send_gdb "set host-charset $host_charset\n"
+        gdb_expect {
+            -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" {
+                # How did it get into `charsets' then?
+                fail "$testname (didn't recognize name)"
+            }
+            -re "GDB can't use `.*' as its host character set\\.\[\r\n]+${gdb_prompt} $" {
+                # Well, then why does its `charsets' entry say it can?
+                fail $testname
+            }
+            -re "${gdb_prompt} $" {
+                pass $testname
+            }
+            timeout {
+                fail "$testname (timeout)"
+            }
+        }
+
+        # Check that the command actually had its intended effect:
+        # $host_charset should now be the host character set.
+        send_gdb "show charset\n"
+        set result [parse_show_charset_output "parse `show charset' after `set host-charset $host_charset'"]
+        if {! [string compare [lindex $result 0] $host_charset]} {
+            pass "check effect of `set host-charset $host_charset'"
+        } else {
+            fail "check effect of `set host-charset $host_charset'"
+        }
+
+        # Now try setting every possible target character set,
+        # given that host charset.
+        foreach target_charset [all_charset_names] {
+            set testname "try `set target-charset $target_charset'"
+            send_gdb "set target-charset $target_charset\n"
+            gdb_expect {
+                -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" {
+                    fail "$testname (didn't recognize name)"
+                }
+                -re "GDB can't convert from the .* character set to .*\\.\[\r\n\]+${gdb_prompt} $" {
+                    # This is a serious problem.  GDB should be able to convert
+                    # between any arbitrary pair of character sets.
+                    fail "$testname (can't convert)"
+                }
+                -re "${gdb_prompt} $" {
+                    pass $testname
+                }
+                timeout {
+                    fail "$testname (timeout)"
+                }
+            }
+
+            # Check that the command actually had its intended effect:
+            # $target_charset should now be the target charset.
+            send_gdb "show charset\n"
+            set result [parse_show_charset_output "parse `show charset' after `set target-charset $target_charset'"]
+            if {! [string compare $result [list $host_charset $target_charset]]} {
+                pass "check effect of `set target-charset $target_charset'"
+            } else {
+                fail "check effect of `set target-charset $target_charset'"
+            }
+
+            # Test handling of characters in the host charset which
+            # can't be translated into the target charset.  \xA2 is
+            # `cent' in ISO-8859-1, which has no equivalent in ASCII.
+            #
+            # On some systems, the pseudo-tty through which we
+            # communicate with GDB insists on stripping the high bit
+            # from input characters, meaning that `cent' turns into
+            # `"'.  Since ISO-8859-1 and ASCII are identical in the
+            # lower 128 characters, it's tough to see how we can test
+            # this behavior on such systems, so we just xfail it.
+	    #
+	    # Note: the \x16 (Control-V) is an escape to allow \xA2 to
+	    # get past readline.
+            if {! [string compare $host_charset iso-8859-1] && ! [string compare $target_charset ascii]} {
+
+                set testname "untranslatable character in character literal"
+                send_gdb "print '\x16\xA2'\n"
+                gdb_expect {
+                    -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" {
+                        pass $testname
+                    }
+                    -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" {
+                        xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
+                    }
+                    -re "$gdb_prompt $" {
+                        fail $testname
+                    }
+                    timeout {
+                        fail "$testname (timeout)"
+                    }
+                }
+
+                set testname "untranslatable character in string literal"
+                # If the PTTY zeros bit seven, then this turns into
+                #   print """
+                # which gets us a syntax error.  We don't care.
+                send_gdb "print \"\x16\xA2\"\n"
+                gdb_expect {
+                    -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" {
+                        pass $testname
+                    }
+                    -re "Unterminated string in expression.\[\r\n\]+$gdb_prompt $" {
+                        xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
+                    }
+                    -re "$gdb_prompt $" {
+                        fail $testname
+                    }
+                    timeout {
+                        fail "$testname (timeout)"
+                    }
+                }
+
+                set testname "untranslatable characters in backslash escape"
+                send_gdb "print '\\\x16\xA2'\n"
+                gdb_expect {
+                    -re "The escape sequence .* is equivalent to plain .*, which has no equivalent\[\r\n\]+in the .* character set\\.\[\r\n\]+$gdb_prompt $" {
+                        pass $testname
+                    }
+                    -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" {
+                        xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
+                    }
+                    -re "$gdb_prompt $" {
+                        fail $testname
+                    }
+                    timeout {
+                        fail "$testname (timeout)"
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+# Set the host character set to plain ASCII, and try actually printing
+# some strings in various target character sets.  We need to run the
+# test program to the point at which the strings have been
+# initialized.
+gdb_test "break [gdb_get_line_number "all strings initialized"]" \
+         ".*Breakpoint.* at .*" \
+         "set breakpoint after all strings have been initialized"
+gdb_run_cmd
+gdb_expect {
+    -re "Breakpoint.*all strings initialized.*$gdb_prompt $" {
+        pass "run until all strings have been initialized"
+    }
+    -re "$gdb_prompt $" {
+        fail "run until all strings have been initialized"
+    }
+    timeout {
+        fail "run until all strings have been initialized (timeout)"
+    }
+}
+
+
+gdb_test "set host-charset ascii" ""
+foreach target_charset [all_charset_names] {
+    send_gdb "set target-charset $target_charset\n" 
+    gdb_expect {
+        -re "$gdb_prompt $" {
+            pass "set target-charset $target_charset"
+        }
+        timeout {
+            fail "set target-charset $target_charset (timeout)"
+        }
+    }
+
+    # Try printing the null character.  There seems to be a bug in
+    # gdb_test that requires us to use gdb_expect here.
+    send_gdb "print '\\0'\n"
+    gdb_expect {
+        -re "\\\$${decimal} = 0 '\\\\0'\[\r\n\]+$gdb_prompt $" {
+            pass "print the null character in ${target_charset}"
+        }
+        -re "$gdb_prompt $" {
+            fail "print the null character in ${target_charset}"
+        }
+        timeout {
+            fail "print the null character in ${target_charset} (timeout)"
+        }
+    }
+
+    # Compute the name of the variable in the test program that holds
+    # a string in $target_charset.  The variable's name is the
+    # character set's name, in lower-case, with all non-identifier
+    # characters replaced with '_', with "_string" stuck on the end.
+    set var_name [string tolower "${target_charset}_string"]
+    regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name
+    
+    # Compute a regexp matching the results we expect.  This is static,
+    # but it's easier than writing it out.
+    regsub -all "." "abefnrtv" "(\\\\&|x)" escapes
+    set uppercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    set lowercase "abcdefghijklmnopqrstuvwxyz"
+    set digits "0123456789"
+    set octal_escape "\\\\\[0-9\]\[0-9\]\[0-9\]"
+
+    send_gdb "print $var_name\n"
+    # Expected: 8 x (backslash escape | x), A-Z, a-z, 0-9, then 2 x (octal escape | x).
+    gdb_expect {
+        -re ".* = \"(\\\\a|x)(\\\\b|x)(\\\\e|x)(\\\\f|x)(\\\\n|x)(\\\\r|x)(\\\\t|x)(\\\\v|x)${uppercase}${lowercase}${digits}(\\\\\[0-9\]\[0-9\]\[0-9\]|x)(\\\\\[0-9\]\[0-9\]\[0-9\]|x).*\"\[\r\n\]+$gdb_prompt $" {
+            pass "print string in $target_charset"
+        }
+        -re "$gdb_prompt $" {
+            fail "print string in $target_charset"
+        }
+        timeout {
+            fail "print string in $target_charset (timeout)"
+        }
+    }
+
+    # Try entering a character literal, and see if it comes back unchanged.
+    gdb_test "print 'A'" \
+             " = \[0-9-\]+ 'A'" \
+             "parse character literal in ${target_charset}"
+
+    # Check that the character literal was encoded correctly.
+    gdb_test "print 'A' == $var_name\[8\]" \
+             " = 1" \
+             "check value of parsed character literal in ${target_charset}"
+
+    # Try entering a string literal, and see if it comes back unchanged.
+    gdb_test "print \"abcdefABCDEF012345\"" \
+             " = \"abcdefABCDEF012345\"" \
+             "parse string literal in ${target_charset}"
+
+    # Check that the string literal was encoded correctly.
+    gdb_test "print \"q\"\[0\] == $var_name\[50\]" \
+             " = 1" \
+             "check value of parsed string literal in ${target_charset}"
+
+    # Test handling of characters in the target charset which
+    # can't be translated into the host charset.
+    if {! [string compare $target_charset iso-8859-1]} {
+        gdb_test "print iso_8859_1_string\[70\]" \
+                 " = \[0-9-\]+ '\\\\242'" \
+                 "print character with no equivalent in host character set"
+        gdb_test "print iso_8859_1_string + 70" \
+                 " = ${hex} \"\\\\242.*\"" \
+                 "print string with no equivalent in host character set"
+    }
+
+    # Make sure that we don't apply the ISO-8859-1 `print_literally'
+    # function to ASCII.
+    if {! [string compare $target_charset ascii]} {
+        gdb_test "print iso_8859_1_string\[70\]" \
+                 " = \[0-9-\]+ '\\\\242'" \
+                 "print ASCII unprintable character"
+        gdb_test "print iso_8859_1_string + 70" \
+                 " = ${hex} \"\\\\242.*\"" \
+                 "print ASCII unprintable string"
+    }
+
+    # Try printing characters with backslash escape equivalents.
+    set escapees {a b e f n r t v}
+    for {set i 0} {$i < [llength $escapees]} {incr i} {
+        set escape [lindex $escapees $i]
+        send_gdb "print $var_name\[$i\]\n"
+        set have_escape 1
+        gdb_expect {
+            -re "= \[0-9-\]+ '\\\\${escape}'\[\r\n\]+$gdb_prompt $" {
+                pass "try printing '\\${escape}' in ${target_charset}"
+            }
+            -re "= \[0-9-\]+ 'x'\[\r\n\]+$gdb_prompt $" {
+                xfail "try printing '\\${escape}' in ${target_charset} (no such escape)"
+                set have_escape 0
+            }
+            -re "$gdb_prompt $" {
+                fail "try printing '\\${escape}' in ${target_charset}"
+            }
+            timeout {
+                fail "try printing '\\${escape}' in ${target_charset} (timeout)"
+            }
+        }
+
+        if {$have_escape} {
+
+            # Try parsing a backslash escape in a character literal.
+            gdb_test "print '\\${escape}' == $var_name\[$i\]" \
+                     " = 1" \
+                     "check value of '\\${escape}' in ${target_charset}"
+
+            # Try parsing a backslash escape in a string literal.
+            gdb_test "print \"\\${escape}\"\[0\] == $var_name\[$i\]" \
+                     " = 1" \
+                     "check value of \"\\${escape}\" in ${target_charset}"
+        }
+    }
+
+    # Try printing a character escape that doesn't exist.  We should 
+    # get the unescaped character, in the target character set.
+    gdb_test "print '\\q'" " = \[0-9-\]+ 'q'" \
+             "print escape that doesn't exist in $target_charset"
+    gdb_test "print '\\q' == $var_name\[50\]" " = 1" \
+             "check value of escape that doesn't exist in $target_charset"
+}
+
+gdb_exit 
-- 
2.30.2