gdb/python: handle non utf-8 characters when source highlighting

author Andrew Burgess <aburgess@redhat.com>

Fri, 26 Nov 2021 13:15:28 +0000 (13:15 +0000)

committer Andrew Burgess <aburgess@redhat.com>

Wed, 26 Jan 2022 23:12:52 +0000 (23:12 +0000)
author Andrew Burgess <aburgess@redhat.com>
Fri, 26 Nov 2021 13:15:28 +0000 (13:15 +0000)
committer Andrew Burgess <aburgess@redhat.com>
Wed, 26 Jan 2022 23:12:52 +0000 (23:12 +0000)
diff --git a/gdb/python/lib/gdb/__init__.py b/gdb/python/lib/gdb/__init__.py

index d5e7eac45cc5e02046c595897974aef642fb93cc..9734a0d943787c38f612049574eb30f204a0a85e 100644 (file)
--- a/gdb/python/lib/gdb/__init__.py
+++ b/gdb/python/lib/gdb/__init__.py
@@ -234,7 +234,7 @@ def find_pc_line(pc):
  
  def set_parameter(name, value):
      """Set the GDB parameter NAME to VALUE."""
-    execute('set ' + name + ' ' + str(value), to_string=True)
+    execute("set " + name + " " + str(value), to_string=True)
  
  
  @contextmanager
@@ -258,7 +258,9 @@ try:
          try:
              lexer = lexers.get_lexer_for_filename(filename, stripnl=False)
              formatter = formatters.TerminalFormatter()
-            return highlight(contents, lexer, formatter)
+            return highlight(contents, lexer, formatter).encode(
+                host_charset(), "backslashreplace"
+            )
          except:
              return None
  
diff --git a/gdb/python/python.c b/gdb/python/python.c

index 2c8081e1b07e38150782a930bc695480c161332f..2e659ee6e146e7b38a3cd775c2424e794a88c5f7 100644 (file)
--- a/gdb/python/python.c
+++ b/gdb/python/python.c
@@ -1172,13 +1172,24 @@ gdbpy_colorize (const std::string &filename, const std::string &contents)
        gdbpy_print_stack ();
        return {};
      }
-  gdbpy_ref<> contents_arg (PyString_FromString (contents.c_str ()));
+
+  /* The pygments library, which is what we currently use for applying
+     styling, is happy to take input as a bytes object, and to figure out
+     the encoding for itself.  This removes the need for us to figure out
+     (or guess) how the content is encoded, which is probably a good
+     thing.  */
+  gdbpy_ref<> contents_arg (PyBytes_FromStringAndSize (contents.c_str (),
+                                                      contents.size ()));
    if (contents_arg == nullptr)
      {
        gdbpy_print_stack ();
        return {};
      }
  
+  /* Call gdb.colorize, passing in the filename (a string) and the file
+     contents (a bytes object).  This function should return either a bytes
+     object, the same contents with styling applied, or None to indicate
+     that no styling should be performed.  */
    gdbpy_ref<> result (PyObject_CallFunctionObjArgs (hook.get (),
                                                     fname_arg.get (),
                                                     contents_arg.get (),
@@ -1189,25 +1200,17 @@ gdbpy_colorize (const std::string &filename, const std::string &contents)
        return {};
      }
  
-  if (!gdbpy_is_string (result.get ()))
+  if (result == Py_None)
      return {};
-
-  gdbpy_ref<> unic = python_string_to_unicode (result.get ());
-  if (unic == nullptr)
-    {
-      gdbpy_print_stack ();
-      return {};
-    }
-  gdbpy_ref<> host_str (PyUnicode_AsEncodedString (unic.get (),
-                                                  host_charset (),
-                                                  nullptr));
-  if (host_str == nullptr)
+  else if (!PyBytes_Check (result.get ()))
      {
+      PyErr_SetString (PyExc_TypeError,
+                      _("Return value from gdb.colorize should be a bytes object or None."));
        gdbpy_print_stack ();
        return {};
      }
  
-  return std::string (PyBytes_AsString (host_str.get ()));
+  return std::string (PyBytes_AsString (result.get ()));
  }
  
  \f
diff --git a/gdb/testsuite/gdb.python/py-source-styling.c b/gdb/testsuite/gdb.python/py-source-styling.c

new file mode 100644 (file)

index 0000000..e27f460
--- /dev/null
+++ b/gdb/testsuite/gdb.python/py-source-styling.c
@@ -0,0 +1,29 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2022 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+int
+main ()
+{
+  int some_variable = 1234;
+
+  /* The following line contains a character that is non-utf-8.  This is a
+     critical part of the test as Python 3 can't convert this into a string
+     using its default mechanism.  */
+  char c[] = "À";              /* List this line.  */
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.python/py-source-styling.exp b/gdb/testsuite/gdb.python/py-source-styling.exp

new file mode 100644 (file)

index 0000000..68bbc9f
--- /dev/null
+++ b/gdb/testsuite/gdb.python/py-source-styling.exp
@@ -0,0 +1,64 @@
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of the GDB testsuite.  It checks that Python-based
+# source styling copes with source files containing non-UTF-8 characters.
+
+load_lib gdb-python.exp
+
+standard_testfile
+
+save_vars { env(TERM) } {
+    # We need an ANSI-capable terminal to get the styled output.  Note
+    # that only TERM is set here, LC_ALL is left alone: gdb.colorize
+    # encodes its result using "backslashreplace", so characters the
+    # host charset cannot represent should not raise UnicodeEncodeError.
+    setenv TERM ansi
+
+    if { [prepare_for_testing "failed to prepare" ${testfile} ${srcfile}] } {
+       return -1
+    }
+
+    if { [skip_python_tests] } { continue }
+
+    if { ![gdb_py_module_available "pygments"] } {
+       unsupported "pygments module not available"
+       return -1
+    }
+
+    if ![runto_main] {
+       return
+    }
+
+    gdb_test_no_output "maint set gnu-source-highlight enabled off"
+
+    gdb_test "maint flush source-cache" "Source cache flushed\\."
+
+    set seen_style_escape false
+    set line_number [gdb_get_line_number "List this line."]
+    gdb_test_multiple "list ${line_number}" "" {
+       -re "Python Exception.*" {
+           fail $gdb_test_name
+       }
+       -re "\033" {
+           set seen_style_escape true
+           exp_continue
+       }
+       -re "$gdb_prompt $" {
+           gdb_assert { $seen_style_escape }
+           pass $gdb_test_name
+       }
+    }
+}
author	Andrew Burgess <aburgess@redhat.com>
	Fri, 26 Nov 2021 13:15:28 +0000 (13:15 +0000)
committer	Andrew Burgess <aburgess@redhat.com>
	Wed, 26 Jan 2022 23:12:52 +0000 (23:12 +0000)
gdb/python/lib/gdb/__init__.py		patch \| blob \| history
gdb/python/python.c		patch \| blob \| history
gdb/testsuite/gdb.python/py-source-styling.c	[new file with mode: 0644]	patch \| blob
gdb/testsuite/gdb.python/py-source-styling.exp	[new file with mode: 0644]	patch \| blob