From e867795e8bcd6571c785e5e1d872fff0a5c7b290 Mon Sep 17 00:00:00 2001 From: Andrew Burgess Date: Mon, 25 Oct 2021 17:26:57 +0100 Subject: [PATCH] gdb: use python to colorize disassembler output This commit adds styling support to the disassembler output; as such, two new commands are added to GDB: set style disassembler enabled on|off show style disassembler enabled In this commit I make use of the Python Pygments package to provide the styling. I did investigate making use of libsource-highlight; however, I found the highlighting results to be inferior to those of Pygments: only some mnemonics were highlighted, and highlighting of register names such as r9d and r8d (on x86-64) was incorrect. To enable disassembler highlighting via Pygments, I've added a new extension language hook, which is then implemented for Python. This hook is very similar to the existing hook for source code colorization. One possibly odd choice I made with the new hook is to pass a gdb.Architecture through, even though this is currently unused. The reason this argument is not used is that, currently, styling is performed identically for all architectures. However, even though the Python function used to perform styling of disassembly output is not part of any documented API, I don't want to close the door on a user overriding this function to provide architecture-specific styling. To do this, the user would inevitably require access to the gdb.Architecture, and so I decided to add this argument now. The styling is applied within gdb_disassembler::print_insn. To achieve this, gdb_disassembler now writes its output into a temporary buffer, and styling is then applied to the contents of this buffer. Finally, the gdb_disassembler buffer is copied out to its final destination stream. 
There's a new test to check that the disassembler output includes some escape sequences, though I don't check for specific colors; the precise colors will depend on which instructions are in the disassembler output, and, I guess, how pygments is configured. The only negative change with this commit concerns how we currently style addresses in GDB. Currently, when the disassembler wants to print an address, we call back into GDB, and GDB prints the address value using the `address` styling, and the symbol name using `function` styling. After this commit, if pygments is used, then all disassembler styling is done through pygments, and this includes the address and symbol name parts of the disassembler output. I don't know how much of an issue this will be for people. There's already some precedent for this in GDB when we look at source styling. For example, function names in styled source listings are not styled using the `function` style, but instead, either GNU Source Highlight, or pygments gets to decide how the function name should be styled. If the Python pygments library is not present then GDB will continue to behave as it always has: the disassembler output is mostly unstyled, but the address and symbols are styled using the `address` and `function` styles, as they are today. However, if the user does `set style disassembler enabled off`, then all disassembler styling is switched off. This obviously covers the use of pygments, but also includes the minimal styling done by GDB when pygments is not available. 
--- gdb/NEWS | 6 +++ gdb/cli/cli-style.c | 44 +++++++++++++++++ gdb/cli/cli-style.h | 3 ++ gdb/disasm.c | 58 +++++++++++++++++++++- gdb/disasm.h | 25 ++++++++++ gdb/doc/gdb.texinfo | 15 ++++++ gdb/extension-priv.h | 6 +++ gdb/extension.c | 20 ++++++++ gdb/extension.h | 8 ++++ gdb/python/lib/gdb/__init__.py | 13 +++++ gdb/python/python.c | 67 ++++++++++++++++++++++++++ gdb/testsuite/gdb.base/style.exp | 82 +++++++++++++++++++++++++++++++- gdb/ui-file.h | 8 ++++ 13 files changed, 352 insertions(+), 3 deletions(-) diff --git a/gdb/NEWS b/gdb/NEWS index e173d38c3a1..9da74e71796 100644 --- a/gdb/NEWS +++ b/gdb/NEWS @@ -89,6 +89,12 @@ show suppress-cli-notifications the program being debugged stops (e.g., because of hitting a breakpoint, completing source-stepping, an interrupt, etc.). +set style disassembler enabled on|off +show style disassembler enabled + If GDB is compiled with Python support, and the Python Pygments + package is available, then, when this setting is on, disassembler + output will have styling applied. + * Changed commands maint packet diff --git a/gdb/cli/cli-style.c b/gdb/cli/cli-style.c index 2fd00e9cc3e..6c1652d3986 100644 --- a/gdb/cli/cli-style.c +++ b/gdb/cli/cli-style.c @@ -38,6 +38,11 @@ bool cli_styling = true; bool source_styling = true; +/* True if disassembler styling is enabled. Note that this is only + consulted when cli_styling is true. */ + +bool disassembler_styling = true; + /* Name of colors; must correspond to ui_file_style::basic_color. */ static const char * const cli_colors[] = { "none", @@ -274,6 +279,14 @@ cli_style_option::add_setshow_commands (enum command_class theclass, static cmd_list_element *style_set_list; static cmd_list_element *style_show_list; +/* The command list for 'set style disassembler'. */ + +static cmd_list_element *style_disasm_set_list; + +/* The command list for 'show style disassembler'. 
*/ + +static cmd_list_element *style_disasm_show_list; + static void set_style_enabled (const char *args, int from_tty, struct cmd_list_element *c) { @@ -301,6 +314,18 @@ show_style_sources (struct ui_file *file, int from_tty, fprintf_filtered (file, _("Source code styling is disabled.\n")); } +/* Implement 'show style disassembler'. */ + +static void +show_style_disassembler (struct ui_file *file, int from_tty, + struct cmd_list_element *c, const char *value) +{ + if (disassembler_styling) + fprintf_filtered (file, _("Disassembler output styling is enabled.\n")); + else + fprintf_filtered (file, _("Disassembler output styling is disabled.\n")); +} + void _initialize_cli_style (); void _initialize_cli_style () @@ -337,6 +362,25 @@ available if the appropriate extension is available at runtime." ), set_style_enabled, show_style_sources, &style_set_list, &style_show_list); + add_setshow_prefix_cmd ("disassembler", no_class, + _("\ +Style-specific settings for the disassembler.\n\ +Configure various disassembler style-related variables."), + _("\ +Style-specific settings for the disassembler.\n\ +Configure various disassembler style-related variables."), + &style_disasm_set_list, &style_disasm_show_list, + &style_set_list, &style_show_list); + + add_setshow_boolean_cmd ("enabled", no_class, &disassembler_styling, _("\ +Set whether disassembler output styling is enabled."), _("\ +Show whether disassembler output styling is enabled."), _("\ +If enabled, disassembler output is styled. Disassembler highlighting\n\ +requires the Python Pygments library, if this library is not available\n\ +then disassembler highlighting will not be possible." 
+ ), set_style_enabled, show_style_disassembler, + &style_disasm_set_list, &style_disasm_show_list); + file_name_style.add_setshow_commands (no_class, _("\ Filename display styling.\n\ Configure filename colors and display intensity."), diff --git a/gdb/cli/cli-style.h b/gdb/cli/cli-style.h index 3333c72f65a..f69df47098c 100644 --- a/gdb/cli/cli-style.h +++ b/gdb/cli/cli-style.h @@ -128,6 +128,9 @@ extern cli_style_option version_style; /* True if source styling is enabled. */ extern bool source_styling; +/* True if disassembler styling is enabled. */ +extern bool disassembler_styling; + /* True if styling is enabled. */ extern bool cli_styling; diff --git a/gdb/disasm.c b/gdb/disasm.c index 44c702a7177..b4cde801cb0 100644 --- a/gdb/disasm.c +++ b/gdb/disasm.c @@ -782,9 +782,12 @@ get_all_disassembler_options (struct gdbarch *gdbarch) gdb_disassembler::gdb_disassembler (struct gdbarch *gdbarch, struct ui_file *file, di_read_memory_ftype read_memory_func) - : m_gdbarch (gdbarch) + : m_gdbarch (gdbarch), + m_buffer (!use_ext_lang_colorization_p && disassembler_styling + && file->can_emit_style_escape ()), + m_dest (file) { - init_disassemble_info (&m_di, file, dis_asm_fprintf); + init_disassemble_info (&m_di, &m_buffer, dis_asm_fprintf); m_di.flavour = bfd_target_unknown_flavour; m_di.memory_error_func = dis_asm_memory_error; m_di.print_address_func = dis_asm_print_address; @@ -813,14 +816,65 @@ gdb_disassembler::~gdb_disassembler () disassemble_free_target (&m_di); } +/* See disasm.h. */ + +bool gdb_disassembler::use_ext_lang_colorization_p = true; + +/* See disasm.h. 
*/ + int gdb_disassembler::print_insn (CORE_ADDR memaddr, int *branch_delay_insns) { m_err_memaddr.reset (); + m_buffer.clear (); int length = gdbarch_print_insn (arch (), memaddr, &m_di); + /* If we have successfully disassembled an instruction, styling is on, we + think that the extension language might be able to perform styling for + us, and the destination can support styling, then lets call into the + extension languages in order to style this output. */ + if (length > 0 && disassembler_styling + && use_ext_lang_colorization_p + && m_dest->can_emit_style_escape ()) + { + gdb::optional ext_contents; + ext_contents = ext_lang_colorize_disasm (m_buffer.string (), arch ()); + if (ext_contents.has_value ()) + m_buffer = std::move (*ext_contents); + else + { + /* The extension language failed to add styling to the + disassembly output. Set the static flag so that next time we + disassemble we don't even bother attempting to use the + extension language for styling. */ + use_ext_lang_colorization_p = false; + + /* The instruction we just disassembled, and the extension + languages failed to style, might have otherwise had some + minimal styling applied by GDB. To regain that styling we + need to recreate m_buffer, but this time with styling support. + + To do this we perform an in-place new, but this time turn on + the styling support, then we can re-disassembly the + instruction, and gain any minimal styling GDB might add. */ + gdb_static_assert ((std::is_same::value)); + gdb_assert (!m_buffer.term_out ()); + m_buffer.~string_file (); + new (&m_buffer) string_file (true); + length = gdbarch_print_insn (arch (), memaddr, &m_di); + gdb_assert (length > 0); + } + } + + /* Push any disassemble output to the real destination stream. We do + this even if the disassembler reported failure (-1) as the + disassembler may have printed something to its output stream. 
*/ + m_di.fprintf_func (m_dest, "%s", m_buffer.c_str ()); + + /* If the disassembler failed then report an appropriate error. */ if (length < 0) { if (m_err_memaddr.has_value ()) diff --git a/gdb/disasm.h b/gdb/disasm.h index 359fb6a67fd..399afc5ae71 100644 --- a/gdb/disasm.h +++ b/gdb/disasm.h @@ -82,6 +82,31 @@ private: non-memory error. */ gdb::optional m_err_memaddr; + /* Disassembler output is built up into this buffer. Whether this + string_file is created with styling support or not depends on the + value of use_ext_lang_colorization_p, as well as whether disassembler + styling in general is turned on, and also, whether *m_dest supports + styling or not. */ + string_file m_buffer; + + /* The stream to which disassembler output will be written. */ + ui_file *m_dest; + + /* When true, m_buffer will be created without styling support, + otherwise, m_buffer will be created with styling support. + + This field will initially be true, but will be set to false if + ext_lang_colorize_disasm fails to add styling at any time. + + If the extension language is going to add the styling then m_buffer + should be created without styling support, the extension language will + then add styling at the end of the disassembly process. + + If the extension language is not going to add the styling, then we + create m_buffer with styling support, and GDB will add minimal styling + (currently just to addresses and symbols) as it goes. */ + static bool use_ext_lang_colorization_p; + static int dis_asm_fprintf (void *stream, const char *format, ...) ATTRIBUTE_PRINTF(2,3); diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo index 1f85913278b..a68cf31dcf3 100644 --- a/gdb/doc/gdb.texinfo +++ b/gdb/doc/gdb.texinfo @@ -26132,6 +26132,21 @@ then it will be used. @item show style sources Show the current state of source code styling. + +@item set style disassembler enabled @samp{on|off} +Enable or disable disassembler styling. 
This affects whether +disassembler output, such as the output of the @code{disassemble} +command, is styled. Disassembler styling only works if styling in +general is enabled (with @code{set style enabled on}), and if a source +highlighting library is available to @value{GDBN}. + +To highlight disassembler output, @value{GDBN} must be compiled with +Python support, and the Python Pygments package must be available. If +these requirements are not met then @value{GDBN} will not highlight +disassembler output, even when this option is @samp{on}. + +@item show style disassembler enabled +Show the current state of disassembler styling. @end table Subcommands of @code{set style} control specific forms of styling. diff --git a/gdb/extension-priv.h b/gdb/extension-priv.h index ed2121a127b..d9450b51231 100644 --- a/gdb/extension-priv.h +++ b/gdb/extension-priv.h @@ -257,6 +257,12 @@ struct extension_language_ops or an empty option. */ gdb::optional (*colorize) (const std::string &name, const std::string &contents); + + /* Colorize a single line of disassembler output, CONTENT. This should + either return colorized (using ANSI terminal escapes) version of the + contents, or an empty optional. */ + gdb::optional (*colorize_disasm) (const std::string &content, + gdbarch *gdbarch); }; /* State necessary to restore a signal handler to its previous value. */ diff --git a/gdb/extension.c b/gdb/extension.c index f04e928d33d..8f39b86e952 100644 --- a/gdb/extension.c +++ b/gdb/extension.c @@ -904,6 +904,26 @@ ext_lang_colorize (const std::string &filename, const std::string &contents) return result; } +/* See extension.h. 
*/ + +gdb::optional +ext_lang_colorize_disasm (const std::string &content, gdbarch *gdbarch) +{ + gdb::optional result; + + for (const struct extension_language_defn *extlang : extension_languages) + { + if (extlang->ops == nullptr + || extlang->ops->colorize_disasm == nullptr) + continue; + result = extlang->ops->colorize_disasm (content, gdbarch); + if (result.has_value ()) + return result; + } + + return result; +} + /* Called via an observer before gdb prints its prompt. Iterate over the extension languages giving them a chance to change the prompt. The first one to change the prompt wins, diff --git a/gdb/extension.h b/gdb/extension.h index 64d7396f5b7..7eb89530c44 100644 --- a/gdb/extension.h +++ b/gdb/extension.h @@ -319,6 +319,14 @@ extern void get_matching_xmethod_workers extern gdb::optional ext_lang_colorize (const std::string &filename, const std::string &contents); +/* Try to colorize a single line of disassembler output, CONTENT for + GDBARCH. This will return either a colorized (using ANSI terminal + escapes) version of CONTENT, or an empty value if colorizing could not + be done. */ + +extern gdb::optional ext_lang_colorize_disasm + (const std::string &content, gdbarch *gdbarch); + #if GDB_SELF_TEST namespace selftests { extern void (*hook_set_active_ext_lang) (); diff --git a/gdb/python/lib/gdb/__init__.py b/gdb/python/lib/gdb/__init__.py index 9734a0d9437..891f89093f1 100644 --- a/gdb/python/lib/gdb/__init__.py +++ b/gdb/python/lib/gdb/__init__.py @@ -264,7 +264,20 @@ try: except: return None + def colorize_disasm(content, gdbarch): + # Don't want any errors. 
+ try: + lexer = lexers.get_lexer_by_name("asm") + formatter = formatters.TerminalFormatter() + return highlight(content, lexer, formatter).rstrip().encode() + except: + return None + + except: def colorize(filename, contents): return None + + def colorize_disasm(content, gdbarch): + return None diff --git a/gdb/python/python.c b/gdb/python/python.c index 2e659ee6e14..cc80dc1daf9 100644 --- a/gdb/python/python.c +++ b/gdb/python/python.c @@ -121,6 +121,8 @@ static enum ext_lang_rc gdbpy_before_prompt_hook (const struct extension_language_defn *, const char *current_gdb_prompt); static gdb::optional gdbpy_colorize (const std::string &filename, const std::string &contents); +static gdb::optional gdbpy_colorize_disasm + (const std::string &content, gdbarch *gdbarch); /* The interface between gdb proper and loading of python scripts. */ @@ -162,6 +164,8 @@ static const struct extension_language_ops python_extension_ops = gdbpy_get_matching_xmethod_workers, gdbpy_colorize, + + gdbpy_colorize_disasm, }; #endif /* HAVE_PYTHON */ @@ -1213,6 +1217,69 @@ gdbpy_colorize (const std::string &filename, const std::string &contents) return std::string (PyBytes_AsString (result.get ())); } +/* This is the extension_language_ops.colorize_disasm "method". 
*/ + +static gdb::optional +gdbpy_colorize_disasm (const std::string &content, gdbarch *gdbarch) +{ + if (!gdb_python_initialized) + return {}; + + gdbpy_enter enter_py; + + if (gdb_python_module == nullptr + || !PyObject_HasAttrString (gdb_python_module, "colorize_disasm")) + return {}; + + gdbpy_ref<> hook (PyObject_GetAttrString (gdb_python_module, + "colorize_disasm")); + if (hook == nullptr) + { + gdbpy_print_stack (); + return {}; + } + + if (!PyCallable_Check (hook.get ())) + return {}; + + gdbpy_ref<> content_arg (PyBytes_FromString (content.c_str ())); + if (content_arg == nullptr) + { + gdbpy_print_stack (); + return {}; + } + + gdbpy_ref<> gdbarch_arg (gdbarch_to_arch_object (gdbarch)); + if (gdbarch_arg == nullptr) + { + gdbpy_print_stack (); + return {}; + } + + gdbpy_ref<> result (PyObject_CallFunctionObjArgs (hook.get (), + content_arg.get (), + gdbarch_arg.get (), + nullptr)); + if (result == nullptr) + { + gdbpy_print_stack (); + return {}; + } + + if (result == Py_None) + return {}; + + if (!PyBytes_Check (result.get ())) + { + PyErr_SetString (PyExc_TypeError, + _("Return value from gdb.colorize_disasm should be a bytes object or None.")); + gdbpy_print_stack (); + return {}; + } + + return std::string (PyBytes_AsString (result.get ())); +} + /* Printing. */ diff --git a/gdb/testsuite/gdb.base/style.exp b/gdb/testsuite/gdb.base/style.exp index 2cd155d2cf2..68196d6e3e2 100644 --- a/gdb/testsuite/gdb.base/style.exp +++ b/gdb/testsuite/gdb.base/style.exp @@ -13,6 +13,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +load_lib gdb-python.exp + # Test CLI output styling. standard_testfile @@ -187,8 +189,16 @@ proc run_style_tests { } { gdb_test_no_output "set width 0" + # If disassembler styling is being done by the Python pygments + # module, then we can't be sure how the 'some_called_function' + # symbol will be styled. 
However, if pygments is not being + # used then we can know how the symbol name will be styled. set main [limited_style main function] - set func [limited_style some_called_function function] + if { $::python_disassembly_styling } { + set func "some_called_function" + } else { + set func [limited_style some_called_function function] + } # Somewhere should see the call to the function. gdb_test "disassemble main" \ [concat "Dump of assembler code for function $main:.*" \ @@ -304,6 +314,62 @@ proc run_style_tests { } { } } +# Check that disassembler styling can be disabled. The function that +# we are disassembling has some minimal styling applied even if the +# Python pygments module is not available, so, when we disable +# disassembler styling, we should always see a change in output. +proc test_disable_disassembler_styling { } { + save_vars { env(TERM) } { + # We need an ANSI-capable terminal to get the output. + setenv TERM ansi + + # Restart GDB with the correct TERM variable setting, this + # means that GDB will enable styling. 
+ clean_restart_and_disable $::binfile + + set styled_hex [limited_style $::hex address] + set main [limited_style main function] + + foreach_with_prefix disasm_styling { on off } { + gdb_test_no_output "set style disassembler enabled ${disasm_styling}" + + set saw_header_line false + set saw_styled_output_line false + set saw_unstyled_output_line false + gdb_test_multiple "disassemble main" "" { + -re "disassemble main\r\n" { + exp_continue + } + -re "^Dump of assembler code for function $main:" { + set saw_header_line true + exp_continue + } + -re "^\\s+${styled_hex}\\s+<\[^>\]+>:\\s+\[^\r\n\033\]+\r\n" { + set saw_unstyled_output_line true + exp_continue + } + -re "^\\s+${styled_hex}\\s+<\[^>\]+>:\\s+\[^\r\n\]+\033\[^\r\n\]+\r\n" { + set saw_styled_output_line true + exp_continue + } + -re "^End of assembler dump\\.\r\n" { + exp_continue + } + -re "^$::gdb_prompt $" { + gdb_assert { $saw_header_line } + if { $disasm_styling } { + gdb_assert { $saw_styled_output_line } + gdb_assert { !$saw_unstyled_output_line } + } else { + gdb_assert { !$saw_styled_output_line } + gdb_assert { $saw_unstyled_output_line } + } + } + } + } + } +} + # A separate test from the above as the styled text this checks can't # currently be disabled (the text is printed too early in GDB's # startup process). @@ -317,6 +383,15 @@ proc test_startup_version_string { } { gdb_test "" "${vers}.*" "version is styled at startup" } +# Check to see if the Python styling of disassembler output is +# expected or not, this styling requires Python support in GDB, and +# the Python pygments module to be available. +clean_restart ${binfile} +if {![skip_python_tests] && [gdb_py_module_available "pygments"]} { + set python_disassembly_styling true +} else { + set python_disassembly_styling false +} # Run tests with all styles in their default state. 
with_test_prefix "all styles enabled" { @@ -333,5 +408,10 @@ foreach style { title file function highlight variable \ } } +# Check that the disassembler styling can be disabled. +if { $python_disassembly_styling } { + test_disable_disassembler_styling +} + # Finally, check the styling of the version string during startup. test_startup_version_string diff --git a/gdb/ui-file.h b/gdb/ui-file.h index 3df9f936da5..d8bc3fb3e24 100644 --- a/gdb/ui-file.h +++ b/gdb/ui-file.h @@ -191,6 +191,14 @@ public: return ret; } + /* Set the internal buffer contents to STR. Any existing contents are + discarded. */ + string_file &operator= (std::string &&str) + { + m_string = std::move (str); + return *this; + } + /* Provide a few convenience methods with the same API as the underlying std::string. */ const char *data () const { return m_string.data (); } -- 2.30.2