From 4de4e48514fc47aeb4ca95cd4091e2a333fbe9e1 Mon Sep 17 00:00:00 2001 From: Andrew Burgess Date: Tue, 24 Jan 2023 15:35:45 +0000 Subject: [PATCH] gdb/python: extend the Python Disassembler API to allow for styling This commit extends the Python Disassembler API to allow for styling of the instructions. Before this commit the Python Disassembler API allowed the user to do two things: - They could intercept instruction disassembly requests and return a string of their choosing, this string then became the disassembled instruction, or - They could call builtin_disassemble, which would call back into libopcode to perform the disassembly. As libopcode printed the instruction GDB would collect these print requests and build a string. This string was then returned from the builtin_disassemble call, and the user could modify or extend this string as needed. Neither of these approaches allowed for, or preserved, disassembler styling, which is now available within libopcodes for many of the more popular architectures GDB supports. This commit aims to fill this gap. After this commit a user will be able to do the following things: - Implement a custom instruction disassembler entirely in Python without calling back into libopcodes, the custom disassembler will be able to return styling information such that GDB will display the instruction fully styled. All of GDB's existing style settings will affect how instructions coming from the Python disassembler are displayed in the expected manner. - Call builtin_disassemble and receive a result that represents how libopcode would like the instruction styled. The user can then adjust or extend the disassembled instruction before returning the result to GDB. Again, the instruction will be styled as expected. To achieve this I will add two new classes to GDB, DisassemblerTextPart and DisassemblerAddressPart. Within builtin_disassemble, instead of capturing the print calls from libopcodes and building a single string, we will now create either a text part or address part and store these parts in a vector. The DisassemblerTextPart will capture a small piece of text along with the associated style that should be used to display the text. This corresponds to the disassembler calling disassemble_info::fprintf_styled_func, or for disassemblers that don't support styling disassemble_info::fprintf_func. The DisassemblerAddressPart is used when libopcodes requests that an address be printed, and takes care of printing the address and associated symbol, this corresponds to the disassembler calling disassemble_info::print_address_func. These parts are then placed within the DisassemblerResult when builtin_disassemble returns. Alternatively, the user can directly create parts by calling two new methods on the DisassembleInfo class: DisassembleInfo.text_part and DisassembleInfo.address_part. Having created these parts the user can then pass these parts when initializing a new DisassemblerResult object. Finally, when we return from Python to gdbpy_print_insn, one way or another, the result being returned will have a list of parts. Back in GDB's C++ code we walk the list of parts and call back into GDB's core to display the disassembled instruction with the correct styling. The new API lives in parallel with the old API. Any existing code that creates a DisassemblerResult using a single string immediately creates a single DisassemblerTextPart containing the entire instruction and gives this part the default text style. This is also what happens if the user calls builtin_disassemble for an architecture that doesn't (yet) support libopcode styling. This matches up with what happens when the Python API is not involved, an architecture without disassembler styling support uses the old libopcodes printing API (the API that doesn't pass style info), and GDB just prints everything using the default text style. The reason that parts are created by calling methods on DisassembleInfo, rather than calling the class constructor directly, is DisassemblerAddressPart. Ideally this part would only hold the address which the part represents, but in order to support backwards compatibility we need to be able to convert the DisassemblerAddressPart into a string. To do that we need to call GDB's internal print_address function, and to do that we need an gdbarch. What this means is that the DisassemblerAddressPart needs to take a gdb.Architecture object at creation time. The only valid place a user can pull this from is from the DisassembleInfo object, so having the DisassembleInfo act as a factory ensures that the correct gdbarch is passed over each time. I implemented both solutions (the one presented here, and an alternative where parts could be constructed directly), and this felt like the cleanest solution. Reviewed-By: Eli Zaretskii Reviewed-By: Tom Tromey --- gdb/NEWS | 19 + gdb/doc/python.texi | 311 ++++++++- gdb/python/py-disasm.c | 871 +++++++++++++++++++++++-- gdb/testsuite/gdb.python/py-disasm.exp | 94 ++- gdb/testsuite/gdb.python/py-disasm.py | 164 ++++- 5 files changed, 1366 insertions(+), 93 deletions(-) diff --git a/gdb/NEWS b/gdb/NEWS index ca164257126..b82114d80b0 100644 --- a/gdb/NEWS +++ b/gdb/NEWS @@ -173,6 +173,25 @@ info main ** It is now no longer possible to sub-class the gdb.disassembler.DisassemblerResult type. + ** The Disassembler API from the gdb.disassembler module has been + extended to include styling support: + + - The DisassemblerResult class can now be initialized with a list + of parts. Each part represents part of the disassembled + instruction along with the associated style information. This + list of parts can be accessed with the new + DisassemblerResult.parts property. + + - New constants gdb.disassembler.STYLE_* representing all the + different styles part of an instruction might have. + + - New methods DisassembleInfo.text_part and + DisassembleInfo.address_part which are used to create the new + styled parts of a disassembled instruction. + + - Changes are backwards compatible, the older API can still be + used to disassemble instructions without styling. + *** Changes in GDB 13 * MI version 1 is deprecated, and will be removed in GDB 14. diff --git a/gdb/doc/python.texi b/gdb/doc/python.texi index a906c168373..5d714ee1ca3 100644 --- a/gdb/doc/python.texi +++ b/gdb/doc/python.texi @@ -6870,6 +6870,7 @@ values can be 1 (left), 2 (middle), or 3 (right). using the Python API. The disassembler related features are contained within the @code{gdb.disassembler} module: +@anchor{DisassembleInfo Class} @deftp {class} gdb.disassembler.DisassembleInfo Disassembly is driven by instances of this class. Each time @value{GDBN} needs to disassemble an instruction, an instance of this @@ -6971,6 +6972,25 @@ not itself raise a @code{MemoryError}. Any other exception type raised in @code{read_memory} will propagate back and be re-raised by @code{builtin_disassemble}. @end defun + +@defun DisassembleInfo.text_part (style, string) +Create a new @code{DisassemblerTextPart} representing a piece of a +disassembled instruction. @var{string} should be a non-empty string, +and @var{style} should be an appropriate style constant +(@pxref{Disassembler Style Constants}). + +Disassembler parts are used when creating a @code{DisassemblerResult} +in order to represent the styling within an instruction +(@pxref{DisassemblerResult Class}). +@end defun + +@defun DisassembleInfo.address_part (address) +Create a new @code{DisassemblerAddressPart}. @var{address} is the +value of the absolute address this part represents. A +@code{DisassemblerAddressPart} is displayed as an absolute address and +an associated symbol, the address and symbol are styled appropriately. +@end defun + @end deftp @anchor{Disassembler Class} @@ -7024,6 +7044,7 @@ the error stream according to the @kbd{set python print-stack} setting @end defun @end deftp +@anchor{DisassemblerResult Class} @deftp {class} gdb.disassembler.DisassemblerResult This class represents the result of disassembling a single instruction. An instance of this class will be returned from @@ -7037,11 +7058,30 @@ It is not possible to sub-class the @code{DisassemblerResult} class. The @code{DisassemblerResult} class has the following properties and methods: -@defun DisassemblerResult.__init__ (length, string) +@defun DisassemblerResult.__init__ (length, string, parts) Initialize an instance of this class, @var{length} is the length of the disassembled instruction in bytes, which must be greater than -zero, and @var{string} is a non-empty string that represents the -disassembled instruction. +zero. + +Only one of @var{string} or @var{parts} should be used to initialize a +new @code{DisassemblerResult}; the other one should be passed the +value @code{None}. Alternatively, the arguments can be passed by +name, and the unused argument can be ignored. + +The @var{string} argument, if not @code{None}, is a non-empty string +that represents the entire disassembled instruction. Building a result +object using the @var{string} argument does not allow for any styling +information to be included in the result. @value{GDBN} will style the +result as a single @code{DisassemblerTextPart} with @code{STYLE_TEXT} +style (@pxref{Disassembler Styling Parts}). + +The @var{parts} argument, if not @code{None}, is a non-empty sequence +of @code{DisassemblerPart} objects. Each part represents a small part +of the disassembled instruction along with associated styling +information. A result object built using @var{parts} can be displayed +by @value{GDBN} with full styling information +(@pxref{style_disassembler_enabled,,@kbd{set style disassembler +enabled}}). @end defun @defvar DisassemblerResult.length @@ -7051,10 +7091,273 @@ instruction in bytes, this will always be greater than zero. @defvar DisassemblerResult.string A read-only property containing a non-empty string representing the -disassembled instruction. +disassembled instruction. The @var{string} is a representation of the +disassembled instruction without any styling information. To see how +the instruction will be styled use the @var{parts} property. + +If this instance was initialized using separate +@code{DisassemblerPart} objects, the @var{string} property will still +be valid. The @var{string} value is created by concatenating the +@code{DisassemblerPart.string} values of each component part +(@pxref{Disassembler Styling Parts}). +@end defvar + +@defvar DisassemblerResult.parts +A read-only property containing a non-empty sequence of +@code{DisassemblerPart} objects. Each @code{DisassemblerPart} object +contains a small part of the instruction along with information about +how that part should be styled. @value{GDBN} uses this information to +create styled disassembler output +(@pxref{style_disassembler_enabled,,@kbd{set style disassembler +enabled}}). + +If this instance was initialized using a single string rather than +with a sequence of @code{DisassemblerPart} objects, the @var{parts} +property will still be valid. In this case the @var{parts} property +will hold a sequence containing a single @code{DisassemblerTextPart} +object, the string of which will represent the entire instruction, and +the style of which will be @code{STYLE_TEXT}. +@end defvar +@end deftp + +@anchor{Disassembler Styling Parts} +@deftp {class} gdb.disassembler.DisassemblerPart +This is a parent class from which the different part sub-classes +inherit. Only instances of the sub-classes detailed below will be +returned by the Python API. + +It is not possible to directly create instances of either this parent +class, or any of the sub-classes listed below. Instances of the +sub-classes listed below are created by calling +@code{builtin_disassemble} (@pxref{builtin_disassemble}) and are +returned within the @code{DisassemblerResult} object, or can be +created by calling the @code{text_part} and @code{address_part} +methods on the @code{DisassembleInfo} class (@pxref{DisassembleInfo +Class}). + +The @code{DisassemblerPart} class has a single property: + +@defvar DisassemblerPart.string +A read-only property that contains a non-empty string representing +this part of the disassembled instruction. The string within this +property doesn't include any styling information. @end defvar @end deftp +@deftp {class} gdb.disassembler.DisassemblerTextPart +The @code{DisassemblerTextPart} class represents a piece of the +disassembled instruction and the associated style for that piece. +Instances of this class can't be created directly, instead call +@code{DisassembleInfo.text_part} to create a new instance of this +class (@pxref{DisassembleInfo Class}). + +As well as the properties of its parent class, the +@code{DisassemblerTextPart} has the following additional property: + +@defvar DisassemblerTextPart.style +A read-only property that contains one of the defined style constants. +@value{GDBN} will use this style when styling this part of the +disassembled instruction (@pxref{Disassembler Style Constants}). +@end defvar +@end deftp + +@deftp {class} gdb.disassembler.DisassemblerAddressPart +The @code{DisassemblerAddressPart} class represents an absolute +address within a disassembled instruction. Using a +@code{DisassemblerAddressPart} instead of a +@code{DisassemblerTextPart} with @code{STYLE_ADDRESS} is preferred, +@value{GDBN} will display the address as both an absolute address, and +will look up a suitable symbol to display next to the address. Using +@code{DisassemblerAddressPart} also ensures that user settings such as +@code{set print max-symbolic-offset} are respected. + +Here is an example of an x86-64 instruction: + +@smallexample +call 0x401136 +@end smallexample + +@noindent +In this instruction the @code{0x401136 } was generated from a +single @code{DisassemblerAddressPart}. The @code{0x401136} will be +styled with @code{STYLE_ADDRESS}, and @code{foo} will be styled with +@code{STYLE_SYMBOL}. The @code{<} and @code{>} will be styled as +@code{STYLE_TEXT}. + +If the inclusion of the symbol name is not required then a +@code{DisassemblerTextPart} with style @code{STYLE_ADDRESS} can be +used instead. + +Instances of this class can't be created directly, instead call +@code{DisassembleInfo.address_part} to create a new instance of this +class (@pxref{DisassembleInfo Class}). + +As well as the properties of its parent class, the +@code{DisassemblerAddressPart} has the following additional property: + +@defvar DisassemblerAddressPart.address +A read-only property that contains the @var{address} passed to this +object's @code{__init__} method. +@end defvar +@end deftp + +@anchor{Disassembler Style Constants} + +The following table lists all of the disassembler styles that are +available. @value{GDBN} maps these style constants onto its style +settings (@pxref{Output Styling}). In some cases, several style +constants produce the same style settings, and thus will produce the +same visual effect on the screen. This could change in future +releases of @value{GDBN}, so care should be taken to select the +correct style constant to ensure correct output styling in future +releases of @value{GDBN}. + +@vtable @code +@vindex STYLE_TEXT +@item gdb.disassembler.STYLE_TEXT +This is the default style used by @value{GDBN} when styling +disassembler output. This style should be used for any parts of the +instruction that don't fit any of the other styles listed below. +@value{GDBN} styles text with this style using its default style. + +@vindex STYLE_MNEMONIC +@item gdb.disassembler.STYLE_MNEMONIC +This style is used for styling the primary instruction mnemonic, which +usually appears at, or near, the start of the disassembled instruction +string. + +@value{GDBN} styles text with this style using the @code{disassembler +mnemonic} style setting. + +@vindex STYLE_SUB_MNEMONIC +@item gdb.disassembler.STYLE_SUB_MNEMONIC +This style is used for styling any sub-mnemonics within a disassembled +instruction. A sub-mnemonic is any text within the instruction that +controls the function of the instruction, but which is disjoint from +the primary mnemonic (which will have styled @code{STYLE_MNEMONIC}). + +As an example, consider this AArch64 instruction: + +@smallexample +add w16, w7, w1, lsl #1 +@end smallexample + +@noindent +The @code{add} is the primary instruction mnemonic, and would be given +style @code{STYLE_MNEMONIC}, while @code{lsl} is the sub-mnemonic, and +would be given the style @code{STYLE_SUB_MNEMONIC}. + +@value{GDBN} styles text with this style using the @code{disassembler +mnemonic} style setting. + +@vindex STYLE_ASSEMBLER_DIRECTIVE +@item gdb.disassembler.STYLE_ASSEMBLER_DIRECTIVE +Sometimes a series of bytes doesn't decode to a valid instruction. In +this case the disassembler may choose to represent the result of +disassembling using an assembler directive, for example: + +@smallexample +.word 0x1234 +@end smallexample + +@noindent +In this case, the @code{.word} would be give the +@code{STYLE_ASSEMBLER_DIRECTIVE} style. An assembler directive is +similar to a mnemonic in many ways but is something that is not part +of the architecture's instruction set. + +@value{GDBN} styles text with this style using the @code{disassembler +mnemonic} style setting. + +@vindex STYLE_REGISTER +@item gdb.disassembler.STYLE_REGISTER +This style is used for styling any text that represents a register +name, or register number, within a disassembled instruction. + +@value{GDBN} styles text with this style using the @code{disassembler +register} style setting. + +@vindex STYLE_ADDRESS +@item gdb.disassembler.STYLE_ADDRESS +This style is used for styling numerical values that represent +absolute addresses within the disassembled instruction. + +When creating a @code{DisassemblerTextPart} with this style, you +should consider if a @code{DisassemblerAddressPart} would be more +appropriate. See @ref{Disassembler Styling Parts} for a description +of what each part offers. + +@value{GDBN} styles text with this style using the @code{disassembler +address} style setting. + +@vindex STYLE_ADDRESS_OFFSET +@item gdb.disassembler.STYLE_ADDRESS_OFFSET +This style is used for styling numerical values that represent offsets +to addresses within the disassembled instruction. A value is +considered an address offset when the instruction itself is going to +access memory, and the value is being used to offset which address is +accessed. + +For example, an architecture might have an instruction that loads from +memory using an address within a register. If that instruction also +allowed for an immediate offset to be encoded into the instruction, +this would be an address offset. Similarly, a branch instruction +might jump to an address in a register plus an address offset that is +encoded into the instruction. + +@value{GDBN} styles text with this style using the @code{disassembler +immediate} style setting. + +@vindex STYLE_IMMEDIATE +@item gdb.disassembler.STYLE_IMMEDIATE +Use @code{STYLE_IMMEDIATE} for any numerical values within a +disassembled instruction when those values are not addresses, address +offsets, or register numbers (The styles @code{STYLE_ADDRESS}, +@code{STYLE_ADDRESS_OFFSET}, or @code{STYLE_REGISTER} can be used in +those cases). + +@value{GDBN} styles text with this style using the @code{disassembler +immediate} style setting. + +@vindex STYLE_SYMBOL +@item gdb.disassembler.STYLE_SYMBOL +This style is used for styling the textual name of a symbol that is +included within a disassembled instruction. A symbol name is often +included next to an absolute address within a disassembled instruction +to make it easier for the user to understand what the address is +referring too. For example: + +@smallexample +call 0x401136 +@end smallexample + +@noindent +Here @code{foo} is the name of a symbol, and should be given the +@code{STYLE_SYMBOL} style. + +Adding symbols next to absolute addresses like this is handled +automatically by the @code{DisassemblerAddressPart} class +(@pxref{Disassembler Styling Parts}). + +@value{GDBN} styles text with this style using the @code{disassembler +symbol} style setting. + +@vindex STYLE_COMMENT_START +@item gdb.disassembler.STYLE_COMMENT_START +This style is used to start a line comment in the disassembly output. +Unlike other styles, which only apply to the single +@code{DisassemblerTextPiece} to which they are applied, the comment +style is sticky, and overrides the style of any further pieces within +this instruction. + +This means that, after a @code{STYLE_COMMENT_START} piece has been +seen, @value{GDBN} will apply the comment style until the end of the +line, ignoring the specific style within a piece. + +@value{GDBN} styles text with this style using the @code{disassembler +comment} style setting. +@end vtable + The following functions are also contained in the @code{gdb.disassembler} module: diff --git a/gdb/python/py-disasm.c b/gdb/python/py-disasm.c index f246a093014..85d936ee4a5 100644 --- a/gdb/python/py-disasm.c +++ b/gdb/python/py-disasm.c @@ -56,6 +56,49 @@ struct disasm_info_object extern PyTypeObject disasm_info_object_type CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_info_object"); +/* Implement gdb.disassembler.DisassembleAddressPart type. An object of + this type represents a small part of a disassembled instruction; a part + that is an address that should be printed using a call to GDB's + internal print_address function. */ + +struct disasm_addr_part_object +{ + PyObject_HEAD + + /* The address to be formatted. */ + bfd_vma address; + + /* A gdbarch. This is only needed in the case where the user asks for + the DisassemblerAddressPart to be converted to a string. When we + return this part to GDB within a DisassemblerResult then GDB will use + the gdbarch from the initial disassembly request. */ + struct gdbarch *gdbarch; +}; + +extern PyTypeObject disasm_addr_part_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_addr_part_object"); + +/* Implement gdb.disassembler.DisassembleTextPart type. An object of + this type represents a small part of a disassembled instruction; a part + that is a piece of test along with an associated style. */ + +struct disasm_text_part_object +{ + PyObject_HEAD + + /* The string that is this part. */ + std::string *string; + + /* The style to use when displaying this part. */ + enum disassembler_style style; +}; + +extern PyTypeObject disasm_text_part_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_text_part_object"); + +extern PyTypeObject disasm_part_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("PyObject"); + /* Implement gdb.disassembler.DisassemblerResult type, an object that holds the result of calling the disassembler. This is mostly the length of the disassembled instruction (in bytes), and the string representing the @@ -68,9 +111,9 @@ struct disasm_result_object /* The length of the disassembled instruction in bytes. */ int length; - /* A buffer which, when allocated, holds the disassembled content of an - instruction. */ - string_file *content; + /* A vector containing all the parts of the disassembled instruction. + Each part will be a DisassemblerPart sub-class. */ + std::vector> *parts; }; extern PyTypeObject disasm_result_object_type @@ -88,7 +131,7 @@ static bool python_print_insn_enabled = false; placed in the application_data field of the disassemble_info that is used when we call gdbarch_print_insn. */ -struct gdbpy_disassembler : public gdb_printing_disassembler +struct gdbpy_disassembler : public gdb_disassemble_info { /* Constructor. */ gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source); @@ -109,6 +152,27 @@ struct gdbpy_disassembler : public gdb_printing_disassembler unsigned int len, struct disassemble_info *info) noexcept; + /* Callback used as the disassemble_info's fprintf_func callback. The + DIS_INFO pointer is a pointer to a gdbpy_disassembler object. */ + static int fprintf_func (void *dis_info, const char *format, ...) noexcept + ATTRIBUTE_PRINTF(2,3); + + /* Callback used as the disassemble_info's fprintf_styled_func callback. + The DIS_INFO pointer is a pointer to a gdbpy_disassembler. */ + static int fprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, ...) noexcept + ATTRIBUTE_PRINTF(3,4); + + /* Helper used by fprintf_func and fprintf_styled_func. This function + creates a new DisassemblerTextPart and adds it to the disassembler's + parts list. The actual disassembler is accessed through DIS_INFO, + which is a pointer to the gdbpy_disassembler object. */ + static int vfprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, va_list args) noexcept + ATTRIBUTE_PRINTF(3,0); + /* Return a reference to an optional that contains the address at which a memory error occurred. The optional will only have a value if a memory error actually occurred. */ @@ -118,9 +182,9 @@ struct gdbpy_disassembler : public gdb_printing_disassembler /* Return the content of the disassembler as a string. The contents are moved out of the disassembler, so after this call the disassembler contents have been reset back to empty. */ - std::string release () + std::vector> release () { - return m_string_file.release (); + return std::move (m_parts); } /* If there is a Python exception stored in this disassembler then @@ -147,8 +211,10 @@ struct gdbpy_disassembler : public gdb_printing_disassembler private: - /* Where the disassembler result is written. */ - string_file m_string_file; + /* The list of all the parts that make up this disassembled instruction. + This is populated as a result of the callbacks from libopcodes as the + instruction is disassembled. */ + std::vector> m_parts; /* The DisassembleInfo object we are disassembling for. */ disasm_info_object *m_disasm_info_object; @@ -286,6 +352,38 @@ disasmpy_set_memory_error_for_address (CORE_ADDR address) PyErr_SetObject (gdbpy_gdb_memory_error, address_obj); } +/* Create a new DisassemblerTextPart and return a gdbpy_ref wrapper for + the new object. STR is the string content of the part and STYLE is the + style to be used when GDB displays this part. */ + +static gdbpy_ref<> +make_disasm_text_part (std::string &&str, enum disassembler_style style) +{ + PyTypeObject *type = &disasm_text_part_object_type; + disasm_text_part_object *text_part + = (disasm_text_part_object *) type->tp_alloc (type, 0); + text_part->string = new std::string (str); + text_part->style = style; + + return gdbpy_ref<> ((PyObject *) text_part); +} + +/* Create a new DisassemblerAddressPart and return a gdbpy_ref wrapper for + the new object. GDBARCH is the architecture used when formatting the + address, and ADDRESS is the numerical address to be displayed. */ + +static gdbpy_ref<> +make_disasm_addr_part (struct gdbarch *gdbarch, CORE_ADDR address) +{ + PyTypeObject *type = &disasm_addr_part_object_type; + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) type->tp_alloc (type, 0); + addr_part->address = address; + addr_part->gdbarch = gdbarch; + + return gdbpy_ref<> ((PyObject *) addr_part); +} + /* Ensure that a gdb.disassembler.DisassembleInfo is valid. */ #define DISASMPY_DISASM_INFO_REQUIRE_VALID(Info) \ @@ -298,21 +396,135 @@ disasmpy_set_memory_error_for_address (CORE_ADDR address) } \ } while (0) -/* Initialise OBJ, a DisassemblerResult object with LENGTH and CONTENT. +/* Implement DisassembleInfo.text_part method. Creates and returns a new + DisassemblerTextPart object. */ + +static PyObject * +disasmpy_info_make_text_part (PyObject *self, PyObject *args, + PyObject *kwargs) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + + static const char *keywords[] = { "style", "string", NULL }; + int style_num; + const char *string; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords, + &style_num, &string)) + return nullptr; + + if (style_num < 0 || style_num > ((int) dis_style_comment_start)) + { + PyErr_SetString (PyExc_ValueError, + _("Invalid disassembler style.")); + return nullptr; + } + + if (strlen (string) == 0) + { + PyErr_SetString (PyExc_ValueError, + _("String must not be empty.")); + return nullptr; + } + + gdbpy_ref<> text_part + = make_disasm_text_part (std::string (string), + (enum disassembler_style) style_num); + return text_part.release (); +} + +/* Implement DisassembleInfo.address_part method. Creates and returns a + new DisassemblerAddressPart object. */ + +static PyObject * +disasmpy_info_make_address_part (PyObject *self, PyObject *args, + PyObject *kwargs) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + + static const char *keywords[] = { "address", NULL }; + CORE_ADDR address; + PyObject *address_object; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "O", keywords, + &address_object)) + return nullptr; + + if (get_addr_from_python (address_object, &address) < 0) + return nullptr; + + return make_disasm_addr_part (obj->gdbarch, address).release (); +} + +/* Return a string representation of TEXT_PART. The returned string does + not include any styling. */ + +static std::string +disasmpy_part_to_string (const disasm_text_part_object *text_part) +{ + gdb_assert (text_part->string != nullptr); + return *(text_part->string); +} + +/* Return a string representation of ADDR_PART. The returned string does + not include any styling. */ + +static std::string +disasmpy_part_to_string (const disasm_addr_part_object *addr_part) +{ + string_file buf; + print_address (addr_part->gdbarch, addr_part->address, &buf); + return buf.release (); +} + +/* PARTS is a vector of Python objects, each is a sub-class of + DisassemblerPart. Create a string by concatenating the string + representation of each part, and return this new string. + + Converting an address part requires that we call back into GDB core, + which could throw an exception. As such, calls to this function should + be wrapped with a try/catch. */ + +static std::string +disasmpy_parts_list_to_string (const std::vector> &parts) +{ + std::string str; + for (auto p : parts) + { + if (Py_TYPE (p.get ()) == &disasm_text_part_object_type) + { + disasm_text_part_object *text_part + = (disasm_text_part_object *) p.get (); + str += disasmpy_part_to_string (text_part); + } + else + { + gdb_assert (Py_TYPE (p.get ()) == &disasm_addr_part_object_type); + + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) p.get (); + str += disasmpy_part_to_string (addr_part); + } + } + + return str; +} + +/* Initialise OBJ, a DisassemblerResult object with LENGTH and PARTS. OBJ might already have been initialised, in which case any existing - content should be discarded before the new CONTENT is moved in. */ + content should be discarded before the new PARTS are moved in. */ static void disasmpy_init_disassembler_result (disasm_result_object *obj, int length, - std::string content) + std::vector> &&parts) { - if (obj->content == nullptr) - obj->content = new string_file; + if (obj->parts == nullptr) + obj->parts = new std::vector>; else - obj->content->clear (); + obj->parts->clear (); obj->length = length; - *(obj->content) = std::move (content); + *(obj->parts) = std::move (parts); } /* Implement gdb.disassembler.builtin_disassemble(). Calls back into GDB's @@ -375,9 +587,19 @@ disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw) } else { - std::string content = disassembler.release (); - if (!content.empty ()) - PyErr_SetString (gdbpy_gdberror_exc, content.c_str ()); + auto content = disassembler.release (); + std::string str; + + try + { + str = disasmpy_parts_list_to_string (content); + } + catch (const gdb_exception &except) + { + GDB_PY_HANDLE_EXCEPTION (except); + } + if (!str.empty ()) + PyErr_SetString (gdbpy_gdberror_exc, str.c_str ()); else PyErr_SetString (gdbpy_gdberror_exc, _("Unknown disassembly error.")); @@ -393,10 +615,10 @@ disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw) gdb_assert (!disassembler.memory_error_address ().has_value ()); /* Create a DisassemblerResult containing the results. */ - std::string content = disassembler.release (); PyTypeObject *type = &disasm_result_object_type; gdbpy_ref res ((disasm_result_object *) type->tp_alloc (type, 0)); + auto content = disassembler.release (); disasmpy_init_disassembler_result (res.get (), length, std::move (content)); return reinterpret_cast (res.release ()); } @@ -510,6 +732,88 @@ disasmpy_info_progspace (PyObject *self, void *closure) return pspace_to_pspace_object (obj->program_space).release (); } +/* Helper function called when the libopcodes disassembler produces some + output. FORMAT and ARGS are used to create a string which GDB will + display using STYLE. The string is either added as a new + DisassemblerTextPart to the list of parts being built in the current + gdbpy_disassembler object (accessed through DIS_INFO). Or, if the last + part in the gdbpy_disassembler is a text part in the same STYLE, then + the new string is appended to the previous part. + + The merging behaviour make the Python API a little more user friendly, + some disassemblers produce their output character at a time, there's no + particular reason for this, it's just how they are implemented. By + merging parts with the same style we make it easier for the user to + analyse the disassembler output. */ + +int +gdbpy_disassembler::vfprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, + va_list args) noexcept +{ + gdb_disassemble_info *di = (gdb_disassemble_info *) dis_info; + gdbpy_disassembler *dis + = gdb::checked_static_cast (di); + + if (!dis->m_parts.empty () + && Py_TYPE (dis->m_parts.back ().get ()) == &disasm_text_part_object_type + && (((disasm_text_part_object *) dis->m_parts.back ().get ())->style + == style)) + { + std::string *string + = ((disasm_text_part_object *) dis->m_parts.back ().get ())->string; + string_vappendf (*string, format, args); + } + else + { + std::string str = string_vprintf (format, args); + if (str.size () > 0) + { + gdbpy_ref<> text_part + = make_disasm_text_part (std::move (str), style); + dis->m_parts.emplace_back (std::move (text_part)); + } + } + + /* Something non -ve. */ + return 0; +} + +/* Disassembler callback for architectures where libopcodes doesn't + created styled output. In these cases we format all the output using + the (default) text style. */ + +int +gdbpy_disassembler::fprintf_func (void *dis_info, + const char *format, ...) noexcept +{ + va_list args; + va_start (args, format); + vfprintf_styled_func (dis_info, dis_style_text, format, args); + va_end (args); + + /* Something non -ve. */ + return 0; +} + +/* Disassembler callback for architectures where libopcodes does create + styled output. Just creates a new text part with the given STYLE. */ + +int +gdbpy_disassembler::fprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, ...) noexcept +{ + va_list args; + va_start (args, format); + vfprintf_styled_func (dis_info, style, format, args); + va_end (args); + + /* Something non -ve. */ + return 0; +} + /* This implements the disassemble_info read_memory_func callback and is called from the libopcodes disassembler when the disassembler wants to read memory. @@ -615,11 +919,24 @@ disasmpy_result_str (PyObject *self) { disasm_result_object *obj = (disasm_result_object *) self; - gdb_assert (obj->content != nullptr); - gdb_assert (obj->content->size () > 0); + /* These conditions are all enforced when the DisassemblerResult object + is created. */ + gdb_assert (obj->parts != nullptr); + gdb_assert (obj->parts->size () > 0); gdb_assert (obj->length > 0); - return PyUnicode_Decode (obj->content->c_str (), - obj->content->size (), + + std::string str; + + try + { + str = disasmpy_parts_list_to_string (*obj->parts); + } + catch (const gdb_exception &except) + { + GDB_PY_HANDLE_EXCEPTION (except); + } + + return PyUnicode_Decode (str.c_str (), str.size (), host_charset (), nullptr); } @@ -642,6 +959,39 @@ disasmpy_result_string (PyObject *self, void *closure) return disasmpy_result_str (self); } +/* Implement DisassemblerResult.parts method. Returns a list of all the + parts that make up this result. There should always be at least one + part, so the returned list should never be empty. */ + +static PyObject * +disasmpy_result_parts (PyObject *self, void *closure) +{ + disasm_result_object *obj = (disasm_result_object *) self; + + /* These conditions are all enforced when the DisassemblerResult object + is created. */ + gdb_assert (obj->parts != nullptr); + gdb_assert (obj->parts->size () > 0); + gdb_assert (obj->length > 0); + + gdbpy_ref<> result_list (PyList_New (obj->parts->size ())); + if (result_list == nullptr) + return nullptr; + Py_ssize_t idx = 0; + for (auto p : *obj->parts) + { + gdbpy_ref<> item = gdbpy_ref<>::new_reference (p.get ()); + PyList_SET_ITEM (result_list.get (), idx, item.release ()); + ++idx; + } + + /* This should follow naturally from the obj->parts list being + non-empty. */ + gdb_assert (PyList_Size (result_list.get()) > 0); + + return result_list.release (); +} + /* Implement DisassemblerResult.__init__. Takes two arguments, an integer, the length in bytes of the disassembled instruction, and a string, the disassembled content of the instruction. */ @@ -649,11 +999,12 @@ disasmpy_result_string (PyObject *self, void *closure) static int disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs) { - static const char *keywords[] = { "length", "string", NULL }; + static const char *keywords[] = { "length", "string", "parts", NULL }; int length; - const char *string; - if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords, - &length, &string)) + const char *string = nullptr; + PyObject *parts_list = nullptr; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "i|zO", keywords, + &length, &string, &parts_list)) return -1; if (length <= 0) @@ -663,17 +1014,85 @@ disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs) return -1; } - if (strlen (string) == 0) + if (parts_list == Py_None) + parts_list = nullptr; + + if (string != nullptr && parts_list != nullptr) { - PyErr_SetString (PyExc_ValueError, - _("String must not be empty.")); + PyErr_Format (PyExc_ValueError, + _("Cannot use 'string' and 'parts' when creating %s."), + Py_TYPE (self)->tp_name); return -1; } - disasm_result_object *obj = (disasm_result_object *) self; - disasmpy_init_disassembler_result (obj, length, std::string (string)); + if (string != nullptr) + { + if (strlen (string) == 0) + { + PyErr_SetString (PyExc_ValueError, + _("String must not be empty.")); + return -1; + } + + disasm_result_object *obj = (disasm_result_object *) self; + std::vector> content; + gdbpy_ref<> text_part + = make_disasm_text_part (std::string (string), dis_style_text); + content.emplace_back (text_part.release ()); + disasmpy_init_disassembler_result (obj, length, std::move (content)); + } + else + { + if (!PySequence_Check (parts_list)) + { + PyErr_SetString (PyExc_TypeError, + _("'parts' argument is not a sequence")); + return -1; + } + + Py_ssize_t parts_count = PySequence_Size (parts_list); + if (parts_count <= 0) + { + PyErr_SetString (PyExc_ValueError, + _("'parts' list must not be empty.")); + return -1; + } + + disasm_result_object *obj = (disasm_result_object *) self; + std::vector> content (parts_count); + + struct gdbarch *gdbarch = nullptr; + for (Py_ssize_t i = 0; i < parts_count; ++i) + { + gdbpy_ref<> part (PySequence_GetItem (parts_list, i)); + + if (part == nullptr) + return -1; + + if (Py_TYPE (part.get ()) == &disasm_addr_part_object_type) + { + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) part.get (); + gdb_assert (addr_part->gdbarch != nullptr); + if (gdbarch == nullptr) + gdbarch = addr_part->gdbarch; + else if (addr_part->gdbarch != gdbarch) + { + PyErr_SetString (PyExc_ValueError, + _("Inconsistent gdb.Architectures used " + "in 'parts' sequence.")); + return -1; + } + } + + content[i] = std::move (part); + } + + disasmpy_init_disassembler_result (obj, length, std::move (content)); + } return 0; + } /* Implement __repr__ for the DisassemblerResult type. */ @@ -683,12 +1102,12 @@ disasmpy_result_repr (PyObject *self) { disasm_result_object *obj = (disasm_result_object *) self; - gdb_assert (obj->content != nullptr); + gdb_assert (obj->parts != nullptr); - return PyUnicode_FromFormat ("<%s length=%d string=\"%s\">", + return PyUnicode_FromFormat ("<%s length=%d string=\"%U\">", Py_TYPE (obj)->tp_name, obj->length, - obj->content->string ().c_str ()); + disasmpy_result_str (self)); } /* Implement memory_error_func callback for disassemble_info. Extract the @@ -712,16 +1131,22 @@ gdbpy_disassembler::print_address_func (bfd_vma addr, { gdbpy_disassembler *dis = static_cast (info->application_data); - print_address (dis->arch (), addr, dis->stream ()); + + gdbpy_ref<> addr_part + = make_disasm_addr_part (dis->arch (), addr); + dis->m_parts.emplace_back (std::move (addr_part)); } /* constructor. */ gdbpy_disassembler::gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source) - : gdb_printing_disassembler (obj->gdbarch, &m_string_file, - read_memory_func, memory_error_func, - print_address_func), + : gdb_disassemble_info (obj->gdbarch, + read_memory_func, + memory_error_func, + print_address_func, + fprintf_func, + fprintf_styled_func), m_disasm_info_object (obj), m_memory_source (memory_source) { /* Nothing. */ } @@ -932,20 +1357,39 @@ gdbpy_print_insn (struct gdbarch *gdbarch, CORE_ADDR memaddr, return gdb::optional (-1); } - /* Validate the text of the disassembled instruction. */ - gdb_assert (result_obj->content != nullptr); - std::string string (std::move (result_obj->content->release ())); - if (strlen (string.c_str ()) == 0) + /* It is impossible to create a DisassemblerResult object with an empty + parts list. We know that each part results in a non-empty string, so + we know that the instruction disassembly will not be the empty + string. */ + gdb_assert (result_obj->parts->size () > 0); + + /* Now print out the parts that make up this instruction. */ + for (auto &p : *result_obj->parts) { - PyErr_SetString (PyExc_ValueError, - _("String attribute must not be empty.")); - gdbpy_print_stack (); - return gdb::optional (-1); + if (Py_TYPE (p.get ()) == &disasm_text_part_object_type) + { + disasm_text_part_object *text_part + = (disasm_text_part_object *) p.get (); + gdb_assert (text_part->string != nullptr); + info->fprintf_styled_func (info->stream, text_part->style, + "%s", text_part->string->c_str ()); + } + else + { + gdb_assert (Py_TYPE (p.get ()) == &disasm_addr_part_object_type); + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) p.get (); + /* A DisassemblerAddressPart can only be created by calling a + method on DisassembleInfo, and the gdbarch is copied from the + DisassembleInfo into the DisassemblerAddressPart. As the + DisassembleInfo has its gdbarch initialised from GDBARCH in + this scope, and this architecture can't be changed, then the + following assert should hold. */ + gdb_assert (addr_part->gdbarch == gdbarch); + info->print_address_func (addr_part->address, info); + } } - /* Print the disassembled instruction back to core GDB, and return the - length of the disassembled instruction. */ - info->fprintf_func (info->stream, "%s", string.c_str ()); return gdb::optional (length); } @@ -956,10 +1400,143 @@ static void disasmpy_dealloc_result (PyObject *self) { disasm_result_object *obj = (disasm_result_object *) self; - delete obj->content; + delete obj->parts; Py_TYPE (self)->tp_free (self); } +/* The tp_init callback for the DisassemblerPart type. This just raises an + exception, which prevents the user from creating objects of this type. + Instead the user should create instances of a sub-class. */ + +static int +disasmpy_part_init (PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyErr_SetString (PyExc_RuntimeError, + _("Cannot create instances of DisassemblerPart.")); + return -1; +} + +/* Return a string representing STYLE. The returned string is used as a + constant defined in the gdb.disassembler module. */ + +static const char * +get_style_name (enum disassembler_style style) +{ + switch (style) + { + case dis_style_text: return "STYLE_TEXT"; + case dis_style_mnemonic: return "STYLE_MNEMONIC"; + case dis_style_sub_mnemonic: return "STYLE_SUB_MNEMONIC"; + case dis_style_assembler_directive: return "STYLE_ASSEMBLER_DIRECTIVE"; + case dis_style_register: return "STYLE_REGISTER"; + case dis_style_immediate: return "STYLE_IMMEDIATE"; + case dis_style_address: return "STYLE_ADDRESS"; + case dis_style_address_offset: return "STYLE_ADDRESS_OFFSET"; + case dis_style_symbol: return "STYLE_SYMBOL"; + case dis_style_comment_start: return "STYLE_COMMENT_START"; + } + + gdb_assert_not_reached ("unknown disassembler style"); +} + +/* Implement DisassemblerTextPart.__repr__ method. */ + +static PyObject * +disasmpy_text_part_repr (PyObject *self) +{ + disasm_text_part_object *obj = (disasm_text_part_object *) self; + + gdb_assert (obj->string != nullptr); + + return PyUnicode_FromFormat ("<%s string='%s', style='%s'>", + Py_TYPE (obj)->tp_name, + obj->string->c_str (), + get_style_name (obj->style)); +} + +/* Implement DisassemblerTextPart.__str__ attribute. */ + +static PyObject * +disasmpy_text_part_str (PyObject *self) +{ + disasm_text_part_object *obj = (disasm_text_part_object *) self; + + return PyUnicode_Decode (obj->string->c_str (), obj->string->size (), + host_charset (), nullptr); +} + +/* Implement DisassemblerTextPart.string attribute. */ + +static PyObject * +disasmpy_text_part_string (PyObject *self, void *closure) +{ + return disasmpy_text_part_str (self); +} + +/* Implement DisassemblerTextPart.style attribute. */ + +static PyObject * +disasmpy_text_part_style (PyObject *self, void *closure) +{ + disasm_text_part_object *obj = (disasm_text_part_object *) self; + + LONGEST style_val = (LONGEST) obj->style; + return gdb_py_object_from_longest (style_val).release (); +} + +/* Implement DisassemblerAddressPart.__repr__ method. */ + +static PyObject * +disasmpy_addr_part_repr (PyObject *self) +{ + disasm_addr_part_object *obj = (disasm_addr_part_object *) self; + + return PyUnicode_FromFormat ("<%s address='%s'>", + Py_TYPE (obj)->tp_name, + core_addr_to_string_nz (obj->address)); +} + +/* Implement DisassemblerAddressPart.__str__ attribute. */ + +static PyObject * +disasmpy_addr_part_str (PyObject *self) +{ + disasm_addr_part_object *obj = (disasm_addr_part_object *) self; + + std::string str; + try + { + string_file buf; + print_address (obj->gdbarch, obj->address, &buf); + str = buf.release (); + } + catch (const gdb_exception &except) + { + GDB_PY_HANDLE_EXCEPTION (except); + } + + return PyUnicode_Decode (str.c_str (), str.size (), + host_charset (), nullptr); +} + +/* Implement DisassemblerAddressPart.string attribute. */ + +static PyObject * +disasmpy_addr_part_string (PyObject *self, void *closure) +{ + return disasmpy_addr_part_str (self); +} + +/* Implement DisassemblerAddressPart.address attribute. */ + +static PyObject * +disasmpy_addr_part_address (PyObject *self, void *closure) +{ + disasm_addr_part_object *obj = (disasm_addr_part_object *) self; + + return gdb_py_object_from_longest (obj->address).release (); +} + /* The get/set attributes of the gdb.disassembler.DisassembleInfo type. */ static gdb_PyGetSetDef disasm_info_object_getset[] = { @@ -982,6 +1559,14 @@ Read LEN octets for the instruction to disassemble." }, { "is_valid", disasmpy_info_is_valid, METH_NOARGS, "is_valid () -> Boolean.\n\ Return true if this DisassembleInfo is valid, false if not." }, + { "text_part", (PyCFunction) disasmpy_info_make_text_part, + METH_VARARGS | METH_KEYWORDS, + "text_part (STRING, STYLE) -> DisassemblerTextPart\n\ +Create a new text part, with contents STRING styled with STYLE." }, + { "address_part", (PyCFunction) disasmpy_info_make_address_part, + METH_VARARGS | METH_KEYWORDS, + "address_part (ADDRESS) -> DisassemblerAddressPart\n\ +Create a new address part representing ADDRESS." }, {nullptr} /* Sentinel */ }; @@ -992,6 +1577,28 @@ static gdb_PyGetSetDef disasm_result_object_getset[] = { "Length of the disassembled instruction.", nullptr }, { "string", disasmpy_result_string, nullptr, "String representing the disassembled instruction.", nullptr }, + { "parts", disasmpy_result_parts, nullptr, + "List of all the separate disassembly parts", nullptr }, + { nullptr } /* Sentinel */ +}; + +/* The get/set attributes of the gdb.disassembler.DisassemblerTextPart type. */ + +static gdb_PyGetSetDef disasmpy_text_part_getset[] = { + { "string", disasmpy_text_part_string, nullptr, + "String representing a text part.", nullptr }, + { "style", disasmpy_text_part_style, nullptr, + "The style of this text part.", nullptr }, + { nullptr } /* Sentinel */ +}; + +/* The get/set attributes of the gdb.disassembler.DisassemblerAddressPart type. */ + +static gdb_PyGetSetDef disasmpy_addr_part_getset[] = { + { "string", disasmpy_addr_part_string, nullptr, + "String representing an address part.", nullptr }, + { "address", disasmpy_addr_part_address, nullptr, + "The address of this address part.", nullptr }, { nullptr } /* Sentinel */ }; @@ -1046,6 +1653,13 @@ gdbpy_initialize_disasm () PyObject *dict = PyImport_GetModuleDict (); PyDict_SetItemString (dict, "_gdb.disassembler", gdb_disassembler_module); + for (int i = 0; i <= (int) dis_style_comment_start; ++i) + { + const char *style_name = get_style_name ((enum disassembler_style) i); + if (PyModule_AddIntConstant (gdb_disassembler_module, style_name, i) < 0) + return -1; + } + disasm_info_object_type.tp_new = PyType_GenericNew; if (PyType_Ready (&disasm_info_object_type) < 0) return -1; @@ -1062,6 +1676,32 @@ gdbpy_initialize_disasm () (PyObject *) &disasm_result_object_type) < 0) return -1; + disasm_part_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_part_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassemblerPart", + (PyObject *) &disasm_part_object_type) < 0) + return -1; + + disasm_addr_part_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_addr_part_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, + "DisassemblerAddressPart", + (PyObject *) &disasm_addr_part_object_type) < 0) + return -1; + + disasm_text_part_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_text_part_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, + "DisassemblerTextPart", + (PyObject *) &disasm_text_part_object_type) < 0) + return -1; + return 0; } @@ -1152,3 +1792,132 @@ PyTypeObject disasm_result_object_type = { disasmpy_result_init, /* tp_init */ 0, /* tp_alloc */ }; + +/* Describe the gdb.disassembler.DisassemblerPart type. This type exists + only as an abstract base-class for the various part sub-types. The + init method for this type throws an error. As such we don't both to + provide a tp_repr method for this parent class. */ + +PyTypeObject disasm_part_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerPart", /*tp_name*/ + sizeof (PyObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "GDB object, representing part of a disassembled instruction", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + disasmpy_part_init, /* tp_init */ + 0, /* tp_alloc */ +}; + +/* Describe the gdb.disassembler.DisassemblerTextPart type. */ + +PyTypeObject disasm_text_part_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerTextPart", /*tp_name*/ + sizeof (disasm_text_part_object_type), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + disasmpy_text_part_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + disasmpy_text_part_str, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "GDB object, representing a text part of an instruction", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + disasmpy_text_part_getset, /* tp_getset */ + &disasm_part_object_type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ +}; + +/* Describe the gdb.disassembler.DisassemblerAddressPart type. */ + +PyTypeObject disasm_addr_part_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerAddressPart", /*tp_name*/ + sizeof (disasm_addr_part_object), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + disasmpy_addr_part_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + disasmpy_addr_part_str, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "GDB object, representing an address part of an instruction", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + disasmpy_addr_part_getset, /* tp_getset */ + &disasm_part_object_type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ +}; diff --git a/gdb/testsuite/gdb.python/py-disasm.exp b/gdb/testsuite/gdb.python/py-disasm.exp index 5cbf02fc9fe..304393f71ab 100644 --- a/gdb/testsuite/gdb.python/py-disasm.exp +++ b/gdb/testsuite/gdb.python/py-disasm.exp @@ -69,6 +69,12 @@ set nop "(nop|nop\t0)" set unknown_error_pattern "unknown disassembler error \\(error = -1\\)" set addr_pattern "\r\n=> ${curr_pc_pattern} <\[^>\]+>:\\s+" set base_pattern "${addr_pattern}${nop}" + +# Helper proc to format a Python exception of TYPE with MSG. +proc make_exception_pattern { type msg } { + return "${::addr_pattern}Python Exception : $msg\r\n\r\n${::unknown_error_pattern}" +} + set test_plans \ [list \ [list "" "${base_pattern}\r\n.*"] \ @@ -90,13 +96,40 @@ set test_plans \ [list "RethrowMemoryErrorDisassembler" "${addr_pattern}Cannot access memory at address $hex"] \ [list "ReadMemoryMemoryErrorDisassembler" "${addr_pattern}Cannot access memory at address ${curr_pc_pattern}"] \ [list "ReadMemoryGdbErrorDisassembler" "${addr_pattern}read_memory raised GdbError\r\n${unknown_error_pattern}"] \ - [list "ReadMemoryRuntimeErrorDisassembler" "${addr_pattern}Python Exception : read_memory raised RuntimeError\r\n\r\n${unknown_error_pattern}"] \ + [list "ReadMemoryRuntimeErrorDisassembler" \ + [make_exception_pattern "RuntimeError" \ + "read_memory raised RuntimeError"]] \ [list "ReadMemoryCaughtMemoryErrorDisassembler" "${addr_pattern}${nop}\r\n.*"] \ [list "ReadMemoryCaughtGdbErrorDisassembler" "${addr_pattern}${nop}\r\n.*"] \ [list "ReadMemoryCaughtRuntimeErrorDisassembler" "${addr_pattern}${nop}\r\n.*"] \ - [list "MemorySourceNotABufferDisassembler" "${addr_pattern}Python Exception : Result from read_memory is not a buffer\r\n\r\n${unknown_error_pattern}"] \ - [list "MemorySourceBufferTooLongDisassembler" "${addr_pattern}Python Exception : Buffer returned from read_memory is sized $decimal instead of the expected $decimal\r\n\r\n${unknown_error_pattern}"] \ - [list "ResultOfWrongType" "${addr_pattern}Python Exception : Result is not a DisassemblerResult.\r\n.*"]] + [list "MemorySourceNotABufferDisassembler" \ + [make_exception_pattern "TypeError" \ + "Result from read_memory is not a buffer"]] \ + [list "MemorySourceBufferTooLongDisassembler" \ + [make_exception_pattern "ValueError" \ + "Buffer returned from read_memory is sized $decimal instead of the expected $decimal"]] \ + [list "ResultOfWrongType" \ + [make_exception_pattern "TypeError" \ + "Result is not a DisassemblerResult."]] \ + [list "ErrorCreatingTextPart_NoArgs" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'style' \\(pos 1\\)"]] \ + [list "ErrorCreatingAddressPart_NoArgs" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'address' \\(pos 1\\)"]] \ + [list "ErrorCreatingTextPart_NoString" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'string' \\(pos 2\\)"]] \ + [list "ErrorCreatingTextPart_NoStyle" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'style' \\(pos 1\\)"]] \ + [list "All_Text_Part_Styles" "${addr_pattern}p1p2p3p4p5p6p7p8p9p10\r\n.*"] \ + [list "ErrorCreatingTextPart_StringAndParts" \ + [make_exception_pattern "ValueError" \ + "Cannot use 'string' and 'parts' when creating gdb\\.disassembler\\.DisassemblerResult\\."]] \ + [list "Build_Result_Using_All_Parts" \ + "${addr_pattern}fake\treg, ${curr_pc_pattern}(?: <\[^>\]+>)?, 123\r\n.*"] \ + ] # Now execute each test plan. foreach plan $test_plans { @@ -216,13 +249,48 @@ with_test_prefix "Bad DisassembleInfo creation" { "Error while executing Python code\\."] } -# Test that we can't inherit from the DisassemblerResult class. -gdb_test_multiline "Sub-class a breakpoint" \ - "python" "" \ - "class InvalidResultType(gdb.disassembler.DisassemblerResult):" "" \ - " def __init__(self):" "" \ - " pass" "" \ - "end" \ +# Some of the disassembler related types should not be sub-typed, +# check these now. +with_test_prefix "check inheritance" { + foreach_with_prefix type {gdb.disassembler.DisassemblerResult \ + gdb.disassembler.DisassemblerPart + gdb.disassembler.DisassemblerTextPart \ + gdb.disassembler.DisassemblerAddressPart} { + set type_ptn [string_to_regexp $type] + gdb_test_multiline "Sub-class a breakpoint" \ + "python" "" \ + "class InvalidResultType($type):" "" \ + " def __init__(self):" "" \ + " pass" "" \ + "end" \ + [multi_line \ + "TypeError: type '${type_ptn}' is not an acceptable base type" \ + "Error while executing Python code\\."] + } +} + + +# Test some error conditions when creating a DisassemblerResult object. +gdb_test "python result = gdb.disassembler.DisassemblerResult()" \ [multi_line \ - "TypeError: type 'gdb\\.disassembler\\.DisassemblerResult' is not an acceptable base type" \ - "Error while executing Python code\\."] + "TypeError: function missing required argument 'length' \\(pos 1\\)" \ + "Error while executing Python code\\."] \ + "try to create a DisassemblerResult without a length argument" + +foreach len {0 -1} { + gdb_test "python result = gdb.disassembler.DisassemblerResult($len)" \ + [multi_line \ + "ValueError: Length must be greater than 0\\." \ + "Error while executing Python code\\."] \ + "try to create a DisassemblerResult with length $len" +} + +# Check we can't directly create DisassemblerTextPart or +# DisassemblerAddressPart objects. +foreach type {DisassemblerTextPart DisassemblerAddressPart} { + gdb_test "python result = gdb.disassembler.${type}()" \ + [multi_line \ + "RuntimeError: Cannot create instances of DisassemblerPart\\." \ + "Error while executing Python code\\."] \ + "try to create an instance of ${type}" +} diff --git a/gdb/testsuite/gdb.python/py-disasm.py b/gdb/testsuite/gdb.python/py-disasm.py index 17a7e752935..ec6b0e8deca 100644 --- a/gdb/testsuite/gdb.python/py-disasm.py +++ b/gdb/testsuite/gdb.python/py-disasm.py @@ -25,6 +25,26 @@ from gdb.disassembler import Disassembler, DisassemblerResult current_pc = None +def builtin_disassemble_wrapper(info): + result = gdb.disassembler.builtin_disassemble(info) + assert result.length > 0 + assert len(result.parts) > 0 + tmp_str = "" + for p in result.parts: + assert(p.string == str(p)) + tmp_str += p.string + assert tmp_str == result.string + return result + + +def check_building_disassemble_result(): + """Check that we can create DisassembleResult objects correctly.""" + + result = gdb.disassembler.DisassemblerResult() + + print("PASS") + + def is_nop(s): return s == "nop" or s == "nop\t0" @@ -70,7 +90,7 @@ class ShowInfoRepr(TestDisassembler): def disassemble(self, info): comment = "\t## " + repr(info) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) string = result.string + comment length = result.length return DisassemblerResult(length=length, string=string) @@ -94,7 +114,7 @@ class ShowInfoSubClassRepr(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) comment = "\t## " + repr(info) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) string = result.string + comment length = result.length return DisassemblerResult(length=length, string=string) @@ -106,7 +126,7 @@ class ShowResultRepr(TestDisassembler): output.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) comment = "\t## " + repr(result) string = result.string + comment length = result.length @@ -118,11 +138,11 @@ class ShowResultStr(TestDisassembler): resulting string in a comment within the disassembler output.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) comment = "\t## " + str(result) string = result.string + comment length = result.length - return DisassemblerResult(length=length, string=string) + return DisassemblerResult(length=length, string=string, parts=None) class GlobalPreInfoDisassembler(TestDisassembler): @@ -138,7 +158,7 @@ class GlobalPreInfoDisassembler(TestDisassembler): if not isinstance(ar, gdb.Architecture): raise gdb.GdbError("invalid architecture type") - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t## ad = 0x%x, ar = %s" % (ad, ar.name()) return DisassemblerResult(result.length, text) @@ -148,7 +168,7 @@ class GlobalPostInfoDisassembler(TestDisassembler): """Check the attributes of DisassembleInfo after disassembly has occurred.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) ad = info.address ar = info.architecture @@ -169,7 +189,7 @@ class GlobalReadDisassembler(TestDisassembler): adds them as a comment to the disassembler output.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) len = result.length str = "" for o in range(len): @@ -187,7 +207,7 @@ class GlobalAddrDisassembler(TestDisassembler): """Check the gdb.format_address method.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) arch = info.architecture addr = info.address program_space = info.progspace @@ -214,7 +234,7 @@ class GdbErrorLateDisassembler(TestDisassembler): """Raise a GdbError after calling the builtin disassembler.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) raise gdb.GdbError("GdbError after builtin disassembler") @@ -222,7 +242,7 @@ class RuntimeErrorLateDisassembler(TestDisassembler): """Raise a RuntimeError after calling the builtin disassembler.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) raise RuntimeError("RuntimeError after builtin disassembler") @@ -235,7 +255,7 @@ class MemoryErrorEarlyDisassembler(TestDisassembler): info.read_memory(1, -info.address + 2) except gdb.MemoryError: tag = "## AFTER ERROR" - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t" + tag return DisassemblerResult(result.length, text) @@ -245,7 +265,7 @@ class MemoryErrorLateDisassembler(TestDisassembler): before we return a result.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) # The following read will throw an error. info.read_memory(1, -info.address + 2) return DisassemblerResult(1, "BAD") @@ -282,7 +302,7 @@ class TaggingDisassembler(TestDisassembler): self._tag = tag def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t## tag = %s" % self._tag return DisassemblerResult(result.length, text) @@ -307,7 +327,7 @@ class GlobalCachingDisassembler(TestDisassembler): and cache the DisassembleInfo so that it is not garbage collected.""" GlobalCachingDisassembler.cached_insn_disas.append(info) GlobalCachingDisassembler.cached_insn_disas.append(self.MyInfo(info)) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t## CACHED" return DisassemblerResult(result.length, text) @@ -373,7 +393,7 @@ class ReadMemoryMemoryErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class ReadMemoryGdbErrorDisassembler(TestDisassembler): @@ -389,7 +409,7 @@ class ReadMemoryGdbErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class ReadMemoryRuntimeErrorDisassembler(TestDisassembler): @@ -405,7 +425,7 @@ class ReadMemoryRuntimeErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class ReadMemoryCaughtMemoryErrorDisassembler(TestDisassembler): @@ -422,7 +442,7 @@ class ReadMemoryCaughtMemoryErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) try: - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) except gdb.MemoryError: return None @@ -441,7 +461,7 @@ class ReadMemoryCaughtGdbErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) try: - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) except gdb.GdbError as e: if e.args[0] == "exception message": return None @@ -462,7 +482,7 @@ class ReadMemoryCaughtRuntimeErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) try: - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) except RuntimeError as e: if e.args[0] == "exception message": return None @@ -479,7 +499,7 @@ class MemorySourceNotABufferDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class MemorySourceBufferTooLongDisassembler(TestDisassembler): @@ -501,7 +521,101 @@ class MemorySourceBufferTooLongDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) + + +class ErrorCreatingTextPart_NoArgs(TestDisassembler): + """Try to create a DisassemblerTextPart with no arguments.""" + + def disassemble(self, info): + part = info.text_part() + return None + + +class ErrorCreatingAddressPart_NoArgs(TestDisassembler): + """Try to create a DisassemblerAddressPart with no arguments.""" + + def disassemble(self, info): + part = info.address_part() + return None + + +class ErrorCreatingTextPart_NoString(TestDisassembler): + """Try to create a DisassemblerTextPart with no string argument.""" + + def disassemble(self, info): + part = info.text_part(gdb.disassembler.STYLE_TEXT) + return None + + +class ErrorCreatingTextPart_NoStyle(TestDisassembler): + """Try to create a DisassemblerTextPart with no string argument.""" + + def disassemble(self, info): + part = info.text_part(string="abc") + return None + + +class ErrorCreatingTextPart_StringAndParts(TestDisassembler): + """Try to create a DisassemblerTextPart with both a string and a parts list.""" + + def disassemble(self, info): + parts = [] + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "p1")) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "p2")) + + return DisassemblerResult(length=4, string="p1p2", parts=parts) + + +class All_Text_Part_Styles(TestDisassembler): + """Create text parts with all styles.""" + + def disassemble(self, info): + parts = [] + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "p1")) + parts.append(info.text_part(gdb.disassembler.STYLE_MNEMONIC, "p2")) + parts.append(info.text_part(gdb.disassembler.STYLE_SUB_MNEMONIC, "p3")) + parts.append(info.text_part(gdb.disassembler.STYLE_ASSEMBLER_DIRECTIVE, "p4")) + parts.append(info.text_part(gdb.disassembler.STYLE_REGISTER, "p5")) + parts.append(info.text_part(gdb.disassembler.STYLE_IMMEDIATE, "p6")) + parts.append(info.text_part(gdb.disassembler.STYLE_ADDRESS, "p7")) + parts.append(info.text_part(gdb.disassembler.STYLE_ADDRESS_OFFSET, "p8")) + parts.append(info.text_part(gdb.disassembler.STYLE_SYMBOL, "p9")) + parts.append(info.text_part(gdb.disassembler.STYLE_COMMENT_START, "p10")) + + result = builtin_disassemble_wrapper(info) + result = DisassemblerResult(length=result.length, parts=parts) + + tmp_str = ""; + for p in parts: + assert (p.string == str(p)) + tmp_str += str(p) + assert tmp_str == result.string + + return result + + +class Build_Result_Using_All_Parts(TestDisassembler): + """Disassemble an instruction and return a result that makes use of + text and address parts.""" + + def disassemble(self, info): + global current_pc + + parts = [] + parts.append(info.text_part(gdb.disassembler.STYLE_MNEMONIC, "fake")) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "\t")) + parts.append(info.text_part(gdb.disassembler.STYLE_REGISTER, "reg")) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, ", ")) + addr_part = info.address_part(current_pc) + assert addr_part.address == current_pc + parts.append(addr_part) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, ", ")) + parts.append(info.text_part(gdb.disassembler.STYLE_IMMEDIATE, "123")) + + result = builtin_disassemble_wrapper(info) + result = DisassemblerResult(length=result.length, parts=parts) + return result class BuiltinDisassembler(Disassembler): @@ -511,7 +625,7 @@ class BuiltinDisassembler(Disassembler): super().__init__("BuiltinDisassembler") def __call__(self, info): - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class AnalyzingDisassembler(Disassembler): @@ -606,7 +720,7 @@ class AnalyzingDisassembler(Disassembler): # Override the info object, this provides access to our # read_memory function. info = self.MyInfo(info, self._start, self._end, self._nop_bytes) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) # Record some informaiton about the first 'nop' instruction we find. if self._nop_index is None and is_nop(result.string): -- 2.30.2