From: Anders Dellien Date: Fri, 2 Aug 2019 13:56:49 +0000 (+0200) Subject: Improved handling of location information (#225) X-Git-Tag: v0.26~13 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=19fafd6e2dc521fa64bfffe1220589bf09711ad8;p=pyelftools.git Improved handling of location information (#225) This commit moves some of the location-handling code from the examples to a new class (LocationParser) in order to make it more reusable. Also adds two test files containing location information. --- diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index 3d97af3..5fba0c3 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -11,11 +11,10 @@ from collections import namedtuple from ..common.utils import struct_parse - +LocationExpr = namedtuple('LocationExpr', 'loc_expr') LocationEntry = namedtuple('LocationEntry', 'begin_offset end_offset loc_expr') BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address') - class LocationLists(object): """ A single location list is a Python list consisting of LocationEntry or BaseAddressEntry objects. @@ -69,3 +68,56 @@ class LocationLists(object): end_offset=end_offset, loc_expr=loc_expr)) return lst + +class LocationParser(object): + """ A parser for location information in DIEs. + Handles both location information contained within the attribute + itself (represented as a LocationExpr object) and references to + location lists in the .debug_loc section (represented as a + list). + """ + def __init__(self, location_lists): + self.location_lists = location_lists + + @staticmethod + def attribute_has_location(attr, dwarf_version): + """ Checks if a DIE attribute contains location information. + """ + return (LocationParser._attribute_is_loclistptr_class(attr) and + (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or + LocationParser._attribute_has_loc_list(attr, dwarf_version))) + + def parse_from_attribute(self, attr, dwarf_version): + """ Parses a DIE attribute and returns either a LocationExpr or + a list. + """ + if self.attribute_has_location(attr, dwarf_version): + if self._attribute_has_loc_expr(attr, dwarf_version): + return LocationExpr(attr.value) + elif self._attribute_has_loc_list(attr, dwarf_version): + return self.location_lists.get_location_list_at_offset( + attr.value) + else: + raise ValueError("Attribute does not have location information") + + #------ PRIVATE ------# + + @staticmethod + def _attribute_has_loc_expr(attr, dwarf_version): + return (dwarf_version < 4 and attr.form == 'DW_FORM_block1' or + attr.form == 'DW_FORM_exprloc') + + @staticmethod + def _attribute_has_loc_list(attr, dwarf_version): + return ((dwarf_version < 4 and + attr.form in ('DW_FORM_data4', 'DW_FORM_data8')) or + attr.form == 'DW_FORM_sec_offset') + + @staticmethod + def _attribute_is_loclistptr_class(attr): + return (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', + 'DW_AT_const_value', 'DW_AT_return_addr', + 'DW_AT_data_member_location', + 'DW_AT_frame_base', 'DW_AT_segment', + 'DW_AT_static_link', 'DW_AT_use_location', + 'DW_AT_vtable_elem_location')) diff --git a/examples/dwarf_location_info.py b/examples/dwarf_location_info.py new file mode 100644 index 0000000..5258e49 --- /dev/null +++ b/examples/dwarf_location_info.py @@ -0,0 +1,111 @@ +#------------------------------------------------------------------------------- +# elftools example: dwarf_location_info.py +# +# Examine DIE entries which have either location list values or location +# expression values and decode that information. +# +# Location information can either be completely contained within a DIE +# (using 'DW_FORM_exprloc' in DWARFv4 or 'DW_FORM_block1' in earlier +# versions) or be a reference to a location list contained within +# the .debug_loc section (using 'DW_FORM_sec_offset' in DWARFv4 or +# 'DW_FORM_data4' / 'DW_FORM_data8' in earlier versions). +# +# The LocationParser object parses the DIE attributes and handles both +# formats. +# +# The directory 'test/testfiles_for_location_info' contains test files with +# location information represented in both DWARFv4 and DWARFv2 forms. +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from __future__ import print_function +import sys + +# If pyelftools is not installed, the example can also run from the root or +# examples/ dir of the source distribution. +sys.path[0:0] = ['.', '..'] + +from elftools.common.py3compat import itervalues +from elftools.elf.elffile import ELFFile +from elftools.dwarf.descriptions import ( + describe_DWARF_expr, set_global_machine_arch) +from elftools.dwarf.locationlists import ( + LocationEntry, LocationExpr, LocationParser) + +def process_file(filename): + print('Processing file:', filename) + with open(filename, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + # get_dwarf_info returns a DWARFInfo context object, which is the + # starting point for all DWARF-based processing in pyelftools. + dwarfinfo = elffile.get_dwarf_info() + + # The location lists are extracted by DWARFInfo from the .debug_loc + # section, and returned here as a LocationLists object. + location_lists = dwarfinfo.location_lists() + + # This is required for the descriptions module to correctly decode + # register names contained in DWARF expressions. + set_global_machine_arch(elffile.get_machine_arch()) + + # Create a LocationParser object that parses the DIE attributes and + # creates objects representing the actual location information. + loc_parser = LocationParser(location_lists) + + for CU in dwarfinfo.iter_CUs(): + # DWARFInfo allows to iterate over the compile units contained in + # the .debug_info section. CU is a CompileUnit object, with some + # computed attributes (such as its offset in the section) and + # a header which conforms to the DWARF standard. The access to + # header elements is, as usual, via item-lookup. + print(' Found a compile unit at offset %s, length %s' % ( + CU.cu_offset, CU['unit_length'])) + + # A CU provides a simple API to iterate over all the DIEs in it. + for DIE in CU.iter_DIEs(): + # Go over all attributes of the DIE. Each attribute is an + # AttributeValue object (from elftools.dwarf.die), which we + # can examine. + for attr in itervalues(DIE.attributes): + # Check if this attribute contains location information + if loc_parser.attribute_has_location(attr, CU['version']): + print(' DIE %s. attr %s.' % (DIE.tag, attr.name)) + loc = loc_parser.parse_from_attribute(attr, + CU['version']) + # We either get a list (in case the attribute is a + # reference to the .debug_loc section) or a LocationExpr + # object (in case the attribute itself contains location + # information). + if isinstance(loc, LocationExpr): + print(' %s' % ( + describe_DWARF_expr(loc.loc_expr, + dwarfinfo.structs))) + elif isinstance(loc, list): + print(show_loclist(loc, + dwarfinfo, + indent=' ')) + +def show_loclist(loclist, dwarfinfo, indent): + """ Display a location list nicely, decoding the DWARF expressions + contained within. + """ + d = [] + for loc_entity in loclist: + if isinstance(loc_entity, LocationEntry): + d.append('%s <<%s>>' % ( + loc_entity, + describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs))) + else: + d.append(str(loc_entity)) + return '\n'.join(indent + s for s in d) + +if __name__ == '__main__': + if sys.argv[1] == '--test': + for filename in sys.argv[2:]: + process_file(filename) diff --git a/examples/dwarf_location_lists.py b/examples/dwarf_location_lists.py deleted file mode 100644 index a3a3982..0000000 --- a/examples/dwarf_location_lists.py +++ /dev/null @@ -1,107 +0,0 @@ -#------------------------------------------------------------------------------- -# elftools example: dwarf_location_lists.py -# -# Examine DIE entries which have location list values, and decode these -# location lists. -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -from __future__ import print_function -import sys - -# If pyelftools is not installed, the example can also run from the root or -# examples/ dir of the source distribution. -sys.path[0:0] = ['.', '..'] - - -from elftools.common.py3compat import itervalues -from elftools.elf.elffile import ELFFile -from elftools.dwarf.descriptions import ( - describe_DWARF_expr, set_global_machine_arch) -from elftools.dwarf.locationlists import LocationEntry - - -def process_file(filename): - print('Processing file:', filename) - with open(filename, 'rb') as f: - elffile = ELFFile(f) - - if not elffile.has_dwarf_info(): - print(' file has no DWARF info') - return - - # get_dwarf_info returns a DWARFInfo context object, which is the - # starting point for all DWARF-based processing in pyelftools. - dwarfinfo = elffile.get_dwarf_info() - - # The location lists are extracted by DWARFInfo from the .debug_loc - # section, and returned here as a LocationLists object. - location_lists = dwarfinfo.location_lists() - - # This is required for the descriptions module to correctly decode - # register names contained in DWARF expressions. - set_global_machine_arch(elffile.get_machine_arch()) - - for CU in dwarfinfo.iter_CUs(): - # DWARFInfo allows to iterate over the compile units contained in - # the .debug_info section. CU is a CompileUnit object, with some - # computed attributes (such as its offset in the section) and - # a header which conforms to the DWARF standard. The access to - # header elements is, as usual, via item-lookup. - print(' Found a compile unit at offset %s, length %s' % ( - CU.cu_offset, CU['unit_length'])) - - # A CU provides a simple API to iterate over all the DIEs in it. - for DIE in CU.iter_DIEs(): - # Go over all attributes of the DIE. Each attribute is an - # AttributeValue object (from elftools.dwarf.die), which we - # can examine. - for attr in itervalues(DIE.attributes): - if attribute_has_location_list(attr, CU['version']): - # This is a location list. Its value is an offset into - # the .debug_loc section, so we can use the location - # lists object to decode it. - loclist = location_lists.get_location_list_at_offset( - attr.value) - - print(' DIE %s. attr %s.\n%s' % ( - DIE.tag, - attr.name, - show_loclist(loclist, dwarfinfo, indent=' '))) - - -def show_loclist(loclist, dwarfinfo, indent): - """ Display a location list nicely, decoding the DWARF expressions - contained within. - """ - d = [] - for loc_entity in loclist: - if isinstance(loc_entity, LocationEntry): - d.append('%s <<%s>>' % ( - loc_entity, - describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs))) - else: - d.append(str(loc_entity)) - return '\n'.join(indent + s for s in d) - - -def attribute_has_location_list(attr, dwarf_version): - """ Only some attributes can have location list values, if they have the - required DW_FORM (loclistptr "class" in DWARF spec v3) - """ - if (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', - 'DW_AT_const_value', 'DW_AT_return_addr', - 'DW_AT_data_member_location', 'DW_AT_frame_base', - 'DW_AT_segment', 'DW_AT_static_link', - 'DW_AT_use_location', 'DW_AT_vtable_elem_location')): - if (dwarf_version < 4 and attr.form in ('DW_FORM_data4', 'DW_FORM_data8') or - attr.form == 'DW_FORM_sec_offset'): - return True - return False - - -if __name__ == '__main__': - if sys.argv[1] == '--test': - for filename in sys.argv[2:]: - process_file(filename) diff --git a/examples/reference_output/dwarf_location_info.out b/examples/reference_output/dwarf_location_info.out new file mode 100644 index 0000000..9e1fe8e --- /dev/null +++ b/examples/reference_output/dwarf_location_info.out @@ -0,0 +1,33 @@ +Processing file: ./examples/sample_exe64.elf + Found a compile unit at offset 0, length 115 + Found a compile unit at offset 119, length 135 + DIE DW_TAG_variable. attr DW_AT_location. + (DW_OP_addr: 400608) + Found a compile unit at offset 258, length 156 + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + LocationEntry(begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> + LocationEntry(begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + (DW_OP_fbreg: -20) + DIE DW_TAG_formal_parameter. attr DW_AT_location. + (DW_OP_fbreg: -32) + DIE DW_TAG_variable. attr DW_AT_location. + (DW_OP_addr: 601018) + Found a compile unit at offset 418, length 300 + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + (DW_OP_breg7 (rsp): 8) + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + LocationEntry(begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> + LocationEntry(begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> + LocationEntry(begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> + LocationEntry(begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> + DIE DW_TAG_variable. attr DW_AT_location. + LocationEntry(begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> diff --git a/examples/reference_output/dwarf_location_lists.out b/examples/reference_output/dwarf_location_lists.out deleted file mode 100644 index 8788755..0000000 --- a/examples/reference_output/dwarf_location_lists.out +++ /dev/null @@ -1,23 +0,0 @@ -Processing file: ./examples/sample_exe64.elf - Found a compile unit at offset 0, length 115 - Found a compile unit at offset 119, length 135 - Found a compile unit at offset 258, length 156 - DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> - LocationEntry(begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> - Found a compile unit at offset 418, length 300 - DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> - LocationEntry(begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> - LocationEntry(begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> - LocationEntry(begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> - DIE DW_TAG_variable. attr DW_AT_location. - LocationEntry(begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> diff --git a/test/testfiles_for_location_info/test-dwarf2.o b/test/testfiles_for_location_info/test-dwarf2.o new file mode 100755 index 0000000..9bc2a28 Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf2.o differ diff --git a/test/testfiles_for_location_info/test-dwarf4.o b/test/testfiles_for_location_info/test-dwarf4.o new file mode 100755 index 0000000..187ce70 Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf4.o differ