Improved handling of location information (#225)
author Anders Dellien <anders@andersdellien.se>
Fri, 2 Aug 2019 13:56:49 +0000 (15:56 +0200)
committer Eli Bendersky <eliben@users.noreply.github.com>
Fri, 2 Aug 2019 13:56:49 +0000 (06:56 -0700)
This commit moves some of the location-handling code from the examples
to a new class (LocationParser) in order to make it more reusable.

Also adds two test files containing location information.

elftools/dwarf/locationlists.py
examples/dwarf_location_info.py [new file with mode: 0644]
examples/dwarf_location_lists.py [deleted file]
examples/reference_output/dwarf_location_info.out [new file with mode: 0644]
examples/reference_output/dwarf_location_lists.out [deleted file]
test/testfiles_for_location_info/test-dwarf2.o [new file with mode: 0755]
test/testfiles_for_location_info/test-dwarf4.o [new file with mode: 0755]

diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py
index 3d97af3cb0c0f16ec71b4e5a14a16adf6f409c55..5fba0c356fadc8e782195fcfc307a11f425f44ba 100644 (file)
@@ -11,11 +11,10 @@ from collections import namedtuple
 
 from ..common.utils import struct_parse
 
-
+LocationExpr = namedtuple('LocationExpr', 'loc_expr')
 LocationEntry = namedtuple('LocationEntry', 'begin_offset end_offset loc_expr')
 BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address')
 
-
 class LocationLists(object):
     """ A single location list is a Python list consisting of LocationEntry or
         BaseAddressEntry objects.
@@ -69,3 +68,56 @@ class LocationLists(object):
                     end_offset=end_offset,
                     loc_expr=loc_expr))
         return lst
+
+class LocationParser(object):
+    """ A parser for location information in DIEs.
+        Handles both location information contained within the attribute
+        itself (represented as a LocationExpr object) and references to
+        location lists in the .debug_loc section (represented as a
+        list).
+    """
+    def __init__(self, location_lists):
+        self.location_lists = location_lists
+
+    @staticmethod
+    def attribute_has_location(attr, dwarf_version):
+        """ Checks if a DIE attribute contains location information.
+        """
+        return (LocationParser._attribute_is_loclistptr_class(attr) and
+                (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or
+                 LocationParser._attribute_has_loc_list(attr, dwarf_version)))
+
+    def parse_from_attribute(self, attr, dwarf_version):
+        """ Parses a DIE attribute and returns either a LocationExpr or
+            a list.
+        """
+        if self.attribute_has_location(attr, dwarf_version):
+            if self._attribute_has_loc_expr(attr, dwarf_version):
+                return LocationExpr(attr.value)
+            elif self._attribute_has_loc_list(attr, dwarf_version):
+                return self.location_lists.get_location_list_at_offset(
+                    attr.value)
+        else:
+            raise ValueError("Attribute does not have location information")
+
+    #------ PRIVATE ------#
+
+    @staticmethod
+    def _attribute_has_loc_expr(attr, dwarf_version):
+        return (dwarf_version < 4 and attr.form == 'DW_FORM_block1' or
+                attr.form == 'DW_FORM_exprloc')
+
+    @staticmethod
+    def _attribute_has_loc_list(attr, dwarf_version):
+        return ((dwarf_version < 4 and
+                 attr.form in ('DW_FORM_data4', 'DW_FORM_data8')) or
+                attr.form == 'DW_FORM_sec_offset')
+
+    @staticmethod
+    def _attribute_is_loclistptr_class(attr):
+        return (attr.name in ( 'DW_AT_location', 'DW_AT_string_length',
+                               'DW_AT_const_value', 'DW_AT_return_addr',
+                               'DW_AT_data_member_location',
+                               'DW_AT_frame_base', 'DW_AT_segment',
+                               'DW_AT_static_link', 'DW_AT_use_location',
+                               'DW_AT_vtable_elem_location'))
diff --git a/examples/dwarf_location_info.py b/examples/dwarf_location_info.py
new file mode 100644 (file)
index 0000000..5258e49
--- /dev/null
@@ -0,0 +1,111 @@
+#-------------------------------------------------------------------------------
+# elftools example: dwarf_location_info.py
+#
+# Examine DIE entries which have either location list values or location
+# expression values and decode that information.
+#
+# Location information can either be completely contained within a DIE
+# (using 'DW_FORM_exprloc' in DWARFv4 or 'DW_FORM_block1' in earlier
+# versions) or be a reference to a location list contained within
+# the .debug_loc section (using 'DW_FORM_sec_offset' in DWARFv4 or
+# 'DW_FORM_data4' / 'DW_FORM_data8' in earlier versions).
+#
+# The LocationParser object parses the DIE attributes and handles both
+# formats.
+#
+# The directory 'test/testfiles_for_location_info' contains test files with
+# location information represented in both DWARFv4 and DWARFv2 forms.
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from __future__ import print_function
+import sys
+
+# If pyelftools is not installed, the example can also run from the root or
+# examples/ dir of the source distribution.
+sys.path[0:0] = ['.', '..']
+
+from elftools.common.py3compat import itervalues
+from elftools.elf.elffile import ELFFile
+from elftools.dwarf.descriptions import (
+    describe_DWARF_expr, set_global_machine_arch)
+from elftools.dwarf.locationlists import (
+    LocationEntry, LocationExpr, LocationParser)
+
+def process_file(filename):
+    print('Processing file:', filename)
+    with open(filename, 'rb') as f:
+        elffile = ELFFile(f)
+
+        if not elffile.has_dwarf_info():
+            print('  file has no DWARF info')
+            return
+
+        # get_dwarf_info returns a DWARFInfo context object, which is the
+        # starting point for all DWARF-based processing in pyelftools.
+        dwarfinfo = elffile.get_dwarf_info()
+
+        # The location lists are extracted by DWARFInfo from the .debug_loc
+        # section, and returned here as a LocationLists object.
+        location_lists = dwarfinfo.location_lists()
+
+        # This is required for the descriptions module to correctly decode
+        # register names contained in DWARF expressions.
+        set_global_machine_arch(elffile.get_machine_arch())
+
+        # Create a LocationParser object that parses the DIE attributes and
+        # creates objects representing the actual location information.
+        loc_parser = LocationParser(location_lists)
+
+        for CU in dwarfinfo.iter_CUs():
+            # DWARFInfo allows to iterate over the compile units contained in
+            # the .debug_info section. CU is a CompileUnit object, with some
+            # computed attributes (such as its offset in the section) and
+            # a header which conforms to the DWARF standard. The access to
+            # header elements is, as usual, via item-lookup.
+            print('  Found a compile unit at offset %s, length %s' % (
+                CU.cu_offset, CU['unit_length']))
+
+            # A CU provides a simple API to iterate over all the DIEs in it.
+            for DIE in CU.iter_DIEs():
+                # Go over all attributes of the DIE. Each attribute is an
+                # AttributeValue object (from elftools.dwarf.die), which we
+                # can examine.
+                for attr in itervalues(DIE.attributes):
+                    # Check if this attribute contains location information
+                    if loc_parser.attribute_has_location(attr, CU['version']):
+                        print('   DIE %s. attr %s.' % (DIE.tag, attr.name))
+                        loc = loc_parser.parse_from_attribute(attr,
+                                                              CU['version'])
+                        # We either get a list (in case the attribute is a
+                        # reference to the .debug_loc section) or a LocationExpr
+                        # object (in case the attribute itself contains location
+                        # information).
+                        if isinstance(loc, LocationExpr):
+                            print('      %s' % (
+                                describe_DWARF_expr(loc.loc_expr,
+                                                    dwarfinfo.structs)))
+                        elif isinstance(loc, list):
+                            print(show_loclist(loc,
+                                               dwarfinfo,
+                                               indent='      '))
+
+def show_loclist(loclist, dwarfinfo, indent):
+    """ Display a location list nicely, decoding the DWARF expressions
+        contained within.
+    """
+    d = []
+    for loc_entity in loclist:
+        if isinstance(loc_entity, LocationEntry):
+            d.append('%s <<%s>>' % (
+                loc_entity,
+                describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs)))
+        else:
+            d.append(str(loc_entity))
+    return '\n'.join(indent + s for s in d)
+
+if __name__ == '__main__':
+    if sys.argv[1] == '--test':
+        for filename in sys.argv[2:]:
+            process_file(filename)
diff --git a/examples/dwarf_location_lists.py b/examples/dwarf_location_lists.py
deleted file mode 100644 (file)
index a3a3982..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-#-------------------------------------------------------------------------------
-# elftools example: dwarf_location_lists.py
-#
-# Examine DIE entries which have location list values, and decode these
-# location lists.
-#
-# Eli Bendersky (eliben@gmail.com)
-# This code is in the public domain
-#-------------------------------------------------------------------------------
-from __future__ import print_function
-import sys
-
-# If pyelftools is not installed, the example can also run from the root or
-# examples/ dir of the source distribution.
-sys.path[0:0] = ['.', '..']
-
-
-from elftools.common.py3compat import itervalues
-from elftools.elf.elffile import ELFFile
-from elftools.dwarf.descriptions import (
-    describe_DWARF_expr, set_global_machine_arch)
-from elftools.dwarf.locationlists import LocationEntry
-
-
-def process_file(filename):
-    print('Processing file:', filename)
-    with open(filename, 'rb') as f:
-        elffile = ELFFile(f)
-
-        if not elffile.has_dwarf_info():
-            print('  file has no DWARF info')
-            return
-
-        # get_dwarf_info returns a DWARFInfo context object, which is the
-        # starting point for all DWARF-based processing in pyelftools.
-        dwarfinfo = elffile.get_dwarf_info()
-
-        # The location lists are extracted by DWARFInfo from the .debug_loc
-        # section, and returned here as a LocationLists object.
-        location_lists = dwarfinfo.location_lists()
-
-        # This is required for the descriptions module to correctly decode
-        # register names contained in DWARF expressions.
-        set_global_machine_arch(elffile.get_machine_arch())
-
-        for CU in dwarfinfo.iter_CUs():
-            # DWARFInfo allows to iterate over the compile units contained in
-            # the .debug_info section. CU is a CompileUnit object, with some
-            # computed attributes (such as its offset in the section) and
-            # a header which conforms to the DWARF standard. The access to
-            # header elements is, as usual, via item-lookup.
-            print('  Found a compile unit at offset %s, length %s' % (
-                CU.cu_offset, CU['unit_length']))
-
-            # A CU provides a simple API to iterate over all the DIEs in it.
-            for DIE in CU.iter_DIEs():
-                # Go over all attributes of the DIE. Each attribute is an
-                # AttributeValue object (from elftools.dwarf.die), which we
-                # can examine.
-                for attr in itervalues(DIE.attributes):
-                    if attribute_has_location_list(attr, CU['version']):
-                        # This is a location list. Its value is an offset into
-                        # the .debug_loc section, so we can use the location
-                        # lists object to decode it.
-                        loclist = location_lists.get_location_list_at_offset(
-                            attr.value)
-
-                        print('   DIE %s. attr %s.\n%s' % (
-                            DIE.tag,
-                            attr.name,
-                            show_loclist(loclist, dwarfinfo, indent='      ')))
-
-
-def show_loclist(loclist, dwarfinfo, indent):
-    """ Display a location list nicely, decoding the DWARF expressions
-        contained within.
-    """
-    d = []
-    for loc_entity in loclist:
-        if isinstance(loc_entity, LocationEntry):
-            d.append('%s <<%s>>' % (
-                loc_entity,
-                describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs)))
-        else:
-            d.append(str(loc_entity))
-    return '\n'.join(indent + s for s in d)
-
-
-def attribute_has_location_list(attr, dwarf_version):
-    """ Only some attributes can have location list values, if they have the
-        required DW_FORM (loclistptr "class" in DWARF spec v3)
-    """
-    if (attr.name in (  'DW_AT_location', 'DW_AT_string_length',
-                        'DW_AT_const_value', 'DW_AT_return_addr',
-                        'DW_AT_data_member_location', 'DW_AT_frame_base',
-                        'DW_AT_segment', 'DW_AT_static_link',
-                        'DW_AT_use_location', 'DW_AT_vtable_elem_location')):
-        if (dwarf_version < 4 and attr.form in ('DW_FORM_data4', 'DW_FORM_data8') or
-            attr.form == 'DW_FORM_sec_offset'):
-            return True
-    return False
-
-
-if __name__ == '__main__':
-    if sys.argv[1] == '--test':
-        for filename in sys.argv[2:]:
-            process_file(filename)
diff --git a/examples/reference_output/dwarf_location_info.out b/examples/reference_output/dwarf_location_info.out
new file mode 100644 (file)
index 0000000..9e1fe8e
--- /dev/null
@@ -0,0 +1,33 @@
+Processing file: ./examples/sample_exe64.elf
+  Found a compile unit at offset 0, length 115
+  Found a compile unit at offset 119, length 135
+   DIE DW_TAG_variable. attr DW_AT_location.
+      (DW_OP_addr: 400608)
+  Found a compile unit at offset 258, length 156
+   DIE DW_TAG_subprogram. attr DW_AT_frame_base.
+      LocationEntry(begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
+      LocationEntry(begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>>
+      LocationEntry(begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>>
+   DIE DW_TAG_formal_parameter. attr DW_AT_location.
+      (DW_OP_fbreg: -20)
+   DIE DW_TAG_formal_parameter. attr DW_AT_location.
+      (DW_OP_fbreg: -32)
+   DIE DW_TAG_variable. attr DW_AT_location.
+      (DW_OP_addr: 601018)
+  Found a compile unit at offset 418, length 300
+   DIE DW_TAG_subprogram. attr DW_AT_frame_base.
+      (DW_OP_breg7 (rsp): 8)
+   DIE DW_TAG_subprogram. attr DW_AT_frame_base.
+      LocationEntry(begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
+      LocationEntry(begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>>
+   DIE DW_TAG_formal_parameter. attr DW_AT_location.
+      LocationEntry(begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>>
+      LocationEntry(begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>>
+   DIE DW_TAG_formal_parameter. attr DW_AT_location.
+      LocationEntry(begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>>
+      LocationEntry(begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>>
+   DIE DW_TAG_formal_parameter. attr DW_AT_location.
+      LocationEntry(begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>>
+      LocationEntry(begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>>
+   DIE DW_TAG_variable. attr DW_AT_location.
+      LocationEntry(begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>>
diff --git a/examples/reference_output/dwarf_location_lists.out b/examples/reference_output/dwarf_location_lists.out
deleted file mode 100644 (file)
index 8788755..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-Processing file: ./examples/sample_exe64.elf
-  Found a compile unit at offset 0, length 115
-  Found a compile unit at offset 119, length 135
-  Found a compile unit at offset 258, length 156
-   DIE DW_TAG_subprogram. attr DW_AT_frame_base.
-      LocationEntry(begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
-      LocationEntry(begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>>
-      LocationEntry(begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>>
-  Found a compile unit at offset 418, length 300
-   DIE DW_TAG_subprogram. attr DW_AT_frame_base.
-      LocationEntry(begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
-      LocationEntry(begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>>
-   DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>>
-      LocationEntry(begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>>
-   DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>>
-      LocationEntry(begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>>
-   DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>>
-      LocationEntry(begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>>
-   DIE DW_TAG_variable. attr DW_AT_location.
-      LocationEntry(begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>>
diff --git a/test/testfiles_for_location_info/test-dwarf2.o b/test/testfiles_for_location_info/test-dwarf2.o
new file mode 100755 (executable)
index 0000000..9bc2a28
Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf2.o differ
diff --git a/test/testfiles_for_location_info/test-dwarf4.o b/test/testfiles_for_location_info/test-dwarf4.o
new file mode 100755 (executable)
index 0000000..187ce70
Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf4.o differ