DWARF 5 operations and DWARF5 location lists (#418)
authorSeva Alekseyev <sevaa@yarxi.ru>
Thu, 16 Jun 2022 12:19:30 +0000 (08:19 -0400)
committerGitHub <noreply@github.com>
Thu, 16 Jun 2022 12:19:30 +0000 (05:19 -0700)
* Test binary for DWARFv5 operations

* DWARFv5 ops, part 1: entry_value, const_type, deref_type

* DWARFv5 ops, part 2: regval_type, implicit_pointer, convert

* DWARFv5 loclists section parsing, take 1

* Formatting fix

* Test fixes

* Lineprogram header file_entries with DWARFv5 now are indexable by string

* Excising the View column, if present, from GNU readelf..decodedline output

* Readelf test fixes

* Typo

* Formatting and comments

* More style fixes

14 files changed:
elftools/common/construct_utils.py
elftools/common/utils.py
elftools/dwarf/descriptions.py
elftools/dwarf/dwarf_expr.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/enums.py
elftools/dwarf/locationlists.py
elftools/dwarf/structs.py
elftools/elf/elffile.py
examples/reference_output/dwarf_location_info.out
scripts/readelf.py
test/run_readelf_tests.py
test/test_refaddr_bitness.py
test/testfiles_for_readelf/dwarf_v5ops.so.elf [new file with mode: 0644]

index 4b4a39205e9f96712cb66893d647fb0085e3c8de..64f1f9e7501c726a5526d79b77eb6b2c633de50d 100644 (file)
@@ -8,7 +8,7 @@
 #-------------------------------------------------------------------------------
 from ..construct import (
     Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil,
-    Rename, SizeofError
+    Rename, SizeofError, Construct
     )
 
 
@@ -89,3 +89,24 @@ def SLEB128(name):
     """ A construct creator for SLEB128 encoding.
     """
     return Rename(name, _SLEB128Adapter(_LEB128_reader()))
+
+class StreamOffset(Construct):
+    """
+    Captures the current stream offset 
+
+    Parameters:
+    * name - the name of the value
+
+    Example:
+    StreamOffset("item_offset")
+    """
+    __slots__ = []
+    def __init__(self, name):
+        Construct.__init__(self, name)
+        self._set_flag(self.FLAG_DYNAMIC)
+    def _parse(self, stream, context):
+        return stream.tell()
+    def _build(self, obj, stream, context):
+        context[self.name] = stream.tell()
+    def _sizeof(self, context):
+        return 0     
index d1fde2cacbba144b95c83a7b4b99942c4f1311b4..0ea417ce06979f803d0818378564d037abb93234 100644 (file)
@@ -10,6 +10,7 @@ from contextlib import contextmanager
 from .exceptions import ELFParseError, ELFError, DWARFError
 from .py3compat import int2byte
 from ..construct import ConstructError, ULInt8
+import os
 
 
 def merge_dicts(*dicts):
@@ -107,6 +108,19 @@ def read_blob(stream, length):
     """
     return [struct_parse(ULInt8(''), stream) for i in range(length)]
 
+def save_dwarf_section(section, filename):
+    """Debug helper: dump section contents into a file
+    Section is expected to be one of the debug_xxx_sec elements of DWARFInfo
+    """
+    stream = section.stream
+    pos = stream.tell()
+    stream.seek(0, os.SEEK_SET)
+    section.stream.seek(0)
+    with open(filename, 'wb') as file:
+        data = stream.read(section.size)
+        file.write(data)
+    stream.seek(pos, os.SEEK_SET)    
+
 #------------------------- PRIVATE -------------------------
 
 def _assert_with_exception(cond, msg, exception_type):
index 059c22c812375e0b536b5459951137921a9e5ae8..1934a2eeabd5ba5ca306774be83da06519d5527a 100644 (file)
@@ -523,6 +523,7 @@ _EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
     DW_AT_associated=_location_list_extra,
     DW_AT_data_location=_location_list_extra,
     DW_AT_stride=_location_list_extra,
+    DW_AT_call_value=_location_list_extra,
     DW_AT_import=_import_extra,
     DW_AT_GNU_call_site_value=_location_list_extra,
     DW_AT_GNU_call_site_data_value=_location_list_extra,
@@ -651,21 +652,21 @@ class ExprDumper(object):
             return '%s: %x' % (opcode_name, args[0])
         elif opcode_name in self._ops_with_two_decimal_args:
             return '%s: %s %s' % (opcode_name, args[0], args[1])
-        elif opcode_name == 'DW_OP_GNU_entry_value':
-            return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]]))
+        elif opcode_name in ('DW_OP_GNU_entry_value', 'DW_OP_entry_value'):
+            return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset) for deo in args[0]]))
         elif opcode_name == 'DW_OP_implicit_value':
             return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]]))
         elif opcode_name == 'DW_OP_GNU_parameter_ref':
             return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset)
-        elif opcode_name == 'DW_OP_GNU_implicit_pointer':
+        elif opcode_name in ('DW_OP_GNU_implicit_pointer', 'DW_OP_implicit_pointer'):
             return "%s: <0x%x> %d" % (opcode_name, args[0], args[1])
-        elif opcode_name == 'DW_OP_GNU_convert':
+        elif opcode_name in ('DW_OP_GNU_convert', 'DW_OP_convert'):
             return "%s <0x%x>" % (opcode_name, args[0] + cu_offset)
-        elif opcode_name == 'DW_OP_GNU_deref_type':
+        elif opcode_name in ('DW_OP_GNU_deref_type', 'DW_OP_deref_type'):
             return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset)
-        elif opcode_name == 'DW_OP_GNU_const_type':
+        elif opcode_name in ('DW_OP_GNU_const_type', 'DW_OP_const_type'):
             return "%s: <0x%x>  %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
-        elif opcode_name == 'DW_OP_GNU_regval_type':
+        elif opcode_name in ('DW_OP_GNU_regval_type', 'DW_OP_regval_type'):
             return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
         else:
             return '<unknown %s>' % opcode_name
index 39ceee71c154c75f2ad1601b20382d9b1f2a04b6..1e4f658c5d4445305d6a8cbfe16335ccade9b1a5 100644 (file)
@@ -244,6 +244,15 @@ def _init_dispatch_table(structs):
     add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32('')))
     add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset('')))
     add('DW_OP_implicit_value', parse_blob())
+    add('DW_OP_entry_value', parse_nestedexpr())
+    add('DW_OP_const_type', parse_typedblob())
+    add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
+                                                   structs.Dwarf_uleb128('')))    
+    add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''),
+                                              structs.Dwarf_uleb128('')))   
+    add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''),
+                                                        structs.Dwarf_sleb128(''))) 
+    add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128('')))                                              
     add('DW_OP_GNU_entry_value', parse_nestedexpr())
     add('DW_OP_GNU_const_type', parse_typedblob())
     add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
index 9642cc88cfbf4a70f3605a39cdd503f5efcdaf1a..8dc7028f160e16731ba82d9d787c538d2a04e801 100644 (file)
@@ -6,9 +6,11 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
+import os
 from collections import namedtuple
 from bisect import bisect_right
 
+from ..construct.lib.container import Container
 from ..common.exceptions import DWARFError
 from ..common.utils import (struct_parse, dwarf_assert,
                             parse_cstring_from_stream)
@@ -74,7 +76,9 @@ class DWARFInfo(object):
             debug_pubnames_sec,
             debug_addr_sec,
             debug_str_offsets_sec,
-            debug_line_str_sec):
+            debug_line_str_sec,
+            debug_loclists_sec,
+            debug_rnglists_sec): # Not parsed for now
         """ config:
                 A DwarfConfig object
 
@@ -93,9 +97,12 @@ class DWARFInfo(object):
         self.debug_loc_sec = debug_loc_sec
         self.debug_ranges_sec = debug_ranges_sec
         self.debug_line_sec = debug_line_sec
+        self.debug_addr_sec = debug_addr_sec
         self.debug_line_str_sec = debug_line_str_sec
         self.debug_pubtypes_sec = debug_pubtypes_sec
         self.debug_pubnames_sec = debug_pubnames_sec
+        self.debug_loclists_sec = debug_loclists_sec
+        self.debug_rnglists_sec = debug_rnglists_sec # Ignored for now
 
         # This is the DWARFStructs the context uses, so it doesn't depend on
         # DWARF format and address_size (these are determined per CU) - set them
@@ -339,7 +346,10 @@ class DWARFInfo(object):
         """ Get a LocationLists object representing the .debug_loc section of
             the DWARF data, or None if this section doesn't exist.
         """
-        if self.debug_loc_sec:
+        if self.debug_loclists_sec:
+            assert(self.debug_loc_sec is None) # Are there ever files with both kinds of location sections?
+            return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self)
+        elif self.debug_loc_sec:
             return LocationLists(self.debug_loc_sec.stream, self.structs)
         else:
             return None
@@ -487,9 +497,12 @@ class DWARFInfo(object):
         if lineprog_header.get('directories', False):
             lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
         if lineprog_header.get('file_names', False):
-            translate = namedtuple("file_entry", "name dir_index mtime length")
             lineprog_header.file_entry = tuple(
-                translate(e.get('DW_LNCT_path'), e.get('DW_LNCT_directory_index'), e.get('DW_LNCT_timestamp'), e.get('DW_LNCT_size'))
+                Container(**{
+                    'name':e.get('DW_LNCT_path'),
+                    'dir_index': e.get('DW_LNCT_directory_index'),
+                    'mtime': e.get('DW_LNCT_timestamp'),
+                    'length': e.get('DW_LNCT_size')})
                 for e in lineprog_header.file_names)
 
         # Calculate the offset to the next line program (see DWARF 6.2.4)
@@ -502,3 +515,4 @@ class DWARFInfo(object):
             structs=structs,
             program_start_offset=self.debug_line_sec.stream.tell(),
             program_end_offset=end_offset)
+
index a52e8034413350d07aa402145ac1aa668dcd3f9f..c38ebe01ec26f2af8a33cabcd10ea4fa072eb737 100644 (file)
@@ -290,6 +290,8 @@ ENUM_DW_AT = dict(
     DW_AT_GNU_pubnames                      = 0x2134,
     DW_AT_GNU_pubtypes                      = 0x2135,
     DW_AT_GNU_discriminator                 = 0x2136,
+    DW_AT_GNU_locviews                      = 0x2137,
+    DW_AT_GNU_entry_view                    = 0x2138,
 
     DW_AT_LLVM_include_path  = 0x3e00,
     DW_AT_LLVM_config_macros = 0x3e01,
@@ -415,3 +417,15 @@ ENUM_DW_UT = dict(
     DW_UT_lo_user       = 0x80,
     DW_UT_hi_user       = 0xff
 )
+
+ENUM_DW_LLE = dict(
+    DW_LLE_end_of_list      = 0x00,
+    DW_LLE_base_addressx    = 0x01,
+    DW_LLE_startx_endx      = 0x02,
+    DW_LLE_startx_length    = 0x03,
+    DW_LLE_offset_pair      = 0x04,
+    DW_LLE_default_location = 0x05,
+    DW_LLE_base_address     = 0x06,
+    DW_LLE_start_end        = 0x07,
+    DW_LLE_start_length     = 0x08    
+)
index e6c735f585e3a94f63d4cb9f148786d553f5ea29..e674d671c3206a9348f56c1e4eb7d848f3361952 100644 (file)
 #-------------------------------------------------------------------------------
 import os
 from collections import namedtuple
-
+from ..common.exceptions import DWARFError
 from ..common.utils import struct_parse
 
 LocationExpr = namedtuple('LocationExpr', 'loc_expr')
-LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr')
-BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address')
+LocationEntry = namedtuple('LocationEntry', 'entry_offset entry_length begin_offset end_offset loc_expr is_absolute')
+BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset entry_length base_address')
+LocationViewPair = namedtuple('LocationViewPair', 'entry_offset begin end')
 
 class LocationLists(object):
     """ A single location list is a Python list consisting of LocationEntry or
         BaseAddressEntry objects.
+
+        Starting with DWARF5, it may also contain LocationViewPair, but only
+        if scanning the section, never when requested for a DIE attribute.
+
+        The default location entries are returned as LocationEntry with 
+        begin_offset == end_offset == -1
+
+        Version determines whether the executable contains a debug_loc
+        section, or a DWARFv5 style debug_loclists one. Only the 4/5
+        distinction matters.
+
+        Dwarfinfo is only needed for DWARFv5 location entry encodings
+        that contain references to other sections (e. g. DW_LLE_startx_endx),
+        and only for location list enumeration.
     """
-    def __init__(self, stream, structs):
+    def __init__(self, stream, structs, version=4, dwarfinfo=None):
         self.stream = stream
         self.structs = structs
+        self.dwarfinfo = dwarfinfo
+        self.version = version
         self._max_addr = 2 ** (self.structs.address_size * 8) - 1
 
-    def get_location_list_at_offset(self, offset):
+    def get_location_list_at_offset(self, offset, die=None):
         """ Get a location list at the given offset in the section.
+        Passing the die is only necessary in DWARF5+, for decoding
+        location entry encodings that contain references to other sections.
         """
         self.stream.seek(offset, os.SEEK_SET)
-        return self._parse_location_list_from_stream()
+        return self._parse_location_list_from_stream_v5(die) if self.version >= 5 else self._parse_location_list_from_stream()
 
     def iter_location_lists(self):
-        """ Yield all location lists found in the section.
+        """ Iterates through location lists and view pairs. Yields lists of
+        LocationEntry, BaseAddressEntry, and LocationViewPair objects.
         """
-        # Just call _parse_location_list_from_stream until the stream ends
-        self.stream.seek(0, os.SEEK_END)
-        endpos = self.stream.tell()
+        # The location lists section was never meant for sequential access.
+        # Location lists are referenced by DIE attributes by offset or by index.
+        
+        # As of DWARFv5, it may contain, in addition to proper location lists,
+        # location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews
+        # attribute. A set of locview pairs (each pair being two ULEB128 values) may precede
+        # a location list; the former is referenced by the DW_AT_GNU_locviews attribute, the
+        # latter - by DW_AT_location (in the same DIE). Binutils' readelf dumps those.
+        # There is a view pair for each location-type entry in the list.
+        #
+        # Also, the section may contain gaps.
+        #
+        # Taking a cue from binutils, we would have to scan this section while looking at
+        # what's in DIEs.
+        stream = self.stream
+        stream.seek(0, os.SEEK_END)
+        endpos = stream.tell()
+
+        stream.seek(0, os.SEEK_SET)        
+
+        if self.version >= 5:
+            # Need to provide support for DW_AT_GNU_locviews. They are interspersed in
+            # the locations section, no way to tell where short of checking all DIEs
+            all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist
+            locviews = dict() # Map of locview offset to the respective loclist offset
+            cu_map = dict() # Map of loclist offsets to CUs
+            for cu in self.dwarfinfo.iter_CUs():
+                cu_ver = cu['version']
+                for die in cu.iter_DIEs():
+                    # A combination of location and locviews means there is a location list
+                    # preceded by several locview pairs
+                    if 'DW_AT_GNU_locviews' in die.attributes:
+                        assert('DW_AT_location' in die.attributes and
+                            LocationParser._attribute_has_loc_list(die.attributes['DW_AT_location'], cu_ver))
+                        views_offset = die.attributes['DW_AT_GNU_locviews'].value
+                        list_offset = die.attributes['DW_AT_location'].value
+                        locviews[views_offset] = list_offset
+                        cu_map[list_offset] = cu
+                        all_offsets.add(views_offset)
+
+                    # Scan other attributes for location lists
+                    for key in die.attributes:
+                        attr = die.attributes[key]
+                        if (key != 'DW_AT_location' and
+                            LocationParser.attribute_has_location(attr, cu_ver) and
+                            LocationParser._attribute_has_loc_list(attr, cu_ver)):
+                            list_offset = attr.value
+                            all_offsets.add(list_offset)
+                            cu_map[list_offset] = cu
+            all_offsets = list(all_offsets)
+            all_offsets.sort()
 
-        self.stream.seek(0, os.SEEK_SET)
-        while self.stream.tell() < endpos:
-            yield self._parse_location_list_from_stream()
+            # Loclists section is organized as an array of CUs, each length prefixed.
+            # We don't assume that the CUs go in the same order as the ones in info.
+            offset_index = 0
+            while stream.tell() < endpos:
+                # We are at the start of the CU block in the loclists now
+                unit_length = struct_parse(self.structs.Dwarf_initial_length(''), stream)
+                offset_past_len = stream.tell()
+                cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream)
+                assert(cu_header.version == 5)
+
+                # GNU binutils supports two traversal modes: by offsets in CU header, and sequential.
+                # We don't have a binary for the former yet. On the off chance that we one day might,
+                # let's parse the header anyway.
+
+                cu_end_offset = offset_past_len + unit_length
+                # Unit_length includes the header but doesn't include the length
+                
+                while stream.tell() < cu_end_offset:
+                    # Skip the gap to the next object
+                    next_offset = all_offsets[offset_index]
+                    if next_offset == stream.tell(): # At an object, either a loc list or a loc view pair
+                        locview_pairs = self._parse_locview_pairs(locviews)
+                        entries = self._parse_location_list_from_stream_v5()
+                        yield locview_pairs + entries
+                        offset_index += 1
+                    else: # We are at a gap - skip the gap to the next object or to the next CU
+                        if next_offset > cu_end_offset: # Gap at the CU end - the next object is in the next CU
+                            next_offset = cu_end_offset # And implicitly quit the loop within the CU
+                        stream.seek(next_offset, os.SEEK_SET)
+        else:
+            # Just call _parse_location_list_from_stream until the stream ends
+            while stream.tell() < endpos:
+                yield self._parse_location_list_from_stream()
 
     #------ PRIVATE ------#
 
@@ -56,7 +154,8 @@ class LocationLists(object):
                 break
             elif begin_offset == self._max_addr:
                 # Base address selection entry
-                lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset))
+                entry_length = self.stream.tell() - entry_offset
+                lst.append(BaseAddressEntry(entry_offset=entry_offset, entry_length=entry_length, base_address=end_offset))
             else:
                 # Location list entry
                 expr_len = struct_parse(
@@ -64,13 +163,60 @@ class LocationLists(object):
                 loc_expr = [struct_parse(self.structs.Dwarf_uint8(''),
                                          self.stream)
                                 for i in range(expr_len)]
+                entry_length = self.stream.tell() - entry_offset
                 lst.append(LocationEntry(
                     entry_offset=entry_offset,
+                    entry_length=entry_length,
                     begin_offset=begin_offset,
                     end_offset=end_offset,
-                    loc_expr=loc_expr))
+                    loc_expr=loc_expr,
+                    is_absolute = False))
+        return lst
+
+    # Also returns an array with BaseAddressEntry and LocationEntry
+    # Can't possibly support indexed values, since parsing those requires
+    # knowing the DIE context it came from
+    def _parse_location_list_from_stream_v5(self, die = None):
+        # This won't contain the terminator entry
+        lst = [self._translate_entry_v5(entry, die)
+            for entry
+            in struct_parse(self.structs.Dwarf_loclists_entries, self.stream)]
         return lst
 
+    # From V5 style entries to a LocationEntry/BaseAddressEntry
+    def _translate_entry_v5(self, entry, die):
+        off = entry.entry_offset
+        len = entry.entry_end_offset - off
+        type = entry.entry_type
+        if type == 'DW_LLE_base_address':
+            return BaseAddressEntry(off, len, entry.address)
+        elif type == 'DW_LLE_offset_pair':
+            return LocationEntry(off, len, entry.start_offset, entry.end_offset, entry.loc_expr, False)
+        elif type == 'DW_LLE_start_length':
+            return LocationEntry(off, len, entry.start_address, entry.start_address + entry.length, entry.loc_expr, True)
+        elif type == 'DW_LLE_start_end': # No test for this yet, but the format seems straightforward
+            return LocationEntry(off, len, entry.start_address, entry.end_address, entry.loc_expr, True)
+        elif type == 'DW_LLE_default_location': # No test for this either, and this is new in the API
+            return LocationEntry(off, len, -1, -1, entry.loc_expr, True)
+        elif type in ('DW_LLE_base_addressx', 'DW_LLE_startx_endx', 'DW_LLE_startx_length'):           
+            # We don't have sample binaries for those LLEs. Their proper parsing would
+            # require knowing the CU context (so that indices can be resolved to code offsets)
+            raise NotImplementedError("Location list entry type %s is not supported yet" % (type,))
+        else:
+            raise DWARFError(False, "Unknown DW_LLE code: %s" % (type,))
+
+    # locviews is a dict mapping locview offsets to the corresponding loclist offsets
+    def _parse_locview_pairs(self, locviews):
+        stream = self.stream
+        list_offset = locviews.get(stream.tell(), None)
+        pairs = []
+        if list_offset is not None:
+            while stream.tell() < list_offset:
+                pair = struct_parse(self.structs.Dwarf_locview_pair, stream)
+                pairs.append(LocationViewPair(pair.entry_offset, pair.begin, pair.end))
+            assert(stream.tell() == list_offset)
+        return pairs
+
 class LocationParser(object):
     """ A parser for location information in DIEs.
         Handles both location information contained within the attribute
@@ -89,7 +235,7 @@ class LocationParser(object):
                 (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or
                  LocationParser._attribute_has_loc_list(attr, dwarf_version)))
 
-    def parse_from_attribute(self, attr, dwarf_version):
+    def parse_from_attribute(self, attr, dwarf_version, die = None):
         """ Parses a DIE attribute and returns either a LocationExpr or
             a list.
         """
@@ -98,7 +244,11 @@ class LocationParser(object):
                 return LocationExpr(attr.value)
             elif self._attribute_has_loc_list(attr, dwarf_version):
                 return self.location_lists.get_location_list_at_offset(
-                    attr.value)
+                    attr.value, die)
+                # We don't yet know if the DIE context will be needed.
+                # We might get it without a full tree traversal using 
+                # attr.offset as a key, but we assume a good DWARF5
+                # aware consumer would pass a DIE along.
         else:
             raise ValueError("Attribute does not have location information")
 
index fb9b0b7d4fa6814309af437f4bf771b8e7d261e1..a1a286b41840ee994f0204aa7cfb9cd9278dc637 100644 (file)
@@ -7,15 +7,16 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-from elftools.construct.core import Subconstruct
-from elftools.construct.macros import Embedded
+from logging.config import valid_ident
 from ..construct import (
     UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
-    CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence
+    CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
+    Switch
     )
-from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
+from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
+    StreamOffset)
 from .enums import *
 
 
@@ -142,6 +143,7 @@ class DWARFStructs(object):
         self._create_nameLUT_header()
         self._create_string_offsets_table_header()
         self._create_address_table_header()
+        self._create_loclists_parsers()
 
     def _create_initial_length(self):
         def _InitialLength(name):
@@ -396,6 +398,41 @@ class DWARFStructs(object):
                     subcon=self.Dwarf_uint8('elem'),
                     length_field=length_field(''))
 
+    def _create_loclists_parsers(self):
+        """ Create structs for the debug_loclists CU header (DWARFv5, 7.29), its entries and locview pairs
+        """
+        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
+            # Unit_length parsed separately
+            self.Dwarf_uint16('version'),
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_uint8('segment_selector_size'),
+            PrefixedArray(
+                self.Dwarf_offset('offsets'),
+                self.Dwarf_uint32('')))
+
+        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))
+
+        self.Dwarf_loclists_entries = RepeatUntilExcluding(
+            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
+            Struct('entry',
+                StreamOffset('entry_offset'),
+                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
+                Embed(Switch('', lambda ctx: ctx.entry_type,
+                {
+                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
+                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
+                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
+                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
+                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
+                    'DW_LLE_default_location' : Struct('default_location', cld),
+                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
+                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
+                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
+                })),
+                StreamOffset('entry_end_offset')))
+
+        self.Dwarf_locview_pair = Struct('locview_pair',
+            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))
 
 class _InitialLengthAdapter(Adapter):
     """ A standard Construct adapter that expects a sub-construct
index 10367ad75a7ab8dbb57651a80456fc55924a5b5f..bdda624d7a43cdedd9ddb4a766f276ad8b4b397a 100644 (file)
@@ -218,7 +218,8 @@ class ELFFile(object):
                          '.debug_str', '.debug_line', '.debug_frame',
                          '.debug_loc', '.debug_ranges', '.debug_pubtypes',
                          '.debug_pubnames', '.debug_addr',
-                         '.debug_str_offsets', '.debug_line_str')
+                         '.debug_str_offsets', '.debug_line_str',
+                         '.debug_loclists', '.debug_rnglists')
 
 
         compressed = bool(self.get_section_by_name('.zdebug_info'))
@@ -232,7 +233,8 @@ class ELFFile(object):
          debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
          debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
          debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
-         debug_line_str_name, eh_frame_sec_name) = section_names
+         debug_line_str_name, debug_loclists_sec_name, debug_rnglists_sec_name,
+         eh_frame_sec_name) = section_names
 
         debug_sections = {}
         for secname in section_names:
@@ -265,7 +267,9 @@ class ELFFile(object):
                 debug_pubnames_sec=debug_sections[debug_pubnames_name],
                 debug_addr_sec=debug_sections[debug_addr_name],
                 debug_str_offsets_sec=debug_sections[debug_str_offsets_name],
-                debug_line_str_sec=debug_sections[debug_line_str_name]
+                debug_line_str_sec=debug_sections[debug_line_str_name],
+                debug_loclists_sec=debug_sections[debug_loclists_sec_name],
+                debug_rnglists_sec=debug_sections[debug_rnglists_sec_name]
                 )
 
     def has_ehabi_info(self):
index 01c8933b9768d3aed610879ea9911368cf1cdf20..57912444abcca45ef4d4784b681e54386ea5a850 100644 (file)
@@ -5,9 +5,9 @@ Processing file: ./examples/sample_exe64.elf
       (DW_OP_addr: 400608)
   Found a compile unit at offset 258, length 156
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
-      LocationEntry(entry_offset=0, begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
-      LocationEntry(entry_offset=20, begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>>
-      LocationEntry(entry_offset=40, begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>>
+      LocationEntry(entry_offset=0, entry_length=20, begin_offset=0, end_offset=1, loc_expr=[119, 8], is_absolute=False) <<(DW_OP_breg7 (rsp): 8)>>
+      LocationEntry(entry_offset=20, entry_length=20, begin_offset=1, end_offset=4, loc_expr=[119, 16], is_absolute=False) <<(DW_OP_breg7 (rsp): 16)>>
+      LocationEntry(entry_offset=40, entry_length=20, begin_offset=4, end_offset=43, loc_expr=[118, 16], is_absolute=False) <<(DW_OP_breg6 (rbp): 16)>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
       (DW_OP_fbreg: -20)
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
@@ -18,16 +18,16 @@ Processing file: ./examples/sample_exe64.elf
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
       (DW_OP_breg7 (rsp): 8)
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
-      LocationEntry(entry_offset=76, begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
-      LocationEntry(entry_offset=96, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>>
+      LocationEntry(entry_offset=76, entry_length=20, begin_offset=16, end_offset=64, loc_expr=[119, 8], is_absolute=False) <<(DW_OP_breg7 (rsp): 8)>>
+      LocationEntry(entry_offset=96, entry_length=21, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0], is_absolute=False) <<(DW_OP_breg7 (rsp): 64)>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(entry_offset=133, begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>>
-      LocationEntry(entry_offset=152, begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>>
+      LocationEntry(entry_offset=133, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[85], is_absolute=False) <<(DW_OP_reg5 (rdi))>>
+      LocationEntry(entry_offset=152, entry_length=19, begin_offset=85, end_offset=143, loc_expr=[94], is_absolute=False) <<(DW_OP_reg14 (r14))>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(entry_offset=187, begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>>
-      LocationEntry(entry_offset=206, begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>>
+      LocationEntry(entry_offset=187, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[84], is_absolute=False) <<(DW_OP_reg4 (rsi))>>
+      LocationEntry(entry_offset=206, entry_length=19, begin_offset=85, end_offset=138, loc_expr=[93], is_absolute=False) <<(DW_OP_reg13 (r13))>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(entry_offset=241, begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>>
-      LocationEntry(entry_offset=260, begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>>
+      LocationEntry(entry_offset=241, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[81], is_absolute=False) <<(DW_OP_reg1 (rdx))>>
+      LocationEntry(entry_offset=260, entry_length=19, begin_offset=85, end_offset=133, loc_expr=[92], is_absolute=False) <<(DW_OP_reg12 (r12))>>
    DIE DW_TAG_variable. attr DW_AT_location.
-      LocationEntry(entry_offset=295, begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>>
+      LocationEntry(entry_offset=295, entry_length=19, begin_offset=92, end_offset=123, loc_expr=[83], is_absolute=False) <<(DW_OP_reg3 (rbx))>>
index a45ec3019556d8533bb5515cfa0e359d5db3e689..6bd776badd2ed93c3befeefec53b04ff1adf329f 100755 (executable)
@@ -62,7 +62,7 @@ from elftools.dwarf.descriptions import (
     )
 from elftools.dwarf.constants import (
     DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
-from elftools.dwarf.locationlists import LocationParser, LocationEntry
+from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 from elftools.dwarf.enums import ENUM_DW_UT
@@ -1137,9 +1137,9 @@ class ReadElf(object):
                 cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
-            self._emitline('File name                            Line number    Starting address    View    Stmt' if ver5
-                else 'File name                            Line number    Starting address    Stmt')
-            # What goes into View on V5? To be seen...
+            self._emitline('File name                            Line number    Starting address    Stmt')
+            # GNU readelf also prints a View column, which we don't try to replicate.
+            # The autotest has logic in place to ignore that column.
 
             # Print each state's file, line and address information. For some
             # instructions other output is needed to be compatible with
@@ -1427,7 +1427,7 @@ class ReadElf(object):
                     self._dwarfinfo.CFI_entries())
 
     def _dump_debug_locations(self):
-        """ Dump the location lists from .debug_location section
+        """ Dump the location lists from .debug_loc/.debug_loclists section
         """
         def _get_cu_base(cu):
             top_die = cu.get_top_DIE()
@@ -1447,48 +1447,88 @@ class ReadElf(object):
         loc_lists = list(loc_lists.iter_location_lists())
         if len(loc_lists) == 0:
             # Present but empty locations section - readelf outputs a message
-            self._emitline("\nSection '%s' has no debugging data." % di.debug_loc_sec.name)
+            self._emitline("\nSection '%s' has no debugging data." % (di.debug_loclists_sec or di.debug_loc_sec).name)
             return
 
         # To dump a location list, one needs to know the CU.
-        # Scroll through DIEs once, list the known location list offsets
+        # Scroll through DIEs once, list the known location list offsets.
+        # Don't need this CU/DIE scan if all entries are absolute or prefixed by base,
+        # but let's not optimize for that yet.
         cu_map = dict() # Loc list offset => CU
         for cu in di.iter_CUs():
             for die in cu.iter_DIEs():
                 for key in die.attributes:
                     attr = die.attributes[key]
                     if (LocationParser.attribute_has_location(attr, cu['version']) and
-                        not LocationParser._attribute_has_loc_expr(attr, cu['version'])):
+                        LocationParser._attribute_has_loc_list(attr, cu['version'])):
                         cu_map[attr.value] = cu
 
         addr_size = di.config.default_address_size # In bytes, 4 or 8
         addr_width = addr_size * 2 # In hex digits, 8 or 16
         line_template = "    %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width)
 
-        self._emitline('Contents of the %s section:\n' % di.debug_loc_sec.name)
+        self._emitline('Contents of the %s section:\n' % (di.debug_loclists_sec or di.debug_loc_sec).name)
         self._emitline('    Offset   Begin            End              Expression')
         for loc_list in loc_lists:
-            cu = cu_map.get(loc_list[0].entry_offset, False)
-            if not cu:
-                raise ValueError("Location list can't be tracked to a CU")
-            base_ip = _get_cu_base(cu)
+            in_views = False
+            has_views = False
+            base_ip = None
+            loc_entry_count = 0
+            cu = None
             for entry in loc_list:
-                # TODO: support BaseAddressEntry lines
-                expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset)
-                postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
-                self._emitline(line_template % (
-                    entry.entry_offset,
-                    base_ip + entry.begin_offset,
-                    base_ip + entry.end_offset,
-                    expr,
-                    postfix))
+                if isinstance(entry, LocationViewPair):
+                    has_views = in_views = True
+                    # binutils prints the "v" before the address only conditionally; the exact condition is not yet figured out
+                    self._emitline("    %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end))
+                else:
+                    if in_views:
+                        in_views = False             
+                        self._emitline("")
+                    # Need the CU for this loclist, but the map is keyed by the offset
+                    # of the first entry in the loclist. Got to skip the views first.
+                    if cu is None:
+                        cu = cu_map.get(entry.entry_offset, False)
+                        if not cu:
+                            raise ValueError("Location list can't be tracked to a CU")                        
+
+                    if isinstance(entry, LocationEntry):
+                        if base_ip is None and not entry.is_absolute:
+                            base_ip = _get_cu_base(cu)                                
+
+                        begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset
+                        end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset
+                        expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset)
+                        if has_views:
+                            view = loc_list[loc_entry_count]
+                            postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else ''
+                            self._emitline('    %08x v%015x v%015x views at %08x for:' %(
+                                entry.entry_offset,
+                                view.begin,
+                                view.end,
+                                view.entry_offset))
+                            self._emitline('             %016x %016x %s%s' %(
+                                begin_offset,
+                                end_offset,
+                                expr,
+                                postfix))
+                            loc_entry_count += 1
+                        else:
+                            postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
+                            self._emitline(line_template % (
+                                entry.entry_offset,
+                                begin_offset,
+                                end_offset,
+                                expr,
+                                postfix))
+                    elif isinstance(entry, BaseAddressEntry):
+                        base_ip = entry.base_address
+                        self._emitline("    %08x %016x (base address)" % (entry.entry_offset, entry.base_address))
+
             # Pyelftools doesn't store the terminating entry,
             # but readelf emits its offset, so this should too.
             last = loc_list[-1]
-            last_len = 2*addr_size
-            if isinstance(last, LocationEntry):
-                last_len += 2 + len(last.loc_expr)
-            self._emitline("    %08x <End of list>" % (last.entry_offset + last_len))
+            self._emitline("    %08x <End of list>" % (last.entry_offset + last.entry_length))
 
     def _display_arch_specific_arm(self):
         """ Display the ARM architecture-specific info contained in the file.
index ceca7ec06c7809f4d4a3eb3a7a7cd679c22006df..56767c5ffd81c6298d60f52a9664e2670ac2289c 100755 (executable)
@@ -33,6 +33,9 @@ testlog.addHandler(logging.StreamHandler(sys.stdout))
 # same minor release and keeping track is a headache.
 if platform.system() == "Darwin": # MacOS
     READELF_PATH = 'greadelf'
+elif platform.system() == "Windows":
+    # Point the environment variable READELF at Cygwin's readelf.exe, or some other Windows build
+    READELF_PATH = os.environ.get('READELF', "readelf.exe")
 else:
     READELF_PATH = 'test/external_tools/readelf'
     if not os.path.exists(READELF_PATH):
@@ -142,6 +145,9 @@ def compare_output(s1, s2):
         return False, 'Number of lines different: %s vs %s' % (
                 len(lines1), len(lines2))
 
+    # Position of the View column in the output file, if parsing readelf..decodedline
+    # output, and the GNU readelf output contains the View column. Otherwise stays -1.
+    view_col_position = -1 
     for i in range(len(lines1)):
         if lines1[i].endswith('debug_line section:'):
             # .debug_line or .zdebug_line
@@ -149,6 +155,23 @@ def compare_output(s1, s2):
 
         # readelf spelling error for GNU property notes
         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
+        
+        # The view column position may change from CU to CU:
+        if view_col_position >= 0 and lines1[i].startswith('cu:'):
+            view_col_position = -1    
+    
+        # Check if readelf..decodedline output line contains the view column
+        if flag_in_debug_line_section and lines1[i].startswith('file name') and view_col_position < 0:
+            view_col_position = lines1[i].find("view")
+            stmt_col_position = lines1[i].find("stmt")
+
+        # Excise the View column from the table, if any.
+        # view_col_position is only set to a non-negative number if one of the previous
+        # lines was a table header line with a "view" in it.
+        # We assume careful formatting on GNU readelf's part - View column values
+        # are not out of line with the View header.
+        if view_col_position >= 0 and not lines1[i].endswith(':'):
+            lines1[i] = lines1[i][:view_col_position] + lines1[i][stmt_col_position:]
 
         # Compare ignoring whitespace
         lines1_parts = lines1[i].split()
@@ -169,16 +192,7 @@ def compare_output(s1, s2):
             sm = SequenceMatcher()
             sm.set_seqs(lines1[i], lines2[i])
             changes = sm.get_opcodes()
-            if flag_in_debug_line_section:
-                # readelf outputs an additional "View" column: ignore it
-                if len(lines1_parts) >= 2 and lines1_parts[-2] == 'view':
-                    ok = True
-                else:
-                    # Fast check special-cased for the only ELF we have which
-                    # has this information (dwarf_gnuops4.so.elf)
-                    ok = (    lines1_parts[-2:] == ['1', 'x']
-                          and lines2_parts[-1] == 'x')
-            elif '[...]' in lines1[i]:
+            if '[...]' in lines1[i]:
                 # Special case truncations with ellipsis like these:
                 #     .note.gnu.bu[...]        redelf
                 #     .note.gnu.build-i        pyelftools
index ea01db64da0fd2939a182bee3835833b136de7d0..c92e442f2a02d4724fb4e93c5d2a643785506705 100644 (file)
@@ -47,6 +47,8 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase):
             debug_addr_sec=None,
             debug_str_offsets_sec=None,
             debug_line_str_sec=None,
+            debug_loclists_sec = None,
+            debug_rnglists_sec = None
         )
 
         CUs = [cu for cu in di.iter_CUs()]
diff --git a/test/testfiles_for_readelf/dwarf_v5ops.so.elf b/test/testfiles_for_readelf/dwarf_v5ops.so.elf
new file mode 100644 (file)
index 0000000..9da7825
Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_v5ops.so.elf differ