From: Seva Alekseyev Date: Thu, 16 Jun 2022 12:19:30 +0000 (-0400) Subject: DWARF 5 operations and DWARF5 location lists (#418) X-Git-Tag: v0.29~19 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ece0db4145c05390dd49d86471184dcf3958f553;p=pyelftools.git DWARF 5 operations and DWARF5 location lists (#418) * Test binary for DWARFv5 operations * DWARFv5 ops, part 1: entry_value, const_type, deref_type * DWARFv5 ops, part 2: regval_type, implicit_pointer, convert * DWARFv5 loclists section parsing, take 1 * Foamtting fix * Test fixes * Lineprogram header file_entries with DWARFv5 now are indexable by string * Excising the View column, if present, from GNU readelf..decodedline output * Readelf test fixes * Typo * Formatting and comments * More style fixes --- diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index 4b4a392..64f1f9e 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- from ..construct import ( Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil, - Rename, SizeofError + Rename, SizeofError, Construct ) @@ -89,3 +89,24 @@ def SLEB128(name): """ A construct creator for SLEB128 encoding. 
""" return Rename(name, _SLEB128Adapter(_LEB128_reader())) + +class StreamOffset(Construct): + """ + Captures the current stream offset + + Parameters: + * name - the name of the value + + Example: + StreamOffset("item_offset") + """ + __slots__ = [] + def __init__(self, name): + Construct.__init__(self, name) + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + return stream.tell() + def _build(self, obj, stream, context): + context[self.name] = stream.tell() + def _sizeof(self, context): + return 0 diff --git a/elftools/common/utils.py b/elftools/common/utils.py index d1fde2c..0ea417c 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -10,6 +10,7 @@ from contextlib import contextmanager from .exceptions import ELFParseError, ELFError, DWARFError from .py3compat import int2byte from ..construct import ConstructError, ULInt8 +import os def merge_dicts(*dicts): @@ -107,6 +108,19 @@ def read_blob(stream, length): """ return [struct_parse(ULInt8(''), stream) for i in range(length)] +def save_dwarf_section(section, filename): + """Debug helper: dump section contents into a file + Section is expected to be one of the debug_xxx_sec elements of DWARFInfo + """ + stream = section.stream + pos = stream.tell() + stream.seek(0, os.SEEK_SET) + section.stream.seek(0) + with open(filename, 'wb') as file: + data = stream.read(section.size) + file.write(data) + stream.seek(pos, os.SEEK_SET) + #------------------------- PRIVATE ------------------------- def _assert_with_exception(cond, msg, exception_type): diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 059c22c..1934a2e 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -523,6 +523,7 @@ _EXTRA_INFO_DESCRIPTION_MAP = defaultdict( DW_AT_associated=_location_list_extra, DW_AT_data_location=_location_list_extra, DW_AT_stride=_location_list_extra, + DW_AT_call_value=_location_list_extra, DW_AT_import=_import_extra, 
DW_AT_GNU_call_site_value=_location_list_extra, DW_AT_GNU_call_site_data_value=_location_list_extra, @@ -651,21 +652,21 @@ class ExprDumper(object): return '%s: %x' % (opcode_name, args[0]) elif opcode_name in self._ops_with_two_decimal_args: return '%s: %s %s' % (opcode_name, args[0], args[1]) - elif opcode_name == 'DW_OP_GNU_entry_value': - return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]])) + elif opcode_name in ('DW_OP_GNU_entry_value', 'DW_OP_entry_value'): + return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset) for deo in args[0]])) elif opcode_name == 'DW_OP_implicit_value': return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]])) elif opcode_name == 'DW_OP_GNU_parameter_ref': return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset) - elif opcode_name == 'DW_OP_GNU_implicit_pointer': + elif opcode_name in ('DW_OP_GNU_implicit_pointer', 'DW_OP_implicit_pointer'): return "%s: <0x%x> %d" % (opcode_name, args[0], args[1]) - elif opcode_name == 'DW_OP_GNU_convert': + elif opcode_name in ('DW_OP_GNU_convert', 'DW_OP_convert'): return "%s <0x%x>" % (opcode_name, args[0] + cu_offset) - elif opcode_name == 'DW_OP_GNU_deref_type': + elif opcode_name in ('DW_OP_GNU_deref_type', 'DW_OP_deref_type'): return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset) - elif opcode_name == 'DW_OP_GNU_const_type': + elif opcode_name in ('DW_OP_GNU_const_type', 'DW_OP_const_type'): return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1])) - elif opcode_name == 'DW_OP_GNU_regval_type': + elif opcode_name in ('DW_OP_GNU_regval_type', 'DW_OP_regval_type'): return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset) else: return '' % opcode_name diff --git a/elftools/dwarf/dwarf_expr.py 
b/elftools/dwarf/dwarf_expr.py index 39ceee7..1e4f658 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -244,6 +244,15 @@ def _init_dispatch_table(structs): add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32(''))) add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset(''))) add('DW_OP_implicit_value', parse_blob()) + add('DW_OP_entry_value', parse_nestedexpr()) + add('DW_OP_const_type', parse_typedblob()) + add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), + structs.Dwarf_sleb128(''))) + add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) add('DW_OP_GNU_entry_value', parse_nestedexpr()) add('DW_OP_GNU_const_type', parse_typedblob()) add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 9642cc8..8dc7028 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -6,9 +6,11 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +import os from collections import namedtuple from bisect import bisect_right +from ..construct.lib.container import Container from ..common.exceptions import DWARFError from ..common.utils import (struct_parse, dwarf_assert, parse_cstring_from_stream) @@ -74,7 +76,9 @@ class DWARFInfo(object): debug_pubnames_sec, debug_addr_sec, debug_str_offsets_sec, - debug_line_str_sec): + debug_line_str_sec, + debug_loclists_sec, + debug_rnglists_sec): # Not parsed for now """ config: A DwarfConfig object @@ -93,9 +97,12 @@ class DWARFInfo(object): self.debug_loc_sec = debug_loc_sec self.debug_ranges_sec = debug_ranges_sec self.debug_line_sec = debug_line_sec + 
self.debug_addr_sec = debug_addr_sec self.debug_line_str_sec = debug_line_str_sec self.debug_pubtypes_sec = debug_pubtypes_sec self.debug_pubnames_sec = debug_pubnames_sec + self.debug_loclists_sec = debug_loclists_sec + self.debug_rnglists_sec = debug_rnglists_sec # Ignored for now # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -339,7 +346,10 @@ class DWARFInfo(object): """ Get a LocationLists object representing the .debug_loc section of the DWARF data, or None if this section doesn't exist. """ - if self.debug_loc_sec: + if self.debug_loclists_sec: + assert(self.debug_loc_sec is None) # Are there ever files with both kinds of location sections? + return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self) + elif self.debug_loc_sec: return LocationLists(self.debug_loc_sec.stream, self.structs) else: return None @@ -487,9 +497,12 @@ class DWARFInfo(object): if lineprog_header.get('directories', False): lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories) if lineprog_header.get('file_names', False): - translate = namedtuple("file_entry", "name dir_index mtime length") lineprog_header.file_entry = tuple( - translate(e.get('DW_LNCT_path'), e.get('DW_LNCT_directory_index'), e.get('DW_LNCT_timestamp'), e.get('DW_LNCT_size')) + Container(**{ + 'name':e.get('DW_LNCT_path'), + 'dir_index': e.get('DW_LNCT_directory_index'), + 'mtime': e.get('DW_LNCT_timestamp'), + 'length': e.get('DW_LNCT_size')}) for e in lineprog_header.file_names) # Calculate the offset to the next line program (see DWARF 6.2.4) @@ -502,3 +515,4 @@ class DWARFInfo(object): structs=structs, program_start_offset=self.debug_line_sec.stream.tell(), program_end_offset=end_offset) + diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index a52e803..c38ebe0 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -290,6 +290,8 @@ 
ENUM_DW_AT = dict( DW_AT_GNU_pubnames = 0x2134, DW_AT_GNU_pubtypes = 0x2135, DW_AT_GNU_discriminator = 0x2136, + DW_AT_GNU_locviews = 0x2137, + DW_AT_GNU_entry_view = 0x2138, DW_AT_LLVM_include_path = 0x3e00, DW_AT_LLVM_config_macros = 0x3e01, @@ -415,3 +417,15 @@ ENUM_DW_UT = dict( DW_UT_lo_user = 0x80, DW_UT_hi_user = 0xff ) + +ENUM_DW_LLE = dict( + DW_LLE_end_of_list = 0x00, + DW_LLE_base_addressx = 0x01, + DW_LLE_startx_endx = 0x02, + DW_LLE_startx_length = 0x03, + DW_LLE_offset_pair = 0x04, + DW_LLE_default_location = 0x05, + DW_LLE_base_address = 0x06, + DW_LLE_start_end = 0x07, + DW_LLE_start_length = 0x08 +) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index e6c735f..e674d67 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -8,38 +8,136 @@ #------------------------------------------------------------------------------- import os from collections import namedtuple - +from ..common.exceptions import DWARFError from ..common.utils import struct_parse LocationExpr = namedtuple('LocationExpr', 'loc_expr') -LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr') -BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address') +LocationEntry = namedtuple('LocationEntry', 'entry_offset entry_length begin_offset end_offset loc_expr is_absolute') +BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset entry_length base_address') +LocationViewPair = namedtuple('LocationViewPair', 'entry_offset begin end') class LocationLists(object): """ A single location list is a Python list consisting of LocationEntry or BaseAddressEntry objects. + + Starting with DWARF5, it may also contain LocationViewPair, but only + if scanning the section, never when requested for a DIE attribute. 
+ + The default location entries are returned as LocationEntry with + begin_offset == end_offset == -1 + + Version determines whether the executable contains a debug_loc + section, or a DWARFv5 style debug_loclists one. Only the 4/5 + distinction matters. + + Dwarfinfo is only needed for DWARFv5 location entry encodings + that contain references to other sections (e. g. DW_LLE_startx_endx), + and only for location list enumeration. """ - def __init__(self, stream, structs): + def __init__(self, stream, structs, version=4, dwarfinfo=None): self.stream = stream self.structs = structs + self.dwarfinfo = dwarfinfo + self.version = version self._max_addr = 2 ** (self.structs.address_size * 8) - 1 - def get_location_list_at_offset(self, offset): + def get_location_list_at_offset(self, offset, die=None): """ Get a location list at the given offset in the section. + Passing the die is only neccessary in DWARF5+, for decoding + location entry encodings that contain references to other sections. """ self.stream.seek(offset, os.SEEK_SET) - return self._parse_location_list_from_stream() + return self._parse_location_list_from_stream_v5(die) if self.version >= 5 else self._parse_location_list_from_stream() def iter_location_lists(self): - """ Yield all location lists found in the section. + """ Iterates through location lists and view pairs. Returns lists of + LocationEntry, BaseAddressEntry, and LocationViewPair objects. """ - # Just call _parse_location_list_from_stream until the stream ends - self.stream.seek(0, os.SEEK_END) - endpos = self.stream.tell() + # The location lists section was never meant for sequential access. + # Location lists are referenced by DIE attributes by offset or by index. + + # As of DWARFv5, it may contain, in addition to proper location lists, + #location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews + # attribute. 
A set of locview pairs (which is a couple of ULEB128 values) may preceed + # a location list; the former is referenced by the DW_AT_GNU_locviews attribute, the + # latter - by DW_AT_location (in the same DIE). Binutils' readelf dumps those. + # There is a view pair for each location-type entry in the list. + # + # Also, the section may contain gaps. + # + # Taking a cue from binutils, we would have to scan this section while looking at + # what's in DIEs. + stream = self.stream + stream.seek(0, os.SEEK_END) + endpos = stream.tell() + + stream.seek(0, os.SEEK_SET) + + if self.version >= 5: + # Need to provide support for DW_AT_GNU_locviews. They are interspersed in + # the locations section, no way to tell where short of checking all DIEs + all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist + locviews = dict() # Map of locview offset to the respective loclist offset + cu_map = dict() # Map of loclist offsets to CUs + for cu in self.dwarfinfo.iter_CUs(): + cu_ver = cu['version'] + for die in cu.iter_DIEs(): + # A combination of location and locviews means there is a location list + # preceed by several locview pairs + if 'DW_AT_GNU_locviews' in die.attributes: + assert('DW_AT_location' in die.attributes and + LocationParser._attribute_has_loc_list(die.attributes['DW_AT_location'], cu_ver)) + views_offset = die.attributes['DW_AT_GNU_locviews'].value + list_offset = die.attributes['DW_AT_location'].value + locviews[views_offset] = list_offset + cu_map[list_offset] = cu + all_offsets.add(views_offset) + + # Scan other attributes for location lists + for key in die.attributes: + attr = die.attributes[key] + if (key != 'DW_AT_location' and + LocationParser.attribute_has_location(attr, cu_ver) and + LocationParser._attribute_has_loc_list(attr, cu_ver)): + list_offset = attr.value + all_offsets.add(list_offset) + cu_map[list_offset] = cu + all_offsets = list(all_offsets) + all_offsets.sort() - self.stream.seek(0, 
os.SEEK_SET) - while self.stream.tell() < endpos: - yield self._parse_location_list_from_stream() + # Loclists section is organized as an array of CUs, each length prefixed. + # We don't assume that the CUs go in the same order as the ones in info. + offset_index = 0 + while stream.tell() < endpos: + # We are at the start of the CU block in the loclists now + unit_length = struct_parse(self.structs.Dwarf_initial_length(''), stream) + offset_past_len = stream.tell() + cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream) + assert(cu_header.version == 5) + + # GNU binutils supports two traversal modes: by offsets in CU header, and sequential. + # We don't have a binary for the former yet. On an off chance that we one day might, + # let's parse the header anyway. + + cu_end_offset = offset_past_len + unit_length + # Unit_length includes the header but doesn't include the length + + while stream.tell() < cu_end_offset: + # Skip the gap to the next object + next_offset = all_offsets[offset_index] + if next_offset == stream.tell(): # At an object, either a loc list or a loc view pair + locview_pairs = self._parse_locview_pairs(locviews) + entries = self._parse_location_list_from_stream_v5() + yield locview_pairs + entries + offset_index += 1 + else: # We are at a gap - skip the gap to the next object or to the next CU + if next_offset > cu_end_offset: # Gap at the CU end - the next object is in the next CU + next_offset = cu_end_offset # And implicitly quit the loop within the CU + stream.seek(next_offset, os.SEEK_SET) + else: + # Just call _parse_location_list_from_stream until the stream ends + while stream.tell() < endpos: + yield self._parse_location_list_from_stream() #------ PRIVATE ------# @@ -56,7 +154,8 @@ class LocationLists(object): break elif begin_offset == self._max_addr: # Base address selection entry - lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset)) + entry_length = self.stream.tell() - entry_offset + 
lst.append(BaseAddressEntry(entry_offset=entry_offset, entry_length=entry_length, base_address=end_offset)) else: # Location list entry expr_len = struct_parse( @@ -64,13 +163,60 @@ class LocationLists(object): loc_expr = [struct_parse(self.structs.Dwarf_uint8(''), self.stream) for i in range(expr_len)] + entry_length = self.stream.tell() - entry_offset lst.append(LocationEntry( entry_offset=entry_offset, + entry_length=entry_length, begin_offset=begin_offset, end_offset=end_offset, - loc_expr=loc_expr)) + loc_expr=loc_expr, + is_absolute = False)) + return lst + + # Also returns an array with BaseAddressEntry and LocationEntry + # Can't possibly support indexed values, since parsing those requires + # knowing the DIE context it came from + def _parse_location_list_from_stream_v5(self, die = None): + # This won't contain the terminator entry + lst = [self._translate_entry_v5(entry, die) + for entry + in struct_parse(self.structs.Dwarf_loclists_entries, self.stream)] return lst + # From V5 style entries to a LocationEntry/BaseAddressEntry + def _translate_entry_v5(self, entry, die): + off = entry.entry_offset + len = entry.entry_end_offset - off + type = entry.entry_type + if type == 'DW_LLE_base_address': + return BaseAddressEntry(off, len, entry.address) + elif type == 'DW_LLE_offset_pair': + return LocationEntry(off, len, entry.start_offset, entry.end_offset, entry.loc_expr, False) + elif type == 'DW_LLE_start_length': + return LocationEntry(off, len, entry.start_address, entry.start_address + entry.length, entry.loc_expr, True) + elif type == 'DW_LLE_start_end': # No test for this yet, but the format seems straightforward + return LocationEntry(off, len, entry.start_address, entry.end_address, entry.loc_expr, True) + elif type == 'DW_LLE_default_location': # No test for this either, and this is new in the API + return LocationEntry(off, len, -1, -1, entry.loc_expr, True) + elif type in ('DW_LLE_base_addressx', 'DW_LLE_startx_endx', 'DW_LLE_startx_length'): + # 
We don't have sample binaries for those LLEs. Their proper parsing would + # require knowing the CU context (so that indices can be resolved to code offsets) + raise NotImplementedError("Location list entry type %s is not supported yet" % (type,)) + else: + raise DWARFError(False, "Unknown DW_LLE code: %s" % (type,)) + + # Locviews is the dict, mapping locview offsets to corresponding loclist offsets + def _parse_locview_pairs(self, locviews): + stream = self.stream + list_offset = locviews.get(stream.tell(), None) + pairs = [] + if list_offset is not None: + while stream.tell() < list_offset: + pair = struct_parse(self.structs.Dwarf_locview_pair, stream) + pairs.append(LocationViewPair(pair.entry_offset, pair.begin, pair.end)) + assert(stream.tell() == list_offset) + return pairs + class LocationParser(object): """ A parser for location information in DIEs. Handles both location information contained within the attribute @@ -89,7 +235,7 @@ class LocationParser(object): (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or LocationParser._attribute_has_loc_list(attr, dwarf_version))) - def parse_from_attribute(self, attr, dwarf_version): + def parse_from_attribute(self, attr, dwarf_version, die = None): """ Parses a DIE attribute and returns either a LocationExpr or a list. """ @@ -98,7 +244,11 @@ class LocationParser(object): return LocationExpr(attr.value) elif self._attribute_has_loc_list(attr, dwarf_version): return self.location_lists.get_location_list_at_offset( - attr.value) + attr.value, die) + # We don't yet know if the DIE context will be needed. + # We might get it without a full tree traversal using + # attr.offset as a key, but we assume a good DWARF5 + # aware consumer would pass a DIE along. 
else: raise ValueError("Attribute does not have location information") diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index fb9b0b7..a1a286b 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -7,15 +7,16 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from elftools.construct.core import Subconstruct -from elftools.construct.macros import Embedded +from logging.config import valid_ident from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, - CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence + CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence, + Switch ) -from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128 +from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128, + StreamOffset) from .enums import * @@ -142,6 +143,7 @@ class DWARFStructs(object): self._create_nameLUT_header() self._create_string_offsets_table_header() self._create_address_table_header() + self._create_loclists_parsers() def _create_initial_length(self): def _InitialLength(name): @@ -396,6 +398,41 @@ class DWARFStructs(object): subcon=self.Dwarf_uint8('elem'), length_field=length_field('')) + def _create_loclists_parsers(self): + """ Create a struct for debug_loclists CU header, DWARFv5, 7,29 + """ + self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header', + # Unit_length parsed separately + self.Dwarf_uint16('version'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_selector_size'), + PrefixedArray( + self.Dwarf_offset('offsets'), + self.Dwarf_uint32(''))) + + cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), 
self.Dwarf_uleb128('')) + + self.Dwarf_loclists_entries = RepeatUntilExcluding( + lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list', + Struct('entry', + StreamOffset('entry_offset'), + Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE), + Embed(Switch('', lambda ctx: ctx.entry_type, + { + 'DW_LLE_end_of_list' : Struct('end_of_list'), + 'DW_LLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')), + 'DW_LLE_startx_endx' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld), + 'DW_LLE_startx_length' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld), + 'DW_LLE_offset_pair' : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld), + 'DW_LLE_default_location' : Struct('default_location', cld), + 'DW_LLE_base_address' : Struct('base_address', self.Dwarf_target_addr('address')), + 'DW_LLE_start_end' : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld), + 'DW_LLE_start_length' : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld), + })), + StreamOffset('entry_end_offset'))) + + self.Dwarf_locview_pair = Struct('locview_pair', + StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end')) class _InitialLengthAdapter(Adapter): """ A standard Construct adapter that expects a sub-construct diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 10367ad..bdda624 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -218,7 +218,8 @@ class ELFFile(object): '.debug_str', '.debug_line', '.debug_frame', '.debug_loc', '.debug_ranges', '.debug_pubtypes', '.debug_pubnames', '.debug_addr', - '.debug_str_offsets', '.debug_line_str') + '.debug_str_offsets', '.debug_line_str', + '.debug_loclists', '.debug_rnglists') compressed = bool(self.get_section_by_name('.zdebug_info')) @@ -232,7 
+233,8 @@ class ELFFile(object): debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, debug_pubnames_name, debug_addr_name, debug_str_offsets_name, - debug_line_str_name, eh_frame_sec_name) = section_names + debug_line_str_name, debug_loclists_sec_name, debug_rnglists_sec_name, + eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -265,7 +267,9 @@ class ELFFile(object): debug_pubnames_sec=debug_sections[debug_pubnames_name], debug_addr_sec=debug_sections[debug_addr_name], debug_str_offsets_sec=debug_sections[debug_str_offsets_name], - debug_line_str_sec=debug_sections[debug_line_str_name] + debug_line_str_sec=debug_sections[debug_line_str_name], + debug_loclists_sec=debug_sections[debug_loclists_sec_name], + debug_rnglists_sec=debug_sections[debug_rnglists_sec_name] ) def has_ehabi_info(self): diff --git a/examples/reference_output/dwarf_location_info.out b/examples/reference_output/dwarf_location_info.out index 01c8933..5791244 100644 --- a/examples/reference_output/dwarf_location_info.out +++ b/examples/reference_output/dwarf_location_info.out @@ -5,9 +5,9 @@ Processing file: ./examples/sample_exe64.elf (DW_OP_addr: 400608) Found a compile unit at offset 258, length 156 DIE DW_TAG_subprogram. attr DW_AT_frame_base. 
- LocationEntry(entry_offset=0, begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(entry_offset=20, begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> - LocationEntry(entry_offset=40, begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> + LocationEntry(entry_offset=0, entry_length=20, begin_offset=0, end_offset=1, loc_expr=[119, 8], is_absolute=False) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(entry_offset=20, entry_length=20, begin_offset=1, end_offset=4, loc_expr=[119, 16], is_absolute=False) <<(DW_OP_breg7 (rsp): 16)>> + LocationEntry(entry_offset=40, entry_length=20, begin_offset=4, end_offset=43, loc_expr=[118, 16], is_absolute=False) <<(DW_OP_breg6 (rbp): 16)>> DIE DW_TAG_formal_parameter. attr DW_AT_location. (DW_OP_fbreg: -20) DIE DW_TAG_formal_parameter. attr DW_AT_location. @@ -18,16 +18,16 @@ Processing file: ./examples/sample_exe64.elf DIE DW_TAG_subprogram. attr DW_AT_frame_base. (DW_OP_breg7 (rsp): 8) DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(entry_offset=76, begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(entry_offset=96, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> + LocationEntry(entry_offset=76, entry_length=20, begin_offset=16, end_offset=64, loc_expr=[119, 8], is_absolute=False) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(entry_offset=96, entry_length=21, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0], is_absolute=False) <<(DW_OP_breg7 (rsp): 64)>> DIE DW_TAG_formal_parameter. attr DW_AT_location. 
- LocationEntry(entry_offset=133, begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> - LocationEntry(entry_offset=152, begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> + LocationEntry(entry_offset=133, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[85], is_absolute=False) <<(DW_OP_reg5 (rdi))>> + LocationEntry(entry_offset=152, entry_length=19, begin_offset=85, end_offset=143, loc_expr=[94], is_absolute=False) <<(DW_OP_reg14 (r14))>> DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(entry_offset=187, begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> - LocationEntry(entry_offset=206, begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> + LocationEntry(entry_offset=187, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[84], is_absolute=False) <<(DW_OP_reg4 (rsi))>> + LocationEntry(entry_offset=206, entry_length=19, begin_offset=85, end_offset=138, loc_expr=[93], is_absolute=False) <<(DW_OP_reg13 (r13))>> DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(entry_offset=241, begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> - LocationEntry(entry_offset=260, begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> + LocationEntry(entry_offset=241, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[81], is_absolute=False) <<(DW_OP_reg1 (rdx))>> + LocationEntry(entry_offset=260, entry_length=19, begin_offset=85, end_offset=133, loc_expr=[92], is_absolute=False) <<(DW_OP_reg12 (r12))>> DIE DW_TAG_variable. attr DW_AT_location. 
- LocationEntry(entry_offset=295, begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> + LocationEntry(entry_offset=295, entry_length=19, begin_offset=92, end_offset=123, loc_expr=[83], is_absolute=False) <<(DW_OP_reg3 (rbx))>> diff --git a/scripts/readelf.py b/scripts/readelf.py index a45ec30..6bd776b 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -62,7 +62,7 @@ from elftools.dwarf.descriptions import ( ) from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) -from elftools.dwarf.locationlists import LocationParser, LocationEntry +from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry from elftools.dwarf.callframe import CIE, FDE, ZERO from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry from elftools.dwarf.enums import ENUM_DW_UT @@ -1137,9 +1137,9 @@ class ReadElf(object): cu_filename = '%s/%s' % (bytes2str(dir), cu_filename) self._emitline('CU: %s:' % cu_filename) - self._emitline('File name Line number Starting address View Stmt' if ver5 - else 'File name Line number Starting address Stmt') - # What goes into View on V5? To be seen... + self._emitline('File name Line number Starting address Stmt') + # GNU readelf has a View column that we don't try to replicate + # The autotest has logic in place to ignore that # Print each state's file, line and address information. 
For some # instructions other output is needed to be compatible with @@ -1427,7 +1427,7 @@ class ReadElf(object): self._dwarfinfo.CFI_entries()) def _dump_debug_locations(self): - """ Dump the location lists from .debug_location section + """ Dump the location lists from .debug_loc/.debug_loclists section """ def _get_cu_base(cu): top_die = cu.get_top_DIE() @@ -1447,48 +1447,88 @@ class ReadElf(object): loc_lists = list(loc_lists.iter_location_lists()) if len(loc_lists) == 0: # Present but empty locations section - readelf outputs a message - self._emitline("\nSection '%s' has no debugging data." % di.debug_loc_sec.name) + self._emitline("\nSection '%s' has no debugging data." % (di.debug_loclists_sec or di.debug_loc_sec).name) return # To dump a location list, one needs to know the CU. - # Scroll through DIEs once, list the known location list offsets + # Scroll through DIEs once, list the known location list offsets. + # Don't need this CU/DIE scan if all entries are absolute or prefixed by base, + # but let's not optimize for that yet. 
cu_map = dict() # Loc list offset => CU for cu in di.iter_CUs(): for die in cu.iter_DIEs(): for key in die.attributes: attr = die.attributes[key] if (LocationParser.attribute_has_location(attr, cu['version']) and - not LocationParser._attribute_has_loc_expr(attr, cu['version'])): + LocationParser._attribute_has_loc_list(attr, cu['version'])): cu_map[attr.value] = cu addr_size = di.config.default_address_size # In bytes, 4 or 8 addr_width = addr_size * 2 # In hex digits, 8 or 16 line_template = " %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width) - self._emitline('Contents of the %s section:\n' % di.debug_loc_sec.name) + self._emitline('Contents of the %s section:\n' % (di.debug_loclists_sec or di.debug_loc_sec).name) self._emitline(' Offset Begin End Expression') for loc_list in loc_lists: - cu = cu_map.get(loc_list[0].entry_offset, False) - if not cu: - raise ValueError("Location list can't be tracked to a CU") - base_ip = _get_cu_base(cu) + in_views = False + has_views = False + base_ip = None + loc_entry_count = 0 + cu = None for entry in loc_list: - # TODO: support BaseAddressEntry lines - expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset) - postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' - self._emitline(line_template % ( - entry.entry_offset, - base_ip + entry.begin_offset, - base_ip + entry.end_offset, - expr, - postfix)) + if isinstance(entry, LocationViewPair): + has_views = in_views = True + # The "v" before address is conditional in binutils, haven't figured out how + self._emitline(" %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end)) + else: + if in_views: + in_views = False + self._emitline("") + + # Need the CU for this loclist, but the map is keyed by the offset + # of the first entry in the loclist. Got to skip the views first. 
+ if cu is None: + cu = cu_map.get(entry.entry_offset, False) + if not cu: + raise ValueError("Location list can't be tracked to a CU") + + if isinstance(entry, LocationEntry): + if base_ip is None and not entry.is_absolute: + base_ip = _get_cu_base(cu) + + begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset + end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset + expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset) + if has_views: + view = loc_list[loc_entry_count] + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else '' + self._emitline(' %08x v%015x v%015x views at %08x for:' %( + entry.entry_offset, + view.begin, + view.end, + view.entry_offset)) + self._emitline(' %016x %016x %s%s' %( + begin_offset, + end_offset, + expr, + postfix)) + loc_entry_count += 1 + else: + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' + self._emitline(line_template % ( + entry.entry_offset, + begin_offset, + end_offset, + expr, + postfix)) + elif isinstance(entry, BaseAddressEntry): + base_ip = entry.base_address + self._emitline(" %08x %016x (base address)" % (entry.entry_offset, entry.base_address)) + # Pyelftools doesn't store the terminating entry, # but readelf emits its offset, so this should too. last = loc_list[-1] - last_len = 2*addr_size - if isinstance(last, LocationEntry): - last_len += 2 + len(last.loc_expr) - self._emitline(" %08x " % (last.entry_offset + last_len)) + self._emitline(" %08x " % (last.entry_offset + last.entry_length)) def _display_arch_specific_arm(self): """ Display the ARM architecture-specific info contained in the file. diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index ceca7ec..56767c5 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -33,6 +33,9 @@ testlog.addHandler(logging.StreamHandler(sys.stdout)) # same minor release and keeping track is a headache. 
if platform.system() == "Darwin": # MacOS READELF_PATH = 'greadelf' +elif platform.system() == "Windows": + # Point the environment variable READELF at Cygwin's readelf.exe, or some other Windows build + READELF_PATH = os.environ.get('READELF', "readelf.exe") else: READELF_PATH = 'test/external_tools/readelf' if not os.path.exists(READELF_PATH): @@ -142,6 +145,9 @@ def compare_output(s1, s2): return False, 'Number of lines different: %s vs %s' % ( len(lines1), len(lines2)) + # Position of the View column in the output file, if parsing readelf..decodedline + # output, and the GNU readelf output contains the View column. Otherwise stays -1. + view_col_position = -1 for i in range(len(lines1)): if lines1[i].endswith('debug_line section:'): # .debug_line or .zdebug_line @@ -149,6 +155,23 @@ def compare_output(s1, s2): # readelf spelling error for GNU property notes lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type') + + # The view column position may change from CU to CU: + if view_col_position >= 0 and lines1[i].startswith('cu:'): + view_col_position = -1 + + # Check if readelf..decodedline output line contains the view column + if flag_in_debug_line_section and lines1[i].startswith('file name') and view_col_position < 0: + view_col_position = lines1[i].find("view") + stmt_col_position = lines1[i].find("stmt") + + # Excise the View column from the table, if any. + # View_col_position is only set to a non-negative number if one of the previous + # lines was a table header line with a "view" in it. + # We assume careful formatting on GNU readelf's part - View column values + # are not out of line with the View header. 
+ if view_col_position >= 0 and not lines1[i].endswith(':'): + lines1[i] = lines1[i][:view_col_position] + lines1[i][stmt_col_position:] # Compare ignoring whitespace lines1_parts = lines1[i].split() @@ -169,16 +192,7 @@ def compare_output(s1, s2): sm = SequenceMatcher() sm.set_seqs(lines1[i], lines2[i]) changes = sm.get_opcodes() - if flag_in_debug_line_section: - # readelf outputs an additional "View" column: ignore it - if len(lines1_parts) >= 2 and lines1_parts[-2] == 'view': - ok = True - else: - # Fast check special-cased for the only ELF we have which - # has this information (dwarf_gnuops4.so.elf) - ok = ( lines1_parts[-2:] == ['1', 'x'] - and lines2_parts[-1] == 'x') - elif '[...]' in lines1[i]: + if '[...]' in lines1[i]: # Special case truncations with ellipsis like these: # .note.gnu.bu[...] redelf # .note.gnu.build-i pyelftools diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py index ea01db6..c92e442 100644 --- a/test/test_refaddr_bitness.py +++ b/test/test_refaddr_bitness.py @@ -47,6 +47,8 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): debug_addr_sec=None, debug_str_offsets_sec=None, debug_line_str_sec=None, + debug_loclists_sec = None, + debug_rnglists_sec = None ) CUs = [cu for cu in di.iter_CUs()] diff --git a/test/testfiles_for_readelf/dwarf_v5ops.so.elf b/test/testfiles_for_readelf/dwarf_v5ops.so.elf new file mode 100644 index 0000000..9da7825 Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_v5ops.so.elf differ