From: Seva Alekseyev Date: Fri, 8 Jul 2022 12:41:52 +0000 (-0400) Subject: Mixing v4 and v5 loclists and rangelists sections (#429) X-Git-Tag: v0.29~11 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9f8e99b9d9cb3cb3537f8ab5d170b22019d111bf;p=pyelftools.git Mixing v4 and v5 loclists and rangelists sections (#429) * More GNU note dumping * aranges fix for empty sections * Mixed v4/v5 sections. * Test for readelf * Comments, typo --- diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py index 3f140f4..ae40975 100644 --- a/elftools/dwarf/aranges.py +++ b/elftools/dwarf/aranges.py @@ -56,8 +56,13 @@ class ARanges(object): #------ PRIVATE ------# - def _get_entries(self): + def _get_entries(self, need_empty=False): """ Populate self.entries with ARangeEntry tuples for each range of addresses + + Terminating null entries of CU blocks are not returned, unless + need_empty is set to True and the CU block contains nothing but + a null entry. The null entry will have both address and length + set to 0. """ self.stream.seek(0) entries = [] @@ -77,10 +82,15 @@ class ARanges(object): seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size) self.stream.seek(seek_to) + # We now have a binary with empty arange sections - nothing but a NULL entry. + # To keep compatibility with readelf, we need to return those. + # A two level list would be a prettier solution, but this will be compatible. 
+ got_entries = False + # entries in this set/CU addr = struct_parse(addr_size('addr'), self.stream) length = struct_parse(addr_size('length'), self.stream) - while addr != 0 or length != 0: + while addr != 0 or length != 0 or (not got_entries and need_empty): # 'begin_addr length info_offset version address_size segment_size' entries.append( ARangeEntry(begin_addr=addr, @@ -90,8 +100,11 @@ class ARanges(object): version=aranges_header["version"], address_size=aranges_header["address_size"], segment_size=aranges_header["segment_size"])) - addr = struct_parse(addr_size('addr'), self.stream) - length = struct_parse(addr_size('length'), self.stream) + got_entries = True + if addr != 0 or length != 0: + addr = struct_parse(addr_size('addr'), self.stream) + length = struct_parse(addr_size('length'), self.stream) + # Segmentation exists in executable elif aranges_header["segment_size"] != 0: raise NotImplementedError("Segmentation not implemented") diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 7c7060e..1cc2001 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -19,8 +19,8 @@ from .compileunit import CompileUnit from .abbrevtable import AbbrevTable from .lineprogram import LineProgram from .callframe import CallFrameInfo -from .locationlists import LocationLists -from .ranges import RangeLists +from .locationlists import LocationLists, LocationListsPair +from .ranges import RangeLists, RangeListsPair from .aranges import ARanges from .namelut import NameLUT @@ -343,26 +343,32 @@ class DWARFInfo(object): return None def location_lists(self): - """ Get a LocationLists object representing the .debug_loc section of + """ Get a LocationLists object representing the .debug_loc/debug_loclists section of the DWARF data, or None if this section doesn't exist. + + If both sections exist, it returns a LocationListsPair. 
""" - if self.debug_loclists_sec: - assert(self.debug_loc_sec is None) # Are there ever files with both kinds of location sections? + if self.debug_loclists_sec and self.debug_loc_sec is None: return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self) - elif self.debug_loc_sec: - return LocationLists(self.debug_loc_sec.stream, self.structs) + elif self.debug_loc_sec and self.debug_loclists_sec is None: + return LocationLists(self.debug_loc_sec.stream, self.structs, 4, self) + elif self.debug_loc_sec and self.debug_loclists_sec: + return LocationListsPair(self.debug_loclists_sec.stream, self.debug_loclists_sec.stream, self.structs, self) else: return None def range_lists(self): - """ Get a RangeLists object representing the .debug_ranges section of + """ Get a RangeLists object representing the .debug_ranges/.debug_rnglists section of the DWARF data, or None if this section doesn't exist. + + If both sections exist, it returns a RangeListsPair. """ - if self.debug_rnglists_sec: - assert(self.debug_ranges_sec is None) + if self.debug_rnglists_sec and self.debug_ranges_sec is None: return RangeLists(self.debug_rnglists_sec.stream, self.structs, 5, self) - elif self.debug_ranges_sec: + elif self.debug_ranges_sec and self.debug_rnglists_sec is None: return RangeLists(self.debug_ranges_sec.stream, self.structs, 4, self) + elif self.debug_ranges_sec and self.debug_rnglists_sec: + return RangeListsPair(self.debug_ranges_sec.stream, self.debug_rnglists_sec.stream, self.structs, self) else: return None diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index eae55c5..0792d45 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -10,12 +10,42 @@ import os from collections import namedtuple from ..common.exceptions import DWARFError from ..common.utils import struct_parse +from .dwarf_util import _iter_CUs_in_section LocationExpr = namedtuple('LocationExpr', 'loc_expr') LocationEntry = 
namedtuple('LocationEntry', 'entry_offset entry_length begin_offset end_offset loc_expr is_absolute') BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset entry_length base_address') LocationViewPair = namedtuple('LocationViewPair', 'entry_offset begin end') +class LocationListsPair(object): + """For those binaries that contain both a debug_loc and a debug_loclists section, + it holds a LocationLists object for both and forwards API calls to the right one. + """ + def __init__(self, streamv4, streamv5, structs, dwarfinfo=None): + self._loc = LocationLists(streamv4, structs, 4, dwarfinfo) + self._loclists = LocationLists(streamv5, structs, 5, dwarfinfo) + + def get_location_list_at_offset(self, offset, die=None): + """See LocationLists.get_location_list_at_offset(). + """ + if die is None: + raise DWARFError("For this binary, \"die\" needs to be provided") + section = self._loclists if die.cu.header.version >= 5 else self._loc + return section.get_location_list_at_offset(offset, die) + + def iter_location_lists(self): + """Tricky proposition, since the structure of loc and loclists + is not identical. A realistic readelf implementation needs to be aware of both + """ + raise DWARFError("Iterating through two sections is not supported") + + def iter_CUs(self): + """See LocationLists.iter_CUs() + + There are no CUs in DWARFv4 sections. + """ + raise DWARFError("Iterating through two sections is not supported") + class LocationLists(object): """ A single location list is a Python list consisting of LocationEntry or BaseAddressEntry objects. @@ -57,7 +87,7 @@ class LocationLists(object): # Location lists are referenced by DIE attributes by offset or by index. # As of DWARFv5, it may contain, in addition to proper location lists, - #location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews + # location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews # attribute. 
A set of locview pairs (which is a couple of ULEB128 values) may preceed # a location list; the former is referenced by the DW_AT_GNU_locviews attribute, the # latter - by DW_AT_location (in the same DIE). Binutils' readelf dumps those. @@ -67,20 +97,21 @@ class LocationLists(object): # # Taking a cue from binutils, we would have to scan this section while looking at # what's in DIEs. + ver5 = self.version >= 5 stream = self.stream stream.seek(0, os.SEEK_END) endpos = stream.tell() stream.seek(0, os.SEEK_SET) - if self.version >= 5: - # Need to provide support for DW_AT_GNU_locviews. They are interspersed in - # the locations section, no way to tell where short of checking all DIEs - all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist - locviews = dict() # Map of locview offset to the respective loclist offset - cu_map = dict() # Map of loclist offsets to CUs - for cu in self.dwarfinfo.iter_CUs(): - cu_ver = cu['version'] + # Need to provide support for DW_AT_GNU_locviews. 
They are interspersed in + # the locations section, no way to tell where short of checking all DIEs + all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist + locviews = dict() # Map of locview offset to the respective loclist offset + cu_map = dict() # Map of loclist offsets to CUs + for cu in self.dwarfinfo.iter_CUs(): + cu_ver = cu['version'] + if (cu_ver >= 5) == ver5: for die in cu.iter_DIEs(): # A combination of location and locviews means there is a location list # preceed by several locview pairs @@ -96,15 +127,16 @@ class LocationLists(object): # Scan other attributes for location lists for key in die.attributes: attr = die.attributes[key] - if (key != 'DW_AT_location' and + if ((key != 'DW_AT_location' or 'DW_AT_GNU_locviews' not in die.attributes) and LocationParser.attribute_has_location(attr, cu_ver) and LocationParser._attribute_has_loc_list(attr, cu_ver)): list_offset = attr.value all_offsets.add(list_offset) cu_map[list_offset] = cu - all_offsets = list(all_offsets) - all_offsets.sort() + all_offsets = list(all_offsets) + all_offsets.sort() + if ver5: # Loclists section is organized as an array of CUs, each length prefixed. # We don't assume that the CUs go in the same order as the ones in info. 
offset_index = 0 @@ -133,9 +165,22 @@ class LocationLists(object): next_offset = cu_end_offset # And implicitly quit the loop within the CU stream.seek(next_offset, os.SEEK_SET) else: - # Just call _parse_location_list_from_stream until the stream ends - while stream.tell() < endpos: - yield self._parse_location_list_from_stream() + for offset in all_offsets: + list_offset = locviews.get(offset, offset) + if cu_map[list_offset].header.version < 5: + stream.seek(offset, os.SEEK_SET) + locview_pairs = self._parse_locview_pairs(locviews) + entries = self._parse_location_list_from_stream() + yield locview_pairs + entries + + def iter_CUs(self): + """For DWARF5 returns an array of objects, where each one has an array of offsets + """ + if self.version < 5: + raise DWARFError("CU iteration in loclists is not supported with DWARF<5") + + structs = next(self.dwarfinfo.iter_CUs()).structs # Just pick one + return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_loclists_CU_header) #------ PRIVATE ------# diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index e5c9fde..0ed5545 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -31,6 +31,51 @@ entry_translate = { 'DW_RLE_startx_length': not_implemented } +class RangeListsPair(object): + """For those binaries that contain both a debug_ranges and a debug_rnglists section, + it holds a RangeLists object for both and forwards API calls to the right one based + on the CU version. + """ + def __init__(self, streamv4, streamv5, structs, dwarfinfo=None): + self._ranges = RangeLists(streamv4, structs, 4, dwarfinfo) + self._rnglists = RangeLists(streamv5, structs, 5, dwarfinfo) + + def get_range_list_at_offset(self, offset, cu=None): + """Forwards the call to either v4 section or v5 one, + depending on DWARF version in the CU. 
+ """ + if cu is None: + raise DWARFError("For this binary, \"cu\" needs to be provided") + section = self._rnglists if cu.header.version >= 5 else self._ranges + return section.get_range_list_at_offset(offset, cu) + + def get_range_list_at_offset_ex(self, offset): + """Gets an untranslated v5 rangelist from the v5 section. + """ + return self._rnglists.get_range_list_at_offset_ex(offset) + + def iter_range_lists(self): + """Tricky proposition, since the structure of ranges and rnglists + is not identical. A realistic readelf implementation needs to be aware of both. + """ + raise DWARFError("Iterating through two sections is not supported") + + def iter_CUs(self): + """See RangeLists.iter_CUs() + + CU structure is only present in DWARFv5 rnglists sections. A well written + section dumper should check if one is present. + """ + return self._rnglists.iter_CUs() + + def iter_CU_range_lists_ex(self, cu): + """See RangeLists.iter_CU_range_lists_ex() + + CU structure is only present in DWARFv5 rnglists sections. A well written + section dumper should check if one is present. + """ + return self._rnglists.iter_CU_range_lists_ex(cu) + class RangeLists(object): """ A single range list is a Python list consisting of RangeEntry or BaseAddressEntry objects. @@ -50,7 +95,7 @@ class RangeLists(object): self.version = version self._dwarfinfo = dwarfinfo - def get_range_list_at_offset(self, offset): + def get_range_list_at_offset(self, offset, cu=None): """ Get a range list at the given offset in the section. """ self.stream.seek(offset, os.SEEK_SET) @@ -63,14 +108,16 @@ class RangeLists(object): return struct_parse(self.structs.Dwarf_rnglists_entries, self.stream, offset) def iter_range_lists(self): - """ Yield all range lists found in the section. + """ Yield all range lists found in the section according to readelf rules. + Scans the DIEs for rangelist offsets, then pulls those. """ # Calling parse until the stream ends is wrong, because ranges can overlap. 
# Need to scan the DIEs to know all range locations + ver5 = self.version >= 5 all_offsets = list(set(die.attributes['DW_AT_ranges'].value for cu in self._dwarfinfo.iter_CUs() for die in cu.iter_DIEs() - if 'DW_AT_ranges' in die.attributes)) + if 'DW_AT_ranges' in die.attributes and (cu.header.version >= 5) == ver5)) all_offsets.sort() for offset in all_offsets: diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index 4ac33c1..38c80b6 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -259,12 +259,19 @@ def describe_note_gnu_property_x86_feature_1(value): descs.append(desc) return 'x86 feature: ' + ', '.join(descs) -def describe_note_gnu_property_x86_isa_1(value): +def describe_note_gnu_property_x86_feature_2_used(value): + descs = [] + for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_2_FLAGS: + if value & mask: + descs.append(desc) + return 'x86 feature used: ' + ', '.join(descs) + +def describe_note_gnu_property_x86_isa_1(value, verb): descs = [] for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS: if value & mask: descs.append(desc) - return 'x86 ISA needed: ' + ', '.join(descs) + return 'x86 ISA %s: %s' % (verb, ', '.join(descs)) def describe_note_gnu_properties(properties): descriptions = [] @@ -285,11 +292,21 @@ def describe_note_gnu_properties(properties): prop_desc = ' ' % sz else: prop_desc = describe_note_gnu_property_x86_feature_1(d) + elif t == 'GNU_PROPERTY_X86_FEATURE_2_USED': + if sz != 4: + prop_desc = ' ' % sz + else: + prop_desc = describe_note_gnu_property_x86_feature_2_used(d) elif t == 'GNU_PROPERTY_X86_ISA_1_NEEDED': if sz != 4: prop_desc = ' ' % sz else: - prop_desc = describe_note_gnu_property_x86_isa_1(d) + prop_desc = describe_note_gnu_property_x86_isa_1(d, "needed") + elif t == 'GNU_PROPERTY_X86_ISA_1_USED': + if sz != 4: + prop_desc = ' ' % sz + else: + prop_desc = describe_note_gnu_property_x86_isa_1(d, "used") elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= 
_DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC: prop_desc = '' % (t, bytes2hex(d, sep=' ')) elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER: @@ -615,6 +632,17 @@ _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = ( (8, 'LAM_U57'), ) +# Bit masks for GNU_PROPERTY_X86_FEATURE_2_xxx flags in the form +# (mask, flag_description) in the desired output order +_DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_2_FLAGS = ( + (1, 'x86'), + (2, 'x87'), + (4, 'MMX'), + (8, 'XMM'), + (16, 'YMM'), + (32, 'ZMM'), +) + # Same for GNU_PROPERTY_X86_SET_1_xxx _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS = ( (1, 'x86-64-baseline'), diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index a5855c2..745aefc 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -878,6 +878,8 @@ ENUM_NOTE_GNU_PROPERTY_TYPE = dict( GNU_PROPERTY_NO_COPY_ON_PROTECTED=2, GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002, GNU_PROPERTY_X86_ISA_1_NEEDED=0xc0008002, + GNU_PROPERTY_X86_FEATURE_2_USED=0xc0010001, + GNU_PROPERTY_X86_ISA_1_USED=0xc0010002, _default_=Pass, ) diff --git a/scripts/readelf.py b/scripts/readelf.py index 3895226..2095c91 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -62,9 +62,8 @@ from elftools.dwarf.descriptions import ( ) from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) -from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry -from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above -import elftools.dwarf.ranges +from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry as LocBaseAddressEntry, LocationListsPair +from elftools.dwarf.ranges import RangeEntry, BaseAddressEntry as RangeBaseAddressEntry, RangeListsPair from elftools.dwarf.callframe import CIE, FDE, ZERO from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry from 
elftools.dwarf.enums import ENUM_DW_UT @@ -76,6 +75,24 @@ def _get_cu_base(cu): return attr['DW_AT_low_pc'].value elif 'DW_AT_entry_pc' in attr: return attr['DW_AT_entry_pc'].value + elif 'DW_AT_ranges' in attr: + # Rare case but happens: rangelist in the top DIE. + # If there is a base or at least one absolute entry, + # this will give us the base IP for the CU. + rl = cu.dwarfinfo.range_lists().get_range_list_at_offset(attr['DW_AT_ranges'].value, cu) + base_ip = None + for r in rl: + if isinstance(r, RangeBaseAddressEntry): + ip = r.base_address + elif isinstance(r, RangeEntry) and r.is_absolute: + ip = r.begin_offset + else: + ip = None + if ip is not None and (base_ip is None or ip < base_ip): + base_ip = ip + if base_ip is None: + raise ValueError("Can't find the base IP (low_pc) for a CU") + return base_ip else: raise ValueError("Can't find the base IP (low_pc) for a CU") @@ -1178,14 +1195,13 @@ class ReadElf(object): '0' if state.address == 0 else self._format_hex(state.address), 'x' if state.is_stmt and not state.end_sequence else '')) else: - # What's the deal with op_index after address on DWARF 5? Is omitting it - # a function of DWARF version, or ISA, or what? - # Used to be unconditional, even on non-VLIW machines. + # In readelf, on non-VLIW machines there is no op_index postfix after address. + # It used to be unconditional. self._emitline('%-35s %s %18s%s %s' % ( bytes2str(lineprogram['file_entry'][state.file - 1].name), "%11d" % (state.line,) if not state.end_sequence else '-', '0' if state.address == 0 else self._format_hex(state.address), - '' if ver5 else '[%d]' % (state.op_index,), + '' if lineprogram.header.maximum_operations_per_instruction == 1 else '[%d]' % (state.op_index,), 'x' if state.is_stmt and not state.end_sequence else '')) if entry.command == DW_LNS_copy: # Another readelf oddity... 
@@ -1296,8 +1312,11 @@ class ReadElf(object): aranges_table = self._dwarfinfo.get_aranges() if aranges_table == None: return - # seems redundent, but we need to get the unsorted set of entries to match system readelf - unordered_entries = aranges_table._get_entries() + # Seems redundant, but we need to get the unsorted set of entries + # to match system readelf. + # Also, sometimes there are blank sections in aranges, but readelf + # dumps them, so we should too. + unordered_entries = aranges_table._get_entries(need_empty=True) if len(unordered_entries) == 0: self._emitline() @@ -1320,9 +1339,10 @@ class ReadElf(object): self._emitline(' Segment Size: %d' % (entry.segment_size)) self._emitline() self._emitline(' Address Length') - self._emitline(' %s %s' % ( - self._format_hex(entry.begin_addr, fullhex=True, lead0x=False), - self._format_hex(entry.length, fullhex=True, lead0x=False))) + if entry.begin_addr != 0 or entry.length != 0: + self._emitline(' %s %s' % ( + self._format_hex(entry.begin_addr, fullhex=True, lead0x=False), + self._format_hex(entry.length, fullhex=True, lead0x=False))) prev_offset = entry.info_offset self._emitline(' %s %s' % ( self._format_hex(0, fullhex=True, lead0x=False), @@ -1440,15 +1460,21 @@ class ReadElf(object): """ Dump the location lists from .debug_loc/.debug_loclists section """ di = self._dwarfinfo - loc_lists = di.location_lists() - if not loc_lists: # No locations section - readelf outputs nothing + loc_lists_sec = di.location_lists() + if not loc_lists_sec: # No locations section - readelf outputs nothing return - loc_lists = list(loc_lists.iter_location_lists()) - if len(loc_lists) == 0: - # Present but empty locations section - readelf outputs a message - self._emitline("\nSection '%s' has no debugging data." 
% (di.debug_loclists_sec or di.debug_loc_sec).name) - return + if isinstance(loc_lists_sec, LocationListsPair): + self._dump_debug_locsection(di, loc_lists_sec._loc) + self._dump_debug_locsection(di, loc_lists_sec._loclists) + else: + self._dump_debug_locsection(di, loc_lists_sec) + + def _dump_debug_locsection(self, di, loc_lists_sec): + """ Dump the location lists from .debug_loc/.debug_loclists section + """ + ver5 = loc_lists_sec.version >= 5 + section_name = (di.debug_loclists_sec if ver5 else di.debug_loc_sec).name # To dump a location list, one needs to know the CU. # Scroll through DIEs once, list the known location list offsets. @@ -1467,81 +1493,106 @@ class ReadElf(object): addr_width = addr_size * 2 # In hex digits, 8 or 16 line_template = " %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width) - self._emitline('Contents of the %s section:\n' % (di.debug_loclists_sec or di.debug_loc_sec).name) + loc_lists = list(loc_lists_sec.iter_location_lists()) + if len(loc_lists) == 0: + # Present but empty locations section - readelf outputs a message + self._emitline("\nSection '%s' has no debugging data." % (section_name,)) + return + + self._emitline('Contents of the %s section:\n' % (section_name,)) self._emitline(' Offset Begin End Expression') for loc_list in loc_lists: - in_views = False - has_views = False - base_ip = None - loc_entry_count = 0 - cu = None - for entry in loc_list: - if isinstance(entry, LocationViewPair): - has_views = in_views = True - # The "v" before address is conditional in binutils, haven't figured out how - self._emitline(" %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end)) - else: - if in_views: - in_views = False - self._emitline("") - - # Need the CU for this loclist, but the map is keyed by the offset - # of the first entry in the loclist. Got to skip the views first. 
- if cu is None: - cu = cu_map.get(entry.entry_offset, False) - if not cu: - raise ValueError("Location list can't be tracked to a CU") - - if isinstance(entry, LocationEntry): - if base_ip is None and not entry.is_absolute: - base_ip = _get_cu_base(cu) - - begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset - end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset - expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset) - if has_views: - view = loc_list[loc_entry_count] - postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else '' - self._emitline(' %08x v%015x v%015x views at %08x for:' %( - entry.entry_offset, - view.begin, - view.end, - view.entry_offset)) - self._emitline(' %016x %016x %s%s' %( - begin_offset, - end_offset, - expr, - postfix)) - loc_entry_count += 1 - else: - postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' - self._emitline(line_template % ( - entry.entry_offset, - begin_offset, - end_offset, - expr, - postfix)) - elif isinstance(entry, BaseAddressEntry): - base_ip = entry.base_address - self._emitline(" %08x %016x (base address)" % (entry.entry_offset, entry.base_address)) - - # Pyelftools doesn't store the terminating entry, - # but readelf emits its offset, so this should too. 
- last = loc_list[-1] - self._emitline(" %08x " % (last.entry_offset + last.entry_length)) + self._dump_loclist(loc_list, line_template, cu_map) + + def _dump_loclist(self, loc_list, line_template, cu_map): + in_views = False + has_views = False + base_ip = None + loc_entry_count = 0 + cu = None + for entry in loc_list: + if isinstance(entry, LocationViewPair): + has_views = in_views = True + # The "v" before address is conditional in binutils, haven't figured out how + self._emitline(" %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end)) + else: + if in_views: + in_views = False + self._emitline("") + + # Readelf quirk: indexed loclists don't show the real base IP + if cu_map is None: + base_ip = 0 + elif cu is None: + cu = cu_map.get(entry.entry_offset, False) + if not cu: + raise ValueError("Location list can't be tracked to a CU") + + if isinstance(entry, LocationEntry): + if base_ip is None and not entry.is_absolute: + base_ip = _get_cu_base(cu) + + begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset + end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset + expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset) + if has_views: + view = loc_list[loc_entry_count] + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else '' + self._emitline(' %08x v%015x v%015x views at %08x for:' %( + entry.entry_offset, + view.begin, + view.end, + view.entry_offset)) + self._emitline(' %016x %016x %s%s' %( + begin_offset, + end_offset, + expr, + postfix)) + loc_entry_count += 1 + else: + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' + self._emitline(line_template % ( + entry.entry_offset, + begin_offset, + end_offset, + expr, + postfix)) + elif isinstance(entry, LocBaseAddressEntry): + base_ip = entry.base_address + self._emitline(" %08x %016x (base address)" % (entry.entry_offset, entry.base_address)) + + # 
Pyelftools doesn't store the terminating entry, + # but readelf emits its offset, so this should too. + last = loc_list[-1] + self._emitline(" %08x " % (last.entry_offset + last.entry_length)) def _dump_debug_ranges(self): # TODO: GNU readelf format doesn't need entry_length? di = self._dwarfinfo - range_lists = di.range_lists() - if not range_lists: # No ranges section - readelf outputs nothing + range_lists_sec = di.range_lists() + if not range_lists_sec: # No ranges section - readelf outputs nothing return - ver5 = range_lists.version >= 5 - range_lists = list(range_lists.iter_range_lists()) + if isinstance(range_lists_sec, RangeListsPair): + self._dump_debug_rangesection(di, range_lists_sec._ranges) + self._dump_debug_rangesection(di, range_lists_sec._rnglists) + else: + self._dump_debug_rangesection(di, range_lists_sec) + + def _dump_debug_rangesection(self, di, range_lists_sec): + # In the master branch of binutils, the v5 dump format is way different by now. + + ver5 = range_lists_sec.version >= 5 + section_name = (di.debug_rnglists_sec if ver5 else di.debug_ranges_sec).name + addr_size = di.config.default_address_size # In bytes, 4 or 8 + addr_width = addr_size * 2 # In hex digits, 8 or 16 + line_template = " %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width) + base_template = " %%08x %%0%dx (base address)" % (addr_width) + + range_lists = list(range_lists_sec.iter_range_lists()) if len(range_lists) == 0: # Present but empty locations section - readelf outputs a message - self._emitline("\nSection '%s' has no debugging data." % (di.debug_rnglists_sec or di.debug_ranges_sec).name) + self._emitline("\nSection '%s' has no debugging data." 
% section_name) return # In order to determine the base address of the range @@ -1551,36 +1602,34 @@ class ReadElf(object): for die in cu.iter_DIEs() if 'DW_AT_ranges' in die.attributes} - addr_size = di.config.default_address_size # In bytes, 4 or 8 - addr_width = addr_size * 2 # In hex digits, 8 or 16 - line_template = " %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width) - base_template = " %%08x %%0%dx (base address)" % (addr_width) - - self._emitline('Contents of the %s section:\n' % (di.debug_rnglists_sec or di.debug_ranges_sec).name) + self._emitline('Contents of the %s section:\n' % section_name) self._emitline(' Offset Begin End') for range_list in range_lists: - # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset, - # for DWARF<=4 list offset. - first = range_list[0] - base_ip = _get_cu_base(cu_map[first.entry_offset]) - for entry in range_list: - if isinstance(entry, RangeEntry): - postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' - self._emitline(line_template % ( - entry.entry_offset if ver5 else first.entry_offset, - (0 if entry.is_absolute else base_ip) + entry.begin_offset, - (0 if entry.is_absolute else base_ip) + entry.end_offset, - postfix)) - elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry): - base_ip = entry.base_address - self._emitline(base_template % ( - entry.entry_offset if ver5 else first.entry_offset, - entry.base_address)) - else: - raise NotImplementedError("Unknown object in a range list") - last = range_list[-1] - self._emitline(' %08x ' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset)) + self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template) + + def _dump_rangelist(self, range_list, cu_map, ver5, line_template, base_template): + # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset, + # for DWARF<=4 list offset. 
+ first = range_list[0] + base_ip = _get_cu_base(cu_map[first.entry_offset]) + for entry in range_list: + if isinstance(entry, RangeEntry): + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' + self._emitline(line_template % ( + entry.entry_offset if ver5 else first.entry_offset, + (0 if entry.is_absolute else base_ip) + entry.begin_offset, + (0 if entry.is_absolute else base_ip) + entry.end_offset, + postfix)) + elif isinstance(entry,RangeBaseAddressEntry): + base_ip = entry.base_address + self._emitline(base_template % ( + entry.entry_offset if ver5 else first.entry_offset, + entry.base_address)) + else: + raise NotImplementedError("Unknown object in a range list") + last = range_list[-1] + self._emitline(' %08x ' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset)) def _display_arch_specific_arm(self): """ Display the ARM architecture-specific info contained in the file. diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index ad56f4e..c1fc48c 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -88,7 +88,10 @@ def run_test_on_file(filename, verbose=False, opt=None): # patched from 0x07 0x10 to 00 00. # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction # from "DW_CFA_undefined 16" to two NOPs. - # GNU readelf had a bug here, had to work around. See PR #411. + # GNU readelf 2.38 had a bug here, had to work around: + # https://sourceware.org/bugzilla/show_bug.cgi?id=29250 + # It's been fixed in the binutils' master since, but the latest master will break a lot. 
+ # Same patch in dwarf_test_versions_mix.elf at 0x2061: 07 10 -> 00 00 # stdouts will be a 2-element list: output of readelf and output # of scripts/readelf.py diff --git a/test/testfiles_for_readelf/dwarf_test_versions_mix.elf b/test/testfiles_for_readelf/dwarf_test_versions_mix.elf new file mode 100644 index 0000000..6ae3333 Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_test_versions_mix.elf differ