From e94aca1bad327a83463487d3d349eeea60a825f5 Mon Sep 17 00:00:00 2001 From: Seva Alekseyev Date: Fri, 17 Jun 2022 09:09:32 -0400 Subject: [PATCH] Support for DWARFv5 debug_rnglists section (#419) * Pre-DWARFv5 range section dumping, Ranges in readelf autotest * DWARFv5 rnglists section support * Autotest fixes * Misleading comment * Version, dwarfinfo now required in RangeLists constructor --- elftools/dwarf/dwarfinfo.py | 9 +- elftools/dwarf/enums.py | 11 +++ elftools/dwarf/ranges.py | 89 +++++++++++++------ elftools/dwarf/structs.py | 24 ++++- .../reference_output/dwarf_range_lists.out | 2 +- scripts/readelf.py | 77 +++++++++++++--- test/run_readelf_tests.py | 3 +- 7 files changed, 171 insertions(+), 44 deletions(-) diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 8dc7028..7c7060e 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -102,7 +102,7 @@ class DWARFInfo(object): self.debug_pubtypes_sec = debug_pubtypes_sec self.debug_pubnames_sec = debug_pubnames_sec self.debug_loclists_sec = debug_loclists_sec - self.debug_rnglists_sec = debug_rnglists_sec # Ignored for now + self.debug_rnglists_sec = debug_rnglists_sec # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -358,8 +358,11 @@ class DWARFInfo(object): """ Get a RangeLists object representing the .debug_ranges section of the DWARF data, or None if this section doesn't exist. """ - if self.debug_ranges_sec: - return RangeLists(self.debug_ranges_sec.stream, self.structs) + if self.debug_rnglists_sec: + assert(self.debug_ranges_sec is None) + return RangeLists(self.debug_rnglists_sec.stream, self.structs, 5, self) + elif self.debug_ranges_sec: + return RangeLists(self.debug_ranges_sec.stream, self.structs, 4, self) else: return None diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index c38ebe0..c0ae429 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -429,3 +429,14 @@ ENUM_DW_LLE = dict( DW_LLE_start_end = 0x07, DW_LLE_start_length = 0x08 ) + +ENUM_DW_RLE = dict( + DW_RLE_end_of_list = 0x00, + DW_RLE_base_addressx = 0x01, + DW_RLE_startx_endx = 0x02, + DW_RLE_startx_length = 0x03, + DW_RLE_offset_pair = 0x04, + DW_RLE_base_address = 0x05, + DW_RLE_start_end = 0x06, + DW_RLE_start_length = 0x07 +) diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 5f99473..e5476de 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -12,18 +12,41 @@ from collections import namedtuple from ..common.utils import struct_parse -RangeEntry = namedtuple('RangeEntry', 'begin_offset end_offset') -BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address') +RangeEntry = namedtuple('RangeEntry', 'entry_offset entry_length begin_offset end_offset is_absolute') +BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address') +def not_implemented(e): + raise NotImplementedError("Range list entry %s is not supported yet" % (e.entry_type,)) + +# Maps parsed entry types to RangeEntry/BaseAddressEntry objects +entry_translate = { + 'DW_RLE_base_address' : lambda e: BaseAddressEntry(e.entry_offset, e.address), + 'DW_RLE_offset_pair' : lambda e: RangeEntry(e.entry_offset, e.entry_length, e.start_offset, e.end_offset, False), + 'DW_RLE_start_end' : lambda e: RangeEntry(e.entry_offset, e.entry_length, e.start_address, e.end_address, True), + 'DW_RLE_start_length' : lambda e: RangeEntry(e.entry_offset, e.entry_length, e.start_address, e.start_address + e.length, True), + 'DW_RLE_base_addressx': not_implemented, + 'DW_RLE_startx_endx' : not_implemented, + 'DW_RLE_startx_length': not_implemented +} class RangeLists(object): """ A single range list is a Python list consisting of RangeEntry or BaseAddressEntry objects. + + Since v0.29, two new parameters - version and dwarfinfo + + version is used to distinguish DWARFv5 rnglists section from + the DWARF<=4 ranges section. Only the 4/5 distinction matters. + + The dwarfinfo is needed for enumeration, because enumeration + requires scanning the DIEs, because ranges may overlap, even on DWARF<=4 """ - def __init__(self, stream, structs): + def __init__(self, stream, structs, version, dwarfinfo): self.stream = stream self.structs = structs self._max_addr = 2 ** (self.structs.address_size * 8) - 1 + self.version = version + self._dwarfinfo = dwarfinfo def get_range_list_at_offset(self, offset): """ Get a range list at the given offset in the section. @@ -34,32 +57,44 @@ class RangeLists(object): def iter_range_lists(self): """ Yield all range lists found in the section. """ - # Just call _parse_range_list_from_stream until the stream ends - self.stream.seek(0, os.SEEK_END) - endpos = self.stream.tell() + # Calling parse until the stream ends is wrong, because ranges can overlap. + # Need to scan the DIEs to know all range locations + all_offsets = list(set(die.attributes['DW_AT_ranges'].value + for cu in self._dwarfinfo.iter_CUs() + for die in cu.iter_DIEs() + if 'DW_AT_ranges' in die.attributes)) + all_offsets.sort() - self.stream.seek(0, os.SEEK_SET) - while self.stream.tell() < endpos: - yield self._parse_range_list_from_stream() + for offset in all_offsets: + yield self.get_range_list_at_offset(offset) #------ PRIVATE ------# def _parse_range_list_from_stream(self): - lst = [] - while True: - begin_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) - end_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) - if begin_offset == 0 and end_offset == 0: - # End of list - we're done. - break - elif begin_offset == self._max_addr: - # Base address selection entry - lst.append(BaseAddressEntry(base_address=end_offset)) - else: - # Range entry - lst.append(RangeEntry( - begin_offset=begin_offset, - end_offset=end_offset)) - return lst + if self.version >= 5: + return list(entry_translate[entry.entry_type](entry) + for entry + in struct_parse(self.structs.Dwarf_rnglists_entries, self.stream)) + else: + lst = [] + while True: + entry_offset = self.stream.tell() + begin_offset = struct_parse( + self.structs.Dwarf_target_addr(''), self.stream) + end_offset = struct_parse( + self.structs.Dwarf_target_addr(''), self.stream) + if begin_offset == 0 and end_offset == 0: + # End of list - we're done. + break + elif begin_offset == self._max_addr: + # Base address selection entry + lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset)) + else: + # Range entry + lst.append(RangeEntry( + entry_offset=entry_offset, + entry_length=self.stream.tell() - entry_offset, + begin_offset=begin_offset, + end_offset=end_offset, + is_absolute=False)) + return lst diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index a1a286b..5aa4a12 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -13,7 +13,7 @@ from ..construct import ( SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence, - Switch + Switch, Value ) from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128, StreamOffset) @@ -144,6 +144,7 @@ class DWARFStructs(object): self._create_string_offsets_table_header() self._create_address_table_header() self._create_loclists_parsers() + self._create_rnglists_parsers() def _create_initial_length(self): def _InitialLength(name): @@ -434,6 +435,27 @@ class DWARFStructs(object): self.Dwarf_locview_pair = Struct('locview_pair', StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end')) + def _create_rnglists_parsers(self): + self.Dwarf_rnglists_entries = RepeatUntilExcluding( + lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list', + Struct('entry', + StreamOffset('entry_offset'), + Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE), + Embed(Switch('', lambda ctx: ctx.entry_type, + { + 'DW_RLE_end_of_list' : Struct('end_of_list'), + 'DW_RLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')), + 'DW_RLE_startx_endx' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')), + 'DW_RLE_startx_length' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')), + 'DW_RLE_offset_pair' : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')), + 'DW_RLE_base_address' : Struct('base_address', self.Dwarf_target_addr('address')), + 'DW_RLE_start_end' : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')), + 'DW_RLE_start_length' : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length')) + })), + StreamOffset('entry_end_offset'), + Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset))) + + class _InitialLengthAdapter(Adapter): """ A standard Construct adapter that expects a sub-construct as a struct with one or two values (first, second). diff --git a/examples/reference_output/dwarf_range_lists.out b/examples/reference_output/dwarf_range_lists.out index f8939b8..2b800c5 100644 --- a/examples/reference_output/dwarf_range_lists.out +++ b/examples/reference_output/dwarf_range_lists.out @@ -4,4 +4,4 @@ Processing file: ./examples/sample_exe64.elf Found a compile unit at offset 258, length 156 Found a compile unit at offset 418, length 300 DIE DW_TAG_lexical_block. attr DW_AT_ranges. -[RangeEntry(begin_offset=26, end_offset=40), RangeEntry(begin_offset=85, end_offset=118), RangeEntry(begin_offset=73, end_offset=77), RangeEntry(begin_offset=64, end_offset=67)] +[RangeEntry(entry_offset=0, entry_length=16, begin_offset=26, end_offset=40, is_absolute=False), RangeEntry(entry_offset=16, entry_length=16, begin_offset=85, end_offset=118, is_absolute=False), RangeEntry(entry_offset=32, entry_length=16, begin_offset=73, end_offset=77, is_absolute=False), RangeEntry(entry_offset=48, entry_length=16, begin_offset=64, end_offset=67, is_absolute=False)] diff --git a/scripts/readelf.py b/scripts/readelf.py index 6bd776b..070032a 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -63,10 +63,21 @@ from elftools.dwarf.descriptions import ( from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry +from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above +import elftools.dwarf.ranges from elftools.dwarf.callframe import CIE, FDE, ZERO from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry from elftools.dwarf.enums import ENUM_DW_UT +def _get_cu_base(cu): + top_die = cu.get_top_DIE() + attr = top_die.attributes + if 'DW_AT_low_pc' in attr: + return attr['DW_AT_low_pc'].value + elif 'DW_AT_entry_pc' in attr: + return attr['DW_AT_entry_pc'].value + else: + raise ValueError("Can't find the base IP (low_pc) for a CU") class ReadElf(object): """ display_* methods are used to emit output into the output stream @@ -859,6 +870,8 @@ class ReadElf(object): self._dump_debug_namelut(dump_what) elif dump_what == 'loc': self._dump_debug_locations() + elif dump_what == 'Ranges': + self._dump_debug_ranges() else: self._emitline('debug dump not yet supported for "%s"' % dump_what) @@ -1429,16 +1442,6 @@ class ReadElf(object): def _dump_debug_locations(self): """ Dump the location lists from .debug_loc/.debug_loclists section """ - def _get_cu_base(cu): - top_die = cu.get_top_DIE() - attr = top_die.attributes - if 'DW_AT_low_pc' in attr: - return attr['DW_AT_low_pc'].value - elif 'DW_AT_entry_pc' in attr: - return attr['DW_AT_entry_pc'].value - else: - raise ValueError("Can't find the base IP (low_pc) for a CU") - di = self._dwarfinfo loc_lists = di.location_lists() if not loc_lists: # No locations section - readelf outputs nothing @@ -1530,6 +1533,58 @@ class ReadElf(object): last = loc_list[-1] self._emitline(" %08x " % (last.entry_offset + last.entry_length)) + def _dump_debug_ranges(self): + # TODO: GNU readelf format doesn't need entry_length? + di = self._dwarfinfo + range_lists = di.range_lists() + if not range_lists: # No ranges section - readelf outputs nothing + return + + ver5 = range_lists.version >= 5 + range_lists = list(range_lists.iter_range_lists()) + if len(range_lists) == 0: + # Present but empty locations section - readelf outputs a message + self._emitline("\nSection '%s' has no debugging data." % (di.debug_rnglists_sec or di.debug_ranges_sec).name) + return + + # In order to determine the base address of the range + # We need to know the corresponding CU. + cu_map = {die.attributes['DW_AT_ranges'].value : cu # Range list offset => CU + for cu in di.iter_CUs() + for die in cu.iter_DIEs() + if 'DW_AT_ranges' in die.attributes} + + addr_size = di.config.default_address_size # In bytes, 4 or 8 + addr_width = addr_size * 2 # In hex digits, 8 or 16 + line_template = " %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width) + base_template = " %%08x %%0%dx (base address)" % (addr_width) + + self._emitline('Contents of the %s section:\n' % (di.debug_rnglists_sec or di.debug_ranges_sec).name) + self._emitline(' Offset Begin End') + + for range_list in range_lists: + # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset, + # for DWARF<=4 list offset. + first = range_list[0] + base_ip = _get_cu_base(cu_map[first.entry_offset]) + for entry in range_list: + if isinstance(entry, RangeEntry): + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' + self._emitline(line_template % ( + entry.entry_offset if ver5 else first.entry_offset, + (0 if entry.is_absolute else base_ip) + entry.begin_offset, + (0 if entry.is_absolute else base_ip) + entry.end_offset, + postfix)) + elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry): + base_ip = entry.base_address + self._emitline(base_template % ( + entry.entry_offset if ver5 else first.entry_offset, + entry.base_address)) + else: + raise NotImplementedError("Unknown object in a range list") + last = range_list[-1] + self._emitline(' %08x ' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset)) + def _display_arch_specific_arm(self): """ Display the ARM architecture-specific info contained in the file. """ @@ -1620,7 +1675,7 @@ def main(stream=None): action='store', dest='debug_dump_what', metavar='', help=( 'Display the contents of DWARF debug sections. can ' + - 'one of {info,decodedline,frames,frames-interp,aranges,pubtypes,pubnames,loc}')) + 'one of {info,decodedline,frames,frames-interp,aranges,pubtypes,pubnames,loc,Ranges}')) argparser.add_argument('--traceback', action='store_true', dest='show_traceback', help='Dump the Python traceback on ELFError' diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 3d92d9d..03fc5a9 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -66,7 +66,8 @@ def run_test_on_file(filename, verbose=False, opt=None): '--debug-dump=info', '--debug-dump=decodedline', '--debug-dump=frames', '--debug-dump=frames-interp', '--debug-dump=aranges', '--debug-dump=pubtypes', - '--debug-dump=pubnames', '--debug-dump=loc' + '--debug-dump=pubnames', '--debug-dump=loc', + '--debug-dump=Ranges' ] else: options = [opt] -- 2.30.2