From f9f793d6cce9dc3a80bb138dcf5b789e0916d121 Mon Sep 17 00:00:00 2001 From: Dorothy Chen Date: Fri, 1 Jul 2016 16:44:36 -0400 Subject: [PATCH] parsed .debug_aranges section, simple interface mapping address ranges to CU offsets fixed bug in finding correct arange entry, given an addr cosmetic changes per pull request thread #108, and updated relevant tests minor arange test changes filled in comments update test formatting for aranges update test formatting for aranges Fix off-by-one bug For real this time. If the query addr == beginning address, bisect_left won't work. --- elftools/dwarf/aranges.py | 110 ++++++++++++++++++++++++++++++++++++ elftools/dwarf/dwarfinfo.py | 14 +++++ elftools/dwarf/structs.py | 10 ++++ elftools/elf/elffile.py | 5 +- scripts/readelf.py | 40 +++++++++++++ test/run_readelf_tests.py | 14 +++-- 6 files changed, 186 insertions(+), 7 deletions(-) create mode 100644 elftools/dwarf/aranges.py diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py new file mode 100644 index 0000000..32c287d --- /dev/null +++ b/elftools/dwarf/aranges.py @@ -0,0 +1,110 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/aranges.py +# +# DWARF aranges section decoding (.debug_aranges) +# +# Dorothy Chen (dorothchen@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +from collections import namedtuple +from ..common.utils import struct_parse +from bisect import bisect_right +import math + +# An entry in the aranges table; +# begin_addr: The beginning address in the CU +# length: The length of the address range in this entry +# info_offset: The CU's offset into .debug_info +# see 6.1.2 in DWARF4 docs for explanation of the remaining fields +ARangeEntry = namedtuple('ARangeEntry', + 'begin_addr length info_offset unit_length version address_size segment_size') + +class ARanges(object): + """ ARanges table in DWARF + 
+ stream, size: + A stream holding the .debug_aranges section, and its size + + structs: + A DWARFStructs instance for parsing the data + """ + def __init__(self, stream, size, structs): + self.stream = stream + self.size = size + self.structs = structs + + # Get entries of aranges table in the form of ARangeEntry tuples + self.entries = self._get_entries() + + # Sort entries by the beginning address + self.entries.sort(key=lambda entry: entry.begin_addr) + + # Create list of keys (first addresses) for better searching + self.keys = [entry.begin_addr for entry in self.entries] + + + def cu_offset_at_addr(self, addr): + """ Given an address, get the offset of the CU it belongs to, where + 'offset' refers to the offset in the .debug_info section. + """ + tup = self.entries[bisect_right(self.keys, addr) - 1] + return tup.info_offset + + + #------ PRIVATE ------# + def _get_entries(self): + """ Build and return a list of ARangeEntry tuples, one for each range of addresses + """ + self.stream.seek(0) + entries = [] + offset = 0 + + # one loop == one "set" == one CU + while offset < self.size : + aranges_header = struct_parse(self.structs.Dwarf_aranges_header, + self.stream, offset) + addr_size = self._get_addr_size_struct(aranges_header["address_size"]) + + # No segmentation + if aranges_header["segment_size"] == 0: + # pad to nearest multiple of tuple size + tuple_size = aranges_header["address_size"] * 2 + fp = self.stream.tell() + seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size) + self.stream.seek(seek_to) + + # entries in this set/CU + addr = struct_parse(addr_size('addr'), self.stream) + length = struct_parse(addr_size('length'), self.stream) + while addr != 0 or length != 0: + # 'begin_addr length info_offset unit_length version address_size segment_size' + entries.append( + ARangeEntry(begin_addr=addr, + length=length, + info_offset=aranges_header["debug_info_offset"], + unit_length=aranges_header["unit_length"], + version=aranges_header["version"], + 
address_size=aranges_header["address_size"], + segment_size=aranges_header["segment_size"])) + addr = struct_parse(addr_size('addr'), self.stream) + length = struct_parse(addr_size('length'), self.stream) + # Segmentation exists in executable + elif aranges_header["segment_size"] != 0: + raise NotImplementedError("Segmentation not implemented") + + offset = (offset + + aranges_header.unit_length + + self.structs.initial_length_field_size()) + + return entries + + def _get_addr_size_struct(self, addr_header_value): + """ Given this set's header value (int) for the address size, + get the Construct representation of that size + """ + if addr_header_value == 4: + return self.structs.Dwarf_uint32 + else: + assert addr_header_value == 8 + return self.structs.Dwarf_uint64 diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 5a5c41a..330a238 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -18,6 +18,7 @@ from .lineprogram import LineProgram from .callframe import CallFrameInfo from .locationlists import LocationLists from .ranges import RangeLists +from .aranges import ARanges # Describes a debug section @@ -57,6 +58,7 @@ class DWARFInfo(object): def __init__(self, config, debug_info_sec, + debug_aranges_sec, debug_abbrev_sec, debug_frame_sec, eh_frame_sec, @@ -74,6 +76,7 @@ class DWARFInfo(object): """ self.config = config self.debug_info_sec = debug_info_sec + self.debug_aranges_sec = debug_aranges_sec self.debug_abbrev_sec = debug_abbrev_sec self.debug_frame_sec = debug_frame_sec self.eh_frame_sec = eh_frame_sec @@ -168,6 +171,17 @@ class DWARFInfo(object): base_structs=self.structs) return cfi.get_entries() + def get_aranges(self): + """ Get an ARanges object representing the .debug_aranges section of + the DWARF data, or None if the section doesn't exist + """ + if self.debug_aranges_sec: + return ARanges(self.debug_aranges_sec.stream, + self.debug_aranges_sec.size, + self.structs) + else: + return None + def 
location_lists(self): """ Get a LocationLists object representing the .debug_loc section of the DWARF data, or None if this section doesn't exist. diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index f90a80a..e25dc84 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -132,6 +132,7 @@ class DWARFStructs(object): self._create_dw_form() self._create_lineprog_header() self._create_callframe_entry_headers() + self._create_aranges_header() def _create_initial_length(self): def _InitialLength(name): @@ -210,6 +211,15 @@ class DWARFStructs(object): DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''), ) + def _create_aranges_header(self): + self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_offset('debug_info_offset'), # a little tbd + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_size') + ) + def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 2bc2651..d198749 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -151,7 +151,7 @@ class ELFFile(object): # Sections that aren't found will be passed as None to DWARFInfo. 
# - section_names = ('.debug_info', '.debug_abbrev', '.debug_str', + section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', '.debug_loc', '.debug_ranges') @@ -159,7 +159,7 @@ class ELFFile(object): if compressed: section_names = tuple(map(lambda x: '.z' + x[1:], section_names)) - debug_info_sec_name, debug_abbrev_sec_name, debug_str_sec_name, \ + debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, \ debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, \ debug_ranges_sec_name = section_names @@ -182,6 +182,7 @@ class ELFFile(object): default_address_size=self.elfclass // 8, machine_arch=self.get_machine_arch()), debug_info_sec=debug_sections[debug_info_sec_name], + debug_aranges_sec=debug_sections[debug_aranges_sec_name], debug_abbrev_sec=debug_sections[debug_abbrev_sec_name], debug_frame_sec=debug_sections[debug_frame_sec_name], # TODO(eliben): reading of eh_frame is not hooked up yet diff --git a/scripts/readelf.py b/scripts/readelf.py index 909faff..60c9c3e 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -662,6 +662,8 @@ class ReadElf(object): self._dump_debug_frames() elif dump_what == 'frames-interp': self._dump_debug_frames_interp() + elif dump_what == 'aranges': + self._dump_debug_aranges() else: self._emitline('debug dump not yet supported for "%s"' % dump_what) @@ -979,6 +981,44 @@ class ReadElf(object): self._emit(describe_CFI_instructions(entry)) self._emitline() + def _dump_debug_aranges(self): + """ Dump the aranges table + """ + aranges_table = self._dwarfinfo.get_aranges() + if aranges_table == None: + return + # seems redundant, but we need to get the unsorted set of entries to match system readelf + unordered_entries = aranges_table._get_entries() + + if len(unordered_entries) == 0: + self._emitline() + self._emitline("Section '.debug_aranges' has no debugging data.") + return + + self._emitline('Contents of the %s section:' % 
self._dwarfinfo.debug_aranges_sec.name) + self._emitline() + prev_offset = None + for entry in unordered_entries: + if prev_offset != entry.info_offset: + if entry != unordered_entries[0]: + self._emitline(' %s %s' % ( + self._format_hex(0, fullhex=True, lead0x=False), + self._format_hex(0, fullhex=True, lead0x=False))) + self._emitline(' Length: %d' % (entry.unit_length)) + self._emitline(' Version: %d' % (entry.version)) + self._emitline(' Offset into .debug_info: 0x%x' % (entry.info_offset)) + self._emitline(' Pointer Size: %d' % (entry.address_size)) + self._emitline(' Segment Size: %d' % (entry.segment_size)) + self._emitline() + self._emitline(' Address Length') + self._emitline(' %s %s' % ( + self._format_hex(entry.begin_addr, fullhex=True, lead0x=False), + self._format_hex(entry.length, fullhex=True, lead0x=False))) + prev_offset = entry.info_offset + self._emitline(' %s %s' % ( + self._format_hex(0, fullhex=True, lead0x=False), + self._format_hex(0, fullhex=True, lead0x=False))) + def _dump_debug_frames_interp(self): """ Dump the interpreted (decoded) frame information from .debug_frame """ diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 00d9168..f8990e1 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -7,7 +7,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -import os, sys +import os, sys, platform import re from difflib import SequenceMatcher from optparse import OptionParser @@ -26,9 +26,12 @@ testlog.addHandler(logging.StreamHandler(sys.stdout)) # Set the path for calling readelf. We carry our own version of readelf around, # because binutils tend to change its output even between daily builds of the # same minor release and keeping track is a headache. 
-READELF_PATH = 'test/external_tools/readelf' -if not os.path.exists(READELF_PATH): - READELF_PATH = 'readelf' +if platform.system() == "Darwin": # MacOS + READELF_PATH = 'greadelf' +else: + READELF_PATH = 'test/external_tools/readelf' + if not os.path.exists(READELF_PATH): + READELF_PATH = 'readelf' def discover_testfiles(rootdir): """ Discover test files in the given directory. Yield them one by one. @@ -48,7 +51,8 @@ def run_test_on_file(filename, verbose=False): for option in [ '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V', '--debug-dump=info', '--debug-dump=decodedline', - '--debug-dump=frames', '--debug-dump=frames-interp']: + '--debug-dump=frames', '--debug-dump=frames-interp', + '--debug-dump=aranges']: if verbose: testlog.info("..option='%s'" % option) # stdouts will be a 2-element list: output of readelf and output # of scripts/readelf.py -- 2.30.2