From ac90d06f79bdf97ec7daf6d2a0b132cbd5619362 Mon Sep 17 00:00:00 2001 From: rvijayc <44033253+rvijayc@users.noreply.github.com> Date: Mon, 24 Dec 2018 06:02:08 -0800 Subject: [PATCH] Added support for decoding .debug_pubtypes and .debug_pubnames sections (#208) * Added support for decoding .debug_pubtypes and .debug_pubnames sections * Added reference output to dwarf_pubnames_types.py example. * Added readelf support, fixed review comments and documentation updates * Avoid printing the entire die in pubnames example to workaround Python2 vs 3 imcompatibilites --- elftools/dwarf/dwarfinfo.py | 39 +++- elftools/dwarf/namelut.py | 202 ++++++++++++++++++ elftools/dwarf/structs.py | 14 ++ elftools/elf/elffile.py | 12 +- examples/dwarf_pubnames_types.py | 114 ++++++++++ .../reference_output/dwarf_pubnames_types.out | 17 ++ scripts/readelf.py | 45 +++- test/run_readelf_tests.py | 4 +- 8 files changed, 440 insertions(+), 7 deletions(-) create mode 100755 elftools/dwarf/namelut.py create mode 100644 examples/dwarf_pubnames_types.py create mode 100644 examples/reference_output/dwarf_pubnames_types.out diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index b8faf9d..45903ec 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -19,6 +19,7 @@ from .callframe import CallFrameInfo from .locationlists import LocationLists from .ranges import RangeLists from .aranges import ARanges +from .namelut import NameLUT # Describes a debug section @@ -67,7 +68,9 @@ class DWARFInfo(object): debug_str_sec, debug_loc_sec, debug_ranges_sec, - debug_line_sec): + debug_line_sec, + debug_pubtypes_sec, + debug_pubnames_sec): """ config: A DwarfConfig object @@ -86,6 +89,8 @@ class DWARFInfo(object): self.debug_loc_sec = debug_loc_sec self.debug_ranges_sec = debug_ranges_sec self.debug_line_sec = debug_line_sec + self.debug_pubtypes_sec = debug_pubtypes_sec + self.debug_pubnames_sec = debug_pubnames_sec # This is the DWARFStructs the context uses, so it 
doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -185,6 +190,38 @@ class DWARFInfo(object): for_eh_frame=True) return cfi.get_entries() + def get_pubtypes(self): + """ + Returns a NameLUT object that contains information read from the + .debug_pubtypes section in the ELF file. + + NameLUT is essentially a dictionary containing the CU/DIE offsets of + each symbol. See the NameLUT doc string for more details. + """ + + if self.debug_pubtypes_sec: + return NameLUT(self.debug_pubtypes_sec.stream, + self.debug_pubtypes_sec.size, + self.structs) + else: + return None + + def get_pubnames(self): + """ + Returns a NameLUT object that contains information read from the + .debug_pubnames section in the ELF file. + + NameLUT is essentially a dictionary containing the CU/DIE offsets of + each symbol. See the NameLUT doc string for more details. + """ + + if self.debug_pubnames_sec: + return NameLUT(self.debug_pubnames_sec.stream, + self.debug_pubnames_sec.size, + self.structs) + else: + return None + def get_aranges(self): """ Get an ARanges object representing the .debug_aranges section of the DWARF data, or None if the section doesn't exist diff --git a/elftools/dwarf/namelut.py b/elftools/dwarf/namelut.py new file mode 100755 index 0000000..b7de798 --- /dev/null +++ b/elftools/dwarf/namelut.py @@ -0,0 +1,202 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/namelut.py +# +# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames) +# +# Vijay Ramasami (rvijayc@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import collections +from collections import OrderedDict +from ..common.utils import struct_parse +from bisect import bisect_right +import math +from ..construct import CString, Struct + +NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs') + 
+# The Mapping ABC lives in collections.abc on Python 3 (the alias in +# collections was removed in Python 3.10); Python 2 only has it in collections. +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping + + +class NameLUT(Mapping): + """ + A "Name LUT" holds any of the tables specified by .debug_pubtypes or + .debug_pubnames sections. This is basically a dictionary where the key is + the symbol name (either a public variable, function or a type), and the + value is the tuple (cu_offset, die_offset) corresponding to the variable. + The die_offset is an absolute offset (meaning, it can be used to search the + CU by iterating until a match is obtained). + + An ordered dictionary is used to preserve the CU order (i.e., items are + stored on a per-CU basis, as they were originally in the .debug_* section). + + Usage: + + The NameLUT walks and talks like a dictionary and hence it can be used as + such. Some examples below: + + # get the pubnames (a NameLUT from DWARF info). + pubnames = dwarf_info.get_pubnames() + + # lookup a variable. + entry1 = pubnames["var_name1"] + entry2 = pubnames.get("var_name2", default=<default_var>) + print(entry2.cu_ofs) + ... + + # iterate over items. + for (name, entry) in pubnames.items(): + # do stuff with name, entry.cu_ofs, entry.die_ofs + + # iterate over items on a per-CU basis. + import itertools + for cu_ofs, item_list in itertools.groupby(pubnames.items(), + key = lambda x: x[1].cu_ofs): + # items are now grouped by cu_ofs. + # item_list is an iterator yielding (name, NameLUTEntry) pairs + # belonging to cu_ofs. + # We can parse the CU at cu_offset and use the parsed CU results + # to parse the pubname DIEs in the CU listed by item_list. + for item in item_list: + # work with item which is part of the CU with cu_ofs. + + """ + + def __init__(self, stream, size, structs): + + self._stream = stream + self._size = size + self._structs = structs + # entries are lazily loaded on demand. + self._entries = None + # CU headers (for readelf). + self._cu_headers = None + + def get_entries(self): + """ + Returns the parsed NameLUT entries. The returned object is a dictionary + with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as + the value.
+ + This is useful when dealing with very large ELF files with millions of + entries. The returned entries can be pickled to a file and restored by + calling set_entries on subsequent loads. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries + + def set_entries(self, entries, cu_headers): + """ + Set the NameLUT entries from an external source. The input is a + dictionary with the symbol name as the key and NameLUTEntry(cu_ofs, + die_ofs) as the value. + + This option is useful when dealing with very large ELF files with + millions of entries. The entries can be parsed once and pickled to a + file and can be restored via this function on subsequent loads. + """ + self._entries = entries + self._cu_headers = cu_headers + + def __len__(self): + """ + Returns the number of entries in the NameLUT. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return len(self._entries) + + def __getitem__(self, name): + """ + Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds + to the given symbol name. Raises KeyError if the name is not present + (the Mapping mixins, e.g. the `in` operator, rely on this); use get() + for a lookup that returns a default instead. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries[name] + + def __iter__(self): + """ + Returns an iterator to the NameLUT dictionary. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return iter(self._entries) + + def items(self): + """ + Returns the NameLUT dictionary items. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.items() + + def get(self, name, default=None): + """ + Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or + `default` (None unless given) if the symbol does not exist in the + corresponding section.
+ """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.get(name, default) + + def get_cu_headers(self): + """ + Returns all CU headers. Mainly required for readelf. + """ + if self._cu_headers is None: + self._entries, self._cu_headers = self._get_entries() + + return self._cu_headers + + def _get_entries(self): + """ + Parse the (name, cu_ofs, die_ofs) information from this section and + store as a dictionary. + + Returns a tuple (entries, cu_headers): entries is an OrderedDict mapping + each symbol name to NameLUTEntry(cu_ofs, die_ofs), cu_headers is the list + of the parsed per-CU headers in the order they were read. + """ + + self._stream.seek(0) + entries = OrderedDict() + cu_headers = [] + offset = 0 + # built once here rather than once per entry inside the loops. + die_ofs_struct = self._structs.Dwarf_offset('die_ofs') + name_struct = CString('name') + + # each run of this loop will fetch one CU worth of entries. + while offset < self._size: + + # read the header for this CU. + namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header, + self._stream, offset) + cu_headers.append(namelut_hdr) + # compute the next offset. + offset = (offset + namelut_hdr.unit_length + + self._structs.initial_length_field_size()) + + # before inner loop, latch data that will be used in the inner + # loop to avoid attribute access and other computation. + hdr_cu_ofs = namelut_hdr.debug_info_offset + # the entries of a CU are (die_ofs, name) pairs, ended by a zero + # die_ofs that has no name after it. The offset is read on its own + # first so that a CU with no entries at all is handled too. + while True: + die_ofs = struct_parse(die_ofs_struct, self._stream) + # if it is zero, then we are done with this CU. + if die_ofs == 0: + break + # else this is a valid DIE, get the name as well. + name = struct_parse(name_struct, self._stream) + # add this entry to the look-up dictionary. + entries[name.decode('utf-8')] = NameLUTEntry( + cu_ofs = hdr_cu_ofs, + die_ofs = hdr_cu_ofs + die_ofs) + + # return the entries parsed so far.
+ return (entries, cu_headers) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index f3b6ef3..6dde82b 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -34,6 +34,9 @@ class DWARFStructs(object): Dwarf_offset: 32-bit or 64-bit word, depending on dwarf_format + Dwarf_length: + 32-bit or 64-bit word, depending on dwarf_format + Dwarf_target_addr: 32-bit or 64-bit word, depending on address size @@ -105,6 +108,7 @@ class DWARFStructs(object): self.Dwarf_uint32 = ULInt32 self.Dwarf_uint64 = ULInt64 self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64 + self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64 self.Dwarf_target_addr = ( ULInt32 if self.address_size == 4 else ULInt64) self.Dwarf_int8 = SLInt8 @@ -117,6 +121,7 @@ class DWARFStructs(object): self.Dwarf_uint32 = UBInt32 self.Dwarf_uint64 = UBInt64 self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64 + self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64 self.Dwarf_target_addr = ( UBInt32 if self.address_size == 4 else UBInt64) self.Dwarf_int8 = SBInt8 @@ -132,6 +137,7 @@ class DWARFStructs(object): self._create_lineprog_header() self._create_callframe_entry_headers() self._create_aranges_header() + self._create_nameLUT_header() def _create_initial_length(self): def _InitialLength(name): @@ -218,6 +224,14 @@ class DWARFStructs(object): self.Dwarf_uint8('segment_size') ) + def _create_nameLUT_header(self): + self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_offset('debug_info_offset'), + self.Dwarf_length('debug_info_length') + ) + def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. 
diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 6c02948..537a647 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -167,7 +167,8 @@ class ELFFile(object): section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', - '.debug_loc', '.debug_ranges') + '.debug_loc', '.debug_ranges', '.debug_pubtypes', + '.debug_pubnames') compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -178,8 +179,8 @@ class ELFFile(object): (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, - debug_loc_sec_name, debug_ranges_sec_name, - eh_frame_sec_name) = section_names + debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, + debug_pubnames_name, eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -207,7 +208,10 @@ class ELFFile(object): debug_str_sec=debug_sections[debug_str_sec_name], debug_loc_sec=debug_sections[debug_loc_sec_name], debug_ranges_sec=debug_sections[debug_ranges_sec_name], - debug_line_sec=debug_sections[debug_line_sec_name]) + debug_line_sec=debug_sections[debug_line_sec_name], + debug_pubtypes_sec = debug_sections[debug_pubtypes_name], + debug_pubnames_sec = debug_sections[debug_pubnames_name] + ) def get_machine_arch(self): """ Return the machine architecture, as detected from the ELF header. diff --git a/examples/dwarf_pubnames_types.py b/examples/dwarf_pubnames_types.py new file mode 100644 index 0000000..9a32ce1 --- /dev/null +++ b/examples/dwarf_pubnames_types.py @@ -0,0 +1,114 @@ +#------------------------------------------------------------------------------- +# elftools example: dwarf_pubnames_types.py +# +# Dump the contents of .debug_pubnames and .debug_pubtypes sections from the +# ELF file. +# +# Note: sample_exe64.elf doesn't have a .debug_pubtypes section. 
+# +# Vijay Ramasami (rvijayc@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from __future__ import print_function +import sys + +# If pyelftools is not installed, the example can also run from the root or +# examples/ dir of the source distribution. +sys.path[0:0] = ['.', '..'] + +from elftools.elf.elffile import ELFFile +from elftools.common.py3compat import bytes2str + +def process_file(filename): + print('Processing file:', filename) + with open(filename, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + # get_dwarf_info returns a DWARFInfo context object, which is the + # starting point for all DWARF-based processing in pyelftools. + dwarfinfo = elffile.get_dwarf_info() + + # get .debug_pubnames section. + pubnames = dwarfinfo.get_pubnames() + if pubnames is None: + print('ERROR: No .debug_pubnames section found in ELF.') + else: + print('%d entries found in .debug_pubnames' % len(pubnames)) + + # try getting information on a global symbol. + print('Trying pubnames example ...') + sym_name = 'main' + try: + entry = pubnames[sym_name] + except KeyError: + print('ERROR: No pubname entry found for ' + sym_name) + else: + print('%s: cu_ofs = %d, die_ofs = %d' % + (sym_name, entry.cu_ofs, entry.die_ofs)) + + # get the actual CU/DIE that has this information. + print('Fetching the actual die for %s ...' % sym_name) + for cu in dwarfinfo.iter_CUs(): + if cu.cu_offset == entry.cu_ofs: + for die in cu.iter_DIEs(): + if die.offset == entry.die_ofs: + print('Die Name: %s' % + bytes2str(die.attributes['DW_AT_name'].value)) + + # dump all entries in .debug_pubnames section.
+ print('Dumping .debug_pubnames table ...') + print('-' * 66) + print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS')) + print('-' * 66) + for (name, entry) in pubnames.items(): + print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs)) + print('-' * 66) + + # get .debug_pubtypes section. + pubtypes = dwarfinfo.get_pubtypes() + if pubtypes is None: + print('ERROR: No .debug_pubtypes section found in ELF') + else: + print('%d entries found in .debug_pubtypes' % len(pubtypes)) + + # try getting information on a global type. + sym_name = 'char' + # note: using the .get() API (pubtypes[key] will also work). + entry = pubtypes.get(sym_name) + if entry is None: + print('ERROR: No pubtype entry for %s' % sym_name) + else: + print('%s: cu_ofs %d, die_ofs %d' % + (sym_name, entry.cu_ofs, entry.die_ofs)) + + # get the actual CU/DIE that has this information. + print('Fetching the actual die for %s ...' % sym_name) + for cu in dwarfinfo.iter_CUs(): + if cu.cu_offset == entry.cu_ofs: + for die in cu.iter_DIEs(): + if die.offset == entry.die_ofs: + print('Die Name: %s' % + bytes2str(die.attributes['DW_AT_name'].value)) + + # dump all entries in .debug_pubtypes section. 
+ print('Dumping .debug_pubtypes table ...') + print('-' * 66) + print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS')) + print('-' * 66) + for (name, entry) in pubtypes.items(): + print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs)) + print('-' * 66) + +if __name__ == '__main__': + # '--test' is an optional leading flag; a file name is required either way. + args = sys.argv[1:] + if args and args[0] == '--test': + args = args[1:] + if not args: + print('Expected usage: {0} <executable>'.format(sys.argv[0])) + sys.exit(1) + process_file(args[0]) diff --git a/examples/reference_output/dwarf_pubnames_types.out b/examples/reference_output/dwarf_pubnames_types.out new file mode 100644 index 0000000..3ed3d26 --- /dev/null +++ b/examples/reference_output/dwarf_pubnames_types.out @@ -0,0 +1,17 @@ +Processing file: ./examples/sample_exe64.elf +5 entries found in .debug_pubnames +Trying pubnames example ... +main: cu_ofs = 258, die_ofs = 303 +Fetching the actual die for main ... +Die Name: main +Dumping .debug_pubnames table ... +------------------------------------------------------------------ + Symbol CU_OFS DIE_OFS +------------------------------------------------------------------ + _IO_stdin_used 119 230 + main 258 303 + glob 258 395 + __libc_csu_fini 418 495 + __libc_csu_init 418 523 +------------------------------------------------------------------ +ERROR: No .debug_pubtypes section found in ELF diff --git a/scripts/readelf.py b/scripts/readelf.py index 087218a..dfcd98d 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -10,6 +10,13 @@ import argparse import os, sys import string +import itertools +# Note: zip has different behaviour between Python 2.x and 3.x. +# - Using izip ensures compatibility. +try: + from itertools import izip +except ImportError: + izip = zip # For running from development directory. It should take precedence over the # installed pyelftools.
@@ -457,7 +464,7 @@ class ReadElf(object): for note in section.iter_notes(): self._emitline("\nDisplaying notes found in: {}".format( section.name)) - self._emitline(' Owner Data size Description') + self._emitline(' Owner Data size Description') self._emitline(' %s %s\t%s' % ( note['n_name'].ljust(20), self._format_hex(note['n_descsz'], fieldsize=8), @@ -753,6 +760,8 @@ class ReadElf(object): self._dump_debug_frames_interp() elif dump_what == 'aranges': self._dump_debug_aranges() + elif dump_what in { 'pubtypes', 'pubnames' }: + self._dump_debug_namelut(dump_what) else: self._emitline('debug dump not yet supported for "%s"' % dump_what) @@ -1106,6 +1115,40 @@ class ReadElf(object): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) + def _dump_debug_namelut(self, what): + """ + Dump the .debug_pubnames or the .debug_pubtypes section. + + what: 'pubnames' selects .debug_pubnames; any other value selects + .debug_pubtypes. + """ + if what == 'pubnames': + namelut = self._dwarfinfo.get_pubnames() + section = self._dwarfinfo.debug_pubnames_sec + else: + namelut = self._dwarfinfo.get_pubtypes() + section = self._dwarfinfo.debug_pubtypes_sec + + # readelf prints nothing if the section is not present. + if namelut is None or len(namelut) == 0: + return + + self._emitline('Contents of the %s section:' % section.name) + self._emitline() + + cu_headers = namelut.get_cu_headers() + + # go over CU-by-CU first and item-by-item next.
+ for (cu_hdr, (cu_ofs, items)) in izip(cu_headers, itertools.groupby( + namelut.items(), key = lambda x: x[1].cu_ofs)): + + self._emitline(' Length: %d' % cu_hdr.unit_length) + self._emitline(' Version: %d' % cu_hdr.version) + self._emitline(' Offset into .debug_info section: 0x%x' % cu_hdr.debug_info_offset) + self._emitline(' Size of area in .debug_info section: %d' % cu_hdr.debug_info_length) + self._emitline() + self._emitline(' Offset Name') + for item in items: + self._emitline(' %x %s' % (item[1].die_ofs - cu_ofs, item[0])) + self._emitline() + def _dump_debug_aranges(self): """ Dump the aranges table """ diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 7f4631b..76ecd72 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -58,7 +58,9 @@ def run_test_on_file(filename, verbose=False): '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V', '--debug-dump=info', '--debug-dump=decodedline', '--debug-dump=frames', '--debug-dump=frames-interp', - '--debug-dump=aranges']: + '--debug-dump=aranges', '--debug-dump=pubtypes', + '--debug-dump=pubnames' + ]: if verbose: testlog.info("..option='%s'" % option) # TODO(zlobober): this is a dirty hack to make tests work for ELF core -- 2.30.2