from .locationlists import LocationLists
from .ranges import RangeLists
from .aranges import ARanges
+from .namelut import NameLUT
# Describes a debug section
debug_str_sec,
debug_loc_sec,
debug_ranges_sec,
- debug_line_sec):
+ debug_line_sec,
+ debug_pubtypes_sec,
+ debug_pubnames_sec):
""" config:
A DwarfConfig object
self.debug_loc_sec = debug_loc_sec
self.debug_ranges_sec = debug_ranges_sec
self.debug_line_sec = debug_line_sec
+ self.debug_pubtypes_sec = debug_pubtypes_sec
+ self.debug_pubnames_sec = debug_pubnames_sec
# This is the DWARFStructs the context uses, so it doesn't depend on
# DWARF format and address_size (these are determined per CU) - set them
for_eh_frame=True)
return cfi.get_entries()
def get_pubtypes(self):
    """ Build a NameLUT over the .debug_pubtypes section of the ELF file.

        A NameLUT behaves like a dictionary keyed by symbol name whose
        values carry the CU/DIE offsets of that symbol; the NameLUT
        docstring has the details.

        Returns None when no .debug_pubtypes section was provided.
    """
    section = self.debug_pubtypes_sec
    if not section:
        return None
    return NameLUT(section.stream, section.size, self.structs)
+
def get_pubnames(self):
    """ Build a NameLUT over the .debug_pubnames section of the ELF file.

        A NameLUT behaves like a dictionary keyed by symbol name whose
        values carry the CU/DIE offsets of that symbol; the NameLUT
        docstring has the details.

        Returns None when no .debug_pubnames section was provided.
    """
    section = self.debug_pubnames_sec
    if not section:
        return None
    return NameLUT(section.stream, section.size, self.structs)
+
def get_aranges(self):
""" Get an ARanges object representing the .debug_aranges section of
the DWARF data, or None if the section doesn't exist
--- /dev/null
+#-------------------------------------------------------------------------------
+# elftools: dwarf/namelut.py
+#
+# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
+#
+# Vijay Ramasami (rvijayc@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
import collections
import math
import os
from bisect import bisect_right
from collections import OrderedDict

try:
    from collections.abc import Mapping
except ImportError:
    # Python 2: the container ABCs live directly in ``collections``.
    from collections import Mapping

from ..common.utils import struct_parse
from ..construct import CString, Struct
+
NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs')


class NameLUT(Mapping):
    """
    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
    .debug_pubnames sections. This is basically a dictionary where the key is
    the symbol name (either a public variable, function or a type), and the
    value is the tuple (cu_offset, die_offset) corresponding to the variable.
    The die_offset is an absolute offset (meaning, it can be used to search the
    CU by iterating until a match is obtained).

    An ordered dictionary is used to preserve the CU order, i.e. items are
    stored on a per-CU basis, as they were originally in the .debug_* section.

    Usage:

    The NameLUT walks and talks like a dictionary and hence it can be used as
    such. Some examples below:

        # get the pubnames (a NameLUT from DWARF info).
        pubnames = dwarf_info.get_pubnames()

        # lookup a variable (raises KeyError if the name is unknown).
        entry1 = pubnames["var_name1"]
        entry2 = pubnames.get("var_name2", default=<default_var>)
        print(entry2.cu_ofs)
        ...

        # iterate over items.
        for (name, entry) in pubnames.items():
            # do stuff with name, entry.cu_ofs, entry.die_ofs

        # iterate over items on a per-CU basis.
        import itertools
        for cu_ofs, item_list in itertools.groupby(pubnames.items(),
                key = lambda x: x[1].cu_ofs):
            # items are now grouped by cu_ofs.
            # item_list is an iterator yielding the (name, NameLUTEntry)
            # pairs belonging to cu_ofs.
            # We can parse the CU at cu_ofs and use the parsed CU results
            # to parse the pubname DIEs in the CU listed by item_list.
            for item in item_list:
                # work with item which is part of the CU with cu_ofs.

    """

    def __init__(self, stream, size, structs):
        """
        stream:
            File-like object holding the section contents.

        size:
            Number of bytes of the section to parse.

        structs:
            Object providing Dwarf_offset, Dwarf_nameLUT_header and
            initial_length_field_size(), used to decode the section.
        """
        self._stream = stream
        self._size = size
        self._structs = structs
        # entries are lazily loaded on demand.
        self._entries = None
        # CU headers (for readelf).
        self._cu_headers = None

    def _load(self):
        """
        Parse the section and cache both the entries and the CU headers.
        """
        self._entries, self._cu_headers = self._get_entries()

    def _loaded_entries(self):
        """
        Return the entry dictionary, parsing the section on first use.
        """
        if self._entries is None:
            self._load()
        return self._entries

    def get_entries(self):
        """
        Returns the parsed NameLUT entries. The returned object is a dictionary
        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
        the value.

        This is useful when dealing with very large ELF files with millions of
        entries. The returned entries can be pickled to a file and restored by
        calling set_entries on subsequent loads.
        """
        return self._loaded_entries()

    def set_entries(self, entries, cu_headers):
        """
        Set the NameLUT entries from an external source.

        entries:
            Dictionary with the symbol name as the key and
            NameLUTEntry(cu_ofs, die_ofs) as the value.

        cu_headers:
            The CU headers to be returned by get_cu_headers().

        This option is useful when dealing with very large ELF files with
        millions of entries. The entries can be parsed once and pickled to a
        file and can be restored via this function on subsequent loads.
        """
        self._entries = entries
        self._cu_headers = cu_headers

    def __len__(self):
        """
        Returns the number of entries in the NameLUT.
        """
        return len(self._loaded_entries())

    def __getitem__(self, name):
        """
        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
        to the given symbol name.

        Raises KeyError if the name is not present. The Mapping mixins
        (notably the `in` operator) rely on that exception to detect missing
        keys; use get() to obtain a default instead.
        """
        return self._loaded_entries()[name]

    def __iter__(self):
        """
        Returns an iterator over the symbol names in the NameLUT.
        """
        return iter(self._loaded_entries())

    def items(self):
        """
        Returns the NameLUT dictionary items.
        """
        return self._loaded_entries().items()

    def get(self, name, default=None):
        """
        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name, or
        `default` if the symbol does not exist in the corresponding section.
        """
        return self._loaded_entries().get(name, default)

    def get_cu_headers(self):
        """
        Returns all CU headers. Mainly required for readelf.
        """
        if self._cu_headers is None:
            self._load()
        return self._cu_headers

    def _get_entries(self):
        """
        Parse the (name, cu_ofs, die_ofs) information from this section.

        Returns a tuple (entries, cu_headers): `entries` is an OrderedDict
        mapping each decoded symbol name to its NameLUTEntry, `cu_headers`
        is the list of parsed per-CU headers in section order.
        """
        self._stream.seek(0)
        entries = OrderedDict()
        cu_headers = []
        offset = 0
        # Built once and reused for every tuple in the section.
        die_ofs_struct = self._structs.Dwarf_offset('die_ofs')
        name_struct = CString('name')

        # each run of this loop will fetch one CU worth of entries.
        while offset < self._size:

            # read the header for this CU.
            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                                       self._stream, offset)
            cu_headers.append(namelut_hdr)
            # compute the offset of the next CU's header.
            offset = (offset + namelut_hdr.unit_length +
                      self._structs.initial_length_field_size())

            # latch the CU offset so the inner loop avoids repeated
            # attribute access.
            hdr_cu_ofs = namelut_hdr.debug_info_offset

            # Each tuple is a DIE offset followed by a NUL-terminated name;
            # a zero DIE offset ends the set. The offset is tested before
            # any name is read, so a set holding only the terminator yields
            # no entries.
            while True:
                die_ofs = struct_parse(die_ofs_struct, self._stream)
                if die_ofs == 0:
                    break
                name = struct_parse(name_struct, self._stream)
                # die_ofs is stored as an absolute offset (CU offset added).
                entries[name.decode('utf-8')] = NameLUTEntry(
                    cu_ofs=hdr_cu_ofs,
                    die_ofs=hdr_cu_ofs + die_ofs)

        return (entries, cu_headers)
Dwarf_offset:
32-bit or 64-bit word, depending on dwarf_format
+ Dwarf_length:
+ 32-bit or 64-bit word, depending on dwarf_format
+
Dwarf_target_addr:
32-bit or 64-bit word, depending on address size
self.Dwarf_uint32 = ULInt32
self.Dwarf_uint64 = ULInt64
self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
+ self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
self.Dwarf_target_addr = (
ULInt32 if self.address_size == 4 else ULInt64)
self.Dwarf_int8 = SLInt8
self.Dwarf_uint32 = UBInt32
self.Dwarf_uint64 = UBInt64
self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
+ self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
self.Dwarf_target_addr = (
UBInt32 if self.address_size == 4 else UBInt64)
self.Dwarf_int8 = SBInt8
self._create_lineprog_header()
self._create_callframe_entry_headers()
self._create_aranges_header()
+ self._create_nameLUT_header()
def _create_initial_length(self):
def _InitialLength(name):
self.Dwarf_uint8('segment_size')
)
def _create_nameLUT_header(self):
    """ Define Dwarf_nameLUT_header, the per-CU header struct used for the
        .debug_pubtypes / .debug_pubnames name lookup tables.

        Fields, in order: unit_length, version, debug_info_offset,
        debug_info_length.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),
        self.Dwarf_length('debug_info_length'),
    )
    self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
                                       *header_fields)
+
def _create_lineprog_header(self):
# A file entry is terminated by a NULL byte, so we don't want to parse
# past it. Therefore an If is used.
section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev',
'.debug_str', '.debug_line', '.debug_frame',
- '.debug_loc', '.debug_ranges')
+ '.debug_loc', '.debug_ranges', '.debug_pubtypes',
+ '.debug_pubnames')
compressed = bool(self.get_section_by_name('.zdebug_info'))
if compressed:
(debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name,
debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
- debug_loc_sec_name, debug_ranges_sec_name,
- eh_frame_sec_name) = section_names
+ debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
+ debug_pubnames_name, eh_frame_sec_name) = section_names
debug_sections = {}
for secname in section_names:
debug_str_sec=debug_sections[debug_str_sec_name],
debug_loc_sec=debug_sections[debug_loc_sec_name],
debug_ranges_sec=debug_sections[debug_ranges_sec_name],
- debug_line_sec=debug_sections[debug_line_sec_name])
+ debug_line_sec=debug_sections[debug_line_sec_name],
+ debug_pubtypes_sec = debug_sections[debug_pubtypes_name],
+ debug_pubnames_sec = debug_sections[debug_pubnames_name]
+ )
def get_machine_arch(self):
""" Return the machine architecture, as detected from the ELF header.
--- /dev/null
+#-------------------------------------------------------------------------------
+# elftools example: dwarf_pubnames_types.py
+#
+# Dump the contents of .debug_pubnames and .debug_pubtypes sections from the
+# ELF file.
+#
+# Note: sample_exe64.elf doesn't have a .debug_pubtypes section.
+#
+# Vijay Ramasami (rvijayc@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from __future__ import print_function
+import sys
+
+# If pyelftools is not installed, the example can also run from the root or
+# examples/ dir of the source distribution.
+sys.path[0:0] = ['.', '..']
+
+from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import bytes2str
+
def process_file(filename):
    """ Print the .debug_pubnames and .debug_pubtypes tables of an ELF file.

        For each section that is present, one sample symbol ('main' for
        pubnames, 'char' for pubtypes) is looked up and resolved to its DIE,
        and then the whole table is dumped.
    """

    def show_die_name(dwarfinfo, entry):
        # Scan the CU whose offset matches the entry, and print the name of
        # the DIE located at the entry's DIE offset.
        for cu in dwarfinfo.iter_CUs():
            if cu.cu_offset != entry.cu_ofs:
                continue
            for die in cu.iter_DIEs():
                if die.offset == entry.die_ofs:
                    print('Die Name: %s' %
                          bytes2str(die.attributes['DW_AT_name'].value))

    def dump_table(title, lut):
        # Print every (name, cu_ofs, die_ofs) row of the table between rules.
        rule = '-' * 66
        print(title)
        print(rule)
        print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
        print(rule)
        for (name, entry) in lut.items():
            print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
        print(rule)

    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # get .debug_pubnames section.
        pubnames = dwarfinfo.get_pubnames()
        if pubnames is None:
            print('ERROR: No .debug_pubnames section found in ELF.')
        else:
            print('%d entries found in .debug_pubnames' % len(pubnames))

            # try getting information on a global symbol, using the
            # subscript API.
            print('Trying pubnames example ...')
            sym_name = 'main'
            try:
                entry = pubnames[sym_name]
            except KeyError:
                print('ERROR: No pubname entry found for ' + sym_name)
            else:
                print('%s: cu_ofs = %d, die_ofs = %d' %
                      (sym_name, entry.cu_ofs, entry.die_ofs))

                # get the actual CU/DIE that has this information.
                print('Fetching the actual die for %s ...' % sym_name)
                show_die_name(dwarfinfo, entry)

            # dump all entries in .debug_pubnames section.
            dump_table('Dumping .debug_pubnames table ...', pubnames)

        # get .debug_pubtypes section.
        pubtypes = dwarfinfo.get_pubtypes()
        if pubtypes is None:
            print('ERROR: No .debug_pubtypes section found in ELF')
        else:
            print('%d entries found in .debug_pubtypes' % len(pubtypes))

            # try getting information on a global type, this time through
            # the .get() API.
            sym_name = 'char'
            entry = pubtypes.get(sym_name)
            if entry is None:
                print('ERROR: No pubtype entry for %s' % sym_name)
            else:
                print('%s: cu_ofs %d, die_ofs %d' %
                      (sym_name, entry.cu_ofs, entry.die_ofs))

                # get the actual CU/DIE that has this information.
                print('Fetching the actual die for %s ...' % sym_name)
                show_die_name(dwarfinfo, entry)

            # dump all entries in .debug_pubtypes section.
            dump_table('Dumping .debug_pubtypes table ...', pubtypes)
+
if __name__ == '__main__':
    # Validate the argument list before indexing into it, so that running
    # with no arguments (or with '--test' but no file) prints the usage
    # message instead of raising IndexError.
    args = sys.argv[1:]
    test_mode = bool(args) and args[0] == '--test'
    if test_mode:
        # In test mode the file to process follows the flag.
        args = args[1:]

    if not args:
        print('Expected usage: {0} <executable>'.format(sys.argv[0]))
        sys.exit(1)

    process_file(args[0])
    if test_mode:
        sys.exit(0)
--- /dev/null
+Processing file: ./examples/sample_exe64.elf
+5 entries found in .debug_pubnames
+Trying pubnames example ...
+main: cu_ofs = 258, die_ofs = 303
+Fetching the actual die for main ...
+Die Name: main
+Dumping .debug_pubnames table ...
+------------------------------------------------------------------
+ Symbol CU_OFS DIE_OFS
+------------------------------------------------------------------
+ _IO_stdin_used 119 230
+ main 258 303
+ glob 258 395
+ __libc_csu_fini 418 495
+ __libc_csu_init 418 523
+------------------------------------------------------------------
+ERROR: No .debug_pubtypes section found in ELF
import argparse
import os, sys
import string
+import itertools
# Note: zip has different behaviour between Python 2.x and 3.x.
# - Using izip ensures compatibility.
try:
    from itertools import izip
except ImportError:
    # Python 3 has no itertools.izip; its built-in zip is used instead.
    # Only ImportError is caught so that unrelated failures (including
    # KeyboardInterrupt) are not silently swallowed.
    izip = zip
# For running from development directory. It should take precedence over the
# installed pyelftools.
for note in section.iter_notes():
self._emitline("\nDisplaying notes found in: {}".format(
section.name))
- self._emitline(' Owner Data size Description')
+ self._emitline(' Owner Data size Description')
self._emitline(' %s %s\t%s' % (
note['n_name'].ljust(20),
self._format_hex(note['n_descsz'], fieldsize=8),
self._dump_debug_frames_interp()
elif dump_what == 'aranges':
self._dump_debug_aranges()
+ elif dump_what in { 'pubtypes', 'pubnames' }:
+ self._dump_debug_namelut(dump_what)
else:
self._emitline('debug dump not yet supported for "%s"' % dump_what)
self._dwarfinfo.debug_frame_sec,
self._dwarfinfo.CFI_entries())
+ def _dump_debug_namelut(self, what):
+ """
+ Dump the debug pubnames section.
+ """
+ if what == 'pubnames':
+ namelut = self._dwarfinfo.get_pubnames()
+ section = self._dwarfinfo.debug_pubnames_sec
+ else:
+ namelut = self._dwarfinfo.get_pubtypes()
+ section = self._dwarfinfo.debug_pubtypes_sec
+
+ # readelf prints nothing if the section is not present.
+ if namelut is None or len(namelut) == 0:
+ return
+
+ self._emitline('Contents of the %s section:' % section.name)
+ self._emitline()
+
+ cu_headers = namelut.get_cu_headers()
+
+ # go over CU-by-CU first and item-by-item next.
+ for (cu_hdr, (cu_ofs, items)) in izip(cu_headers, itertools.groupby(
+ namelut.items(), key = lambda x: x[1].cu_ofs)):
+
+ self._emitline(' Length: %d' % cu_hdr.unit_length)
+ self._emitline(' Version: %d' % cu_hdr.version)
+ self._emitline(' Offset into .debug_info section: 0x%x' % cu_hdr.debug_info_offset)
+ self._emitline(' Size of area in .debug_info section: %d' % cu_hdr.debug_info_length)
+ self._emitline()
+ self._emitline(' Offset Name')
+ for item in items:
+ self._emitline(' %x %s' % (item[1].die_ofs - cu_ofs, item[0]))
+ self._emitline()
+
def _dump_debug_aranges(self):
""" Dump the aranges table
"""
'-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V',
'--debug-dump=info', '--debug-dump=decodedline',
'--debug-dump=frames', '--debug-dump=frames-interp',
- '--debug-dump=aranges']:
+ '--debug-dump=aranges', '--debug-dump=pubtypes',
+ '--debug-dump=pubnames'
+ ]:
if verbose: testlog.info("..option='%s'" % option)
# TODO(zlobober): this is a dirty hack to make tests work for ELF core