# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
-from bisect import bisect_left
+from bisect import bisect_right
from .die import DIE
+from ..common.utils import dwarf_assert
class CompileUnit(object):
return top
+ @property
+ def size(self):
+ return self['unit_length'] + self.structs.initial_length_field_size()
+
+ def get_DIE_from_refaddr(self, refaddr):
+ """ Obtain a DIE contained in this CU from a reference.
+
+ refaddr:
+ The offset into the .debug_info section, which must be
+ contained in this CU or a DWARFError will be raised.
+
+ When using a reference class attribute with a form that is
+ relative to the compile unit, add unit add the compile unit's
+ .cu_addr before calling this function.
+ """
+ # All DIEs are after the cu header and within the unit
+ dwarf_assert(
+ self.cu_die_offset <= refaddr < self.cu_offset + self.size,
+ 'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))
+
+ return self._get_cached_DIE(refaddr)
+
def iter_DIEs(self):
""" Iterate over all the DIEs in the CU, in order of their appearance.
Note that null DIEs will also be returned.
if not die.has_children:
return
- # `cur_offset` tracks the offset past our current DIE as we iterate
- # over children, providing the pivot as we bisect `self._diemap`
- # and ensuring that we insert our children (and child offsets)
- # in the correct order within both `self._dielist` and `self._diemap`.
+ # `cur_offset` tracks the stream offset of the next DIE to yield
+ # as we iterate over our children,
cur_offset = die.offset + die.size
while True:
- i = bisect_left(self._diemap, cur_offset)
- # Note that `self._diemap` cannot be empty because a `die`, the argument,
- # is already parsed.
- if i < len(self._diemap) and cur_offset == self._diemap[i]:
- child = self._dielist[i]
- else:
- child = DIE(
- cu=self,
- stream=die.stream,
- offset=cur_offset)
- self._dielist.insert(i, child)
- self._diemap.insert(i, cur_offset)
+ child = self._get_cached_DIE(cur_offset)
child.set_parent(die)
for d in self._iter_DIE_subtree(c):
yield d
yield die._terminator
+
+ def _get_cached_DIE(self, offset):
+ """ Given a DIE offset, look it up in the cache. If not present,
+ parse the DIE and insert it into the cache.
+
+ offset:
+ The offset of the DIE in the debug_info section to retrieve.
+
+ The stream reference is copied from the top DIE. The top die will
+ also be parsed and cached if needed.
+
+ See also get_DIE_from_refaddr(self, refaddr).
+ """
+ # The top die must be in the cache if any DIE is in the cache.
+ # The stream is the same for all DIEs in this CU, so populate
+ # the top DIE and obtain a reference to its stream.
+ top_die_stream = self.get_top_DIE().stream
+
+ # `offset` is the offset in the stream of the DIE we want to return.
+ # The map is maintined as a parallel array to the list. We call
+ # bisect each time to ensure new DIEs are inserted in the correct
+ # order within both `self._dielist` and `self._diemap`.
+ i = bisect_right(self._diemap, offset)
+
+ # Note that `self._diemap` cannot be empty because a the top DIE
+ # was inserted by the call to .get_top_DIE(). Also it has the minimal
+ # offset, so the bisect_right insert point will always be at least 1.
+ if offset == self._diemap[i - 1]:
+ die = self._dielist[i - 1]
+ else:
+ die = DIE(cu=self, stream=top_die_stream, offset=offset)
+ self._dielist.insert(i, die)
+ self._diemap.insert(i, offset)
+
+ return die
"""
return self.tag is None
+ def get_DIE_from_attribute(self, name):
+ """ Return the DIE referenced by the named attribute of this DIE.
+ The attribute must be in the reference attribute class.
+
+ name:
+ The name of the attribute in the reference class.
+ """
+ attr = self.attributes[name]
+ if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
+ 'DW_FORM_ref8', 'DW_FORM_ref'):
+ refaddr = self.cu.cu_offset + attr.raw_value
+ return self.cu.get_DIE_from_refaddr(refaddr)
+ elif attr.form in ('DW_FORM_refaddr'):
+ return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
+ elif attr.form in ('DW_FORM_ref_sig8'):
+ # Implement search type units for matching signature
+ raise NotImplementedError('%s (type unit by signature)' % attr.form)
+ elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'):
+ raise NotImplementedError('%s to dwo' % attr.form)
+ else:
+ raise DWARFError('%s is not a reference class form attribute' % attr)
+
def get_parent(self):
- """ The parent DIE of this DIE. None if the DIE has no parent (i.e. a
- top-level DIE).
+ """ Return the parent DIE of this DIE, or None if the DIE has no
+ parent (i.e. is a top-level DIE).
"""
+ if self._parent is None:
+ self._search_ancestor_offspring()
return self._parent
def get_full_path(self):
def iter_siblings(self):
""" Yield all siblings of this DIE
"""
- if self._parent:
- for sibling in self._parent.iter_children():
+ parent = self.get_parent()
+ if parent:
+ for sibling in parent.iter_children():
if sibling is not self:
yield sibling
else:
#------ PRIVATE ------#
+ def _search_ancestor_offspring(self):
+ """ Search our ancestors identifying their offspring to find our parent.
+
+ DIEs are stored as a flattened tree. The top DIE is the ancestor
+ of all DIEs in the unit. Each parent is guaranteed to be at
+ an offset less than their children. In each generation of children
+ the sibling with the closest offset not greater than our offset is
+ our ancestor.
+ """
+ # This code is called when get_parent notices that the _parent has
+ # not been identified. To avoid execution for each sibling record all
+ # the children of any parent iterated. Assuming get_parent will also be
+ # called for siblings, it is more efficient if siblings references are
+ # provided and no worse than a single walk if they are missing, while
+ # stopping iteration early could result in O(n^2) walks.
+ search = self.cu.get_top_DIE()
+ while search.offset < self.offset:
+ prev = search
+ for child in search.iter_children():
+ child.set_parent(search)
+ if child.offset <= self.offset:
+ prev = child
+
+ # We also need to check the offset of the terminator DIE
+ if search.has_children and search._terminator.offset <= self.offset:
+ prev = search._terminator
+
+ # If we didn't find a closer parent, give up, don't loop.
+ # Either we mis-parsed an ancestor or someone created a DIE
+ # by an offset that was not actually the start of a DIE.
+ if prev is search:
+ raise ValueError("offset %s not in CU %s DIE tree" %
+ (self.offset, self.cu.cu_offset))
+
+ search = prev
+
def __repr__(self):
s = 'DIE %s, size=%s, has_children=%s\n' % (
self.tag, self.size, self.has_children)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple
+from bisect import bisect_right
from ..common.exceptions import DWARFError
from ..common.utils import (struct_parse, dwarf_assert,
# Cache for abbrev tables: a dict keyed by offset
self._abbrevtable_cache = {}
+ # Cache of compile units and map of their offsets for bisect lookup.
+ # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at().
+ self._cu_cache = []
+ self._cu_offsets_map = []
+
@property
def has_debug_info(self):
""" Return whether this contains debug information.
"""
return bool(self.debug_info_sec)
+ def get_DIE_from_lut_entry(self, lut_entry):
+ """ Get the DIE from the pubnames or putbtypes lookup table entry.
+
+ lut_entry:
+ A NameLUTEntry object from a NameLUT instance (see
+ .get_pubmames and .get_pubtypes methods).
+ """
+ cu = self.get_CU_at(lut_entry.cu_ofs)
+ return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu)
+
+ def get_DIE_from_refaddr(self, refaddr, cu=None):
+ """ Given a .debug_info section offset of a DIE, return the DIE.
+
+ refaddr:
+ The refaddr may come from a DW_FORM_ref_addr attribute.
+
+ cu:
+ The compile unit object, if known. If None a search
+ from the closest offset less than refaddr will be performed.
+ """
+ if cu is None:
+ cu = self.get_CU_containing(refaddr)
+ return cu.get_DIE_from_refaddr(refaddr)
+
+ def get_CU_containing(self, refaddr):
+ """ Find the CU that includes the given reference address in the
+ .debug_info section.
+
+ refaddr:
+ Either a refaddr of a DIE (possibly from a DW_FORM_refaddr
+ attribute) or the section offset of a CU (possibly from an
+ aranges table).
+
+ This function will parse and cache CUs until the search criteria
+ is met, starting from the closest known offset lessthan or equal
+ to the given address.
+ """
+ dwarf_assert(
+ self.has_debug_info,
+ 'CU lookup but no debug info section')
+ dwarf_assert(
+ 0 <= refaddr < self.debug_info_sec.size,
+ "refaddr %s beyond .debug_info size" % refaddr)
+
+ # The CU containing the DIE we desire will be to the right of the
+ # DIE insert point. If we have a CU address, then it will be a
+ # match but the right insert minus one will still be the item.
+ # The first CU starts at offset 0, so start there if cache is empty.
+ i = bisect_right(self._cu_offsets_map, refaddr)
+ start = self._cu_offsets_map[i - 1] if i > 0 else 0
+
+ # parse CUs until we find one containing the desired address
+ for cu in self._parse_CUs_iter(start):
+ if cu.cu_offset <= refaddr < cu.cu_offset + cu.size:
+ return cu
+
+ raise ValueError("CU for reference address %s not found" % refaddr)
+
+ def get_CU_at(self, offset):
+ """ Given a CU header offset, return the parsed CU.
+
+ offset:
+ The offset may be from an accelerated access table such as
+ the public names, public types, address range table, or
+ prior use.
+
+ This function will directly parse the CU doing no validation of
+ the offset beyond checking the size of the .debug_info section.
+ """
+ dwarf_assert(
+ self.has_debug_info,
+ 'CU lookup but no debug info section')
+ dwarf_assert(
+ 0 <= offset < self.debug_info_sec.size,
+ "offset %s beyond .debug_info size" % offset)
+
+ return self._cached_CU_at_offset(offset)
+
def iter_CUs(self):
""" Yield all the compile units (CompileUnit objects) in the debug info
"""
#------ PRIVATE ------#
- def _parse_CUs_iter(self):
- """ Parse CU entries from debug_info. Yield CUs in order of appearance.
+ def _parse_CUs_iter(self, offset=0):
+ """ Iterate CU objects in order of appearance in the debug_info section.
+
+ offset:
+ The offset of the first CU to yield. Additional iterations
+ will return the sequential unit objects.
+
+ See .iter_CUs(), .get_CU_containing(), and .get_CU_at().
"""
if self.debug_info_sec is None:
return
- offset = 0
while offset < self.debug_info_sec.size:
- cu = self._parse_CU_at_offset(offset)
+ cu = self._cached_CU_at_offset(offset)
# Compute the offset of the next CU in the section. The unit_length
# field of the CU header contains its size not including the length
# field itself.
cu.structs.initial_length_field_size())
yield cu
+ def _cached_CU_at_offset(self, offset):
+ """ Return the CU with unit header at the given offset into the
+ debug_info section from the cache. If not present, the unit is
+ header is parsed and the object is installed in the cache.
+
+ offset:
+ The offset of the unit header in the .debug_info section
+ to of the unit to fetch from the cache.
+
+ See get_CU_at().
+ """
+ # Find the insert point for the requested offset. With bisect_right,
+ # if this entry is present in the cache it will be the prior entry.
+ i = bisect_right(self._cu_offsets_map, offset)
+ if i >= 1 and offset == self._cu_offsets_map[i - 1]:
+ return self._cu_cache[i - 1]
+
+ # Parse the CU and insert the offset and object into the cache.
+ # The ._cu_offsets_map[] contains just the numeric offsets for the
+ # bisect_right search while the parallel indexed ._cu_cache[] holds
+ # the object references.
+ cu = self._parse_CU_at_offset(offset)
+ self._cu_offsets_map.insert(i, offset)
+ self._cu_cache.insert(i, cu)
+ return cu
+
def _parse_CU_at_offset(self, offset):
""" Parse and return a CU at the given offset in the debug_info stream.
"""
--- /dev/null
+#-------------------------------------------------------------------------------
+# elftools tests
+#
+# Eli Bendersky (eliben@gmail.com), Milton Miller <miltonm@us.ibm.com>
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import os
+import unittest
+
+from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import bytes2str
+
+class TestCacheLUTandDIEref(unittest.TestCase):
+ def dprint(self, list):
+ if False:
+ self.oprint(list)
+
+ def oprint(self, list):
+ if False:
+ print(list)
+
+ def test_die_from_LUTentry(self):
+ lines = ['']
+ with open(os.path.join('test', 'testfiles_for_unittests',
+ 'lambda.elf'), 'rb') as f:
+ elffile = ELFFile(f)
+ self.assertTrue(elffile.has_dwarf_info())
+
+ dwarf = elffile.get_dwarf_info()
+ pt = dwarf.get_pubnames()
+ for (k, v) in pt.items():
+ ndie = dwarf.get_DIE_from_lut_entry(v)
+ self.dprint(ndie)
+ if not 'DW_AT_type' in ndie.attributes:
+ continue
+ if not 'DW_AT_name' in ndie.attributes:
+ continue
+ name = bytes2str(ndie.attributes['DW_AT_name'].value)
+ tlist = []
+ tdie = ndie
+ while True:
+ tdie = tdie.get_DIE_from_attribute('DW_AT_type')
+ self.dprint(ndie)
+ ttag = tdie.tag
+ if isinstance(ttag, int):
+ ttag = 'TAG(0x%x)' % ttag
+ tlist.append(ttag)
+ if 'DW_AT_name' in tdie.attributes:
+ break
+ tlist.append(bytes2str(tdie.attributes['DW_AT_name'].value))
+ tname = ' '.join(tlist)
+ line = "%s DIE at %s is of type %s" % (
+ ndie.tag, ndie.offset, tname)
+ lines.append(line)
+ self.dprint(line)
+
+ self.oprint('\n'.join(lines))
+ self.assertGreater(len(lines), 1)