# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
+from bisect import bisect_left
from .die import DIE
# requested.
self._abbrev_table = None
- # A list of DIEs belonging to this CU. Lazily parsed.
+ # A list of DIEs belonging to this CU.
+ # This list is lazily constructed as DIEs are iterated over.
self._dielist = []
+ # A list of file offsets, corresponding (by index) to the DIEs
+ # in `self._dielist`. This list exists separately from
+ # `self._dielist` to make it binary searchable, enabling the
+ # DIE population strategy used in `iter_DIE_children`.
+ # Like `self._dielist`, this list is lazily constructed
+ # as DIEs are iterated over.
+ self._diemap = []
def dwarf_format(self):
""" Get the DWARF format (32 or 64) for this CU
""" Get the top DIE (which is either a DW_TAG_compile_unit or
DW_TAG_partial_unit) of this CU
"""
- return self._get_DIE(0)
+
+ # Note that the top DIE always has the minimal offset and is therefore
+ # at the beginning of both lists, so no bisect is required.
+ if len(self._diemap) > 0:
+ return self._dielist[0]
+
+ top = DIE(
+ cu=self,
+ stream=self.dwarfinfo.debug_info_sec.stream,
+ offset=self.cu_die_offset)
+
+ self._dielist.insert(0, top)
+ self._diemap.insert(0, self.cu_die_offset)
+
+ return top
def iter_DIEs(self):
""" Iterate over all the DIEs in the CU, in order of their appearance.
Note that null DIEs will also be returned.
"""
- self._parse_DIEs()
- return iter(self._dielist)
+ return self._iter_DIE_subtree(self.get_top_DIE())
+
+ def iter_DIE_children(self, die):
+ """ Given a DIE, yields either its children, without null DIE list
+ terminator, or nothing, if that DIE has no children.
+
+ The null DIE terminator is saved in that DIE once iteration reaches it.
+ """
+ if not die.has_children:
+ return
+
+ # `cur_offset` tracks the offset past our current DIE as we iterate
+ # over children, providing the pivot as we bisect `self._diemap`
+ # and ensuring that we insert our children (and child offsets)
+ # in the correct order within both `self._dielist` and `self._diemap`.
+ cur_offset = die.offset + die.size
+
+ while True:
+ i = bisect_left(self._diemap, cur_offset)
+ # Note that `self._diemap` cannot be empty here because `die`, the
+ # argument, has already been parsed.
+ if i < len(self._diemap) and cur_offset == self._diemap[i]:
+ child = self._dielist[i]
+ else:
+ child = DIE(
+ cu=self,
+ stream=die.stream,
+ offset=cur_offset)
+ self._dielist.insert(i, child)
+ self._diemap.insert(i, cur_offset)
+
+ child.set_parent(die)
+
+ if child.is_null():
+ die._terminator = child
+ return
+
+ yield child
+
+ if not child.has_children:
+ cur_offset += child.size
+ elif "DW_AT_sibling" in child.attributes:
+ sibling = child.attributes["DW_AT_sibling"]
+ cur_offset = sibling.value + self.cu_offset
+ else:
+ # If no DW_AT_sibling attribute is provided by the producer
+ # then the whole child subtree must be parsed to find its next
+ # sibling. A single zero byte represents the null DIE that
+ # terminates a children list; it is used to locate the bounds
+ # of the child subtree.
+
+ # If the children are not parsed yet, the loop below calls this
+ # function recursively, which results in the `_terminator`
+ # attribute of `child` being set.
+ if child._terminator is None:
+ for _ in self.iter_DIE_children(child):
+ pass
+
+ cur_offset = child._terminator.offset + child._terminator.size
#------ PRIVATE ------#
"""
return self.header[name]
- def _get_DIE(self, index):
- """ Get the DIE at the given index
- """
- self._parse_DIEs()
- return self._dielist[index]
-
- def _parse_DIEs(self):
- """ Parse all the DIEs pertaining to this CU from the stream and shove
- them sequentially into self._dielist.
- Also set the child/sibling/parent links in the DIEs according
- (unflattening the prefix-order of the DIE tree).
+ def _iter_DIE_subtree(self, die):
+ """ Given a DIE, this yields it with its subtree including null DIEs
+ (child list terminators).
"""
- if len(self._dielist) > 0:
- return
-
- # Compute the boundary (one byte past the bounds) of this CU in the
- # stream
- cu_boundary = ( self.cu_offset +
- self['unit_length'] +
- self.structs.initial_length_field_size())
-
- # First pass: parse all DIEs and place them into self._dielist
- die_offset = self.cu_die_offset
- while die_offset < cu_boundary:
- die = DIE(
- cu=self,
- stream=self.dwarfinfo.debug_info_sec.stream,
- offset=die_offset)
- self._dielist.append(die)
- die_offset += die.size
-
- # Second pass - unflatten the DIE tree
- self._unflatten_tree()
-
- def _unflatten_tree(self):
- """ "Unflatten" the DIE tree from it serial representation, by setting
- the child/sibling/parent links of DIEs.
-
- Assumes self._dielist was already populated by a linear list of DIEs
- read from the stream section
- """
- # the first DIE in the list is the root node
- root = self._dielist[0]
- parentstack = [root]
-
- for die in self._dielist[1:]:
- if not die.is_null():
- cur_parent = parentstack[-1]
- # This DIE is a child of the current parent
- cur_parent.add_child(die)
- die.set_parent(cur_parent)
- if die.has_children:
- parentstack.append(die)
- else:
- # parentstack should not be really empty here. However, some
- # compilers generate DWARF that has extra NULLs in the end and
- # we don't want pyelftools to fail parsing them just because of
- # this.
- if len(parentstack) > 0:
- # end of children for the current parent
- parentstack.pop()
+ yield die
+ if die.has_children:
+ for c in die.iter_children():
+ for d in self._iter_DIE_subtree(c):
+ yield d
+ yield die._terminator