# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
+import os
from collections import namedtuple
+from bisect import bisect_right
+from ..construct.lib.container import Container
from ..common.exceptions import DWARFError
from ..common.utils import (struct_parse, dwarf_assert,
parse_cstring_from_stream)
from .abbrevtable import AbbrevTable
from .lineprogram import LineProgram
from .callframe import CallFrameInfo
-from .locationlists import LocationLists
-from .ranges import RangeLists
+from .locationlists import LocationLists, LocationListsPair
+from .ranges import RangeLists, RangeListsPair
from .aranges import ARanges
from .namelut import NameLUT
+from .dwarf_util import _get_base_offset
# Describes a debug section
class DWARFInfo(object):
""" Acts also as a "context" to other major objects, bridging between
- various parts of the debug infromation.
+ various parts of the debug information.
"""
def __init__(self,
config,
debug_ranges_sec,
debug_line_sec,
debug_pubtypes_sec,
- debug_pubnames_sec):
+ debug_pubnames_sec,
+ debug_addr_sec,
+ debug_str_offsets_sec,
+ debug_line_str_sec,
+ debug_loclists_sec,
+ debug_rnglists_sec,
+ debug_sup_sec,
+ gnu_debugaltlink_sec
+ ):
""" config:
A DwarfConfig object
self.debug_loc_sec = debug_loc_sec
self.debug_ranges_sec = debug_ranges_sec
self.debug_line_sec = debug_line_sec
+ self.debug_addr_sec = debug_addr_sec
+ self.debug_str_offsets_sec = debug_str_offsets_sec
+ self.debug_line_str_sec = debug_line_str_sec
self.debug_pubtypes_sec = debug_pubtypes_sec
self.debug_pubnames_sec = debug_pubnames_sec
+ self.debug_loclists_sec = debug_loclists_sec
+ self.debug_rnglists_sec = debug_rnglists_sec
+ self.debug_sup_sec = debug_sup_sec
+ self.gnu_debugaltlink_sec = gnu_debugaltlink_sec
+
+ # Sets the supplementary_dwarfinfo to None. Client code can set this
+ # to something else, typically a DWARFInfo file read from an ELFFile
+ # which path is stored in the debug_sup_sec or gnu_debugaltlink_sec.
+ self.supplementary_dwarfinfo = None
# This is the DWARFStructs the context uses, so it doesn't depend on
# DWARF format and address_size (these are determined per CU) - set them
# Cache for abbrev tables: a dict keyed by offset
self._abbrevtable_cache = {}
+ # Cache for program lines tables: a dict keyed by offset
+ self._linetable_cache = {}
+
+ # Cache of compile units and map of their offsets for bisect lookup.
+ # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at().
+ self._cu_cache = []
+ self._cu_offsets_map = []
@property
def has_debug_info(self):
"""
return bool(self.debug_info_sec)
+ def get_DIE_from_lut_entry(self, lut_entry):
+ """ Get the DIE from the pubnames or putbtypes lookup table entry.
+
+ lut_entry:
+ A NameLUTEntry object from a NameLUT instance (see
+ .get_pubmames and .get_pubtypes methods).
+ """
+ cu = self.get_CU_at(lut_entry.cu_ofs)
+ return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu)
+
+ def get_DIE_from_refaddr(self, refaddr, cu=None):
+ """ Given a .debug_info section offset of a DIE, return the DIE.
+
+ refaddr:
+ The refaddr may come from a DW_FORM_ref_addr attribute.
+
+ cu:
+ The compile unit object, if known. If None a search
+ from the closest offset less than refaddr will be performed.
+ """
+ if cu is None:
+ cu = self.get_CU_containing(refaddr)
+ return cu.get_DIE_from_refaddr(refaddr)
+
+ def get_CU_containing(self, refaddr):
+ """ Find the CU that includes the given reference address in the
+ .debug_info section.
+
+ refaddr:
+ Either a refaddr of a DIE (possibly from a DW_FORM_ref_addr
+ attribute) or the section offset of a CU (possibly from an
+ aranges table).
+
+ This function will parse and cache CUs until the search criteria
+ is met, starting from the closest known offset lessthan or equal
+ to the given address.
+ """
+ dwarf_assert(
+ self.has_debug_info,
+ 'CU lookup but no debug info section')
+ dwarf_assert(
+ 0 <= refaddr < self.debug_info_sec.size,
+ "refaddr %s beyond .debug_info size" % refaddr)
+
+ # The CU containing the DIE we desire will be to the right of the
+ # DIE insert point. If we have a CU address, then it will be a
+ # match but the right insert minus one will still be the item.
+ # The first CU starts at offset 0, so start there if cache is empty.
+ i = bisect_right(self._cu_offsets_map, refaddr)
+ start = self._cu_offsets_map[i - 1] if i > 0 else 0
+
+ # parse CUs until we find one containing the desired address
+ for cu in self._parse_CUs_iter(start):
+ if cu.cu_offset <= refaddr < cu.cu_offset + cu.size:
+ return cu
+
+ raise ValueError("CU for reference address %s not found" % refaddr)
+
+ def get_CU_at(self, offset):
+ """ Given a CU header offset, return the parsed CU.
+
+ offset:
+ The offset may be from an accelerated access table such as
+ the public names, public types, address range table, or
+ prior use.
+
+ This function will directly parse the CU doing no validation of
+ the offset beyond checking the size of the .debug_info section.
+ """
+ dwarf_assert(
+ self.has_debug_info,
+ 'CU lookup but no debug info section')
+ dwarf_assert(
+ 0 <= offset < self.debug_info_sec.size,
+ "offset %s beyond .debug_info size" % offset)
+
+ return self._cached_CU_at_offset(offset)
+
def iter_CUs(self):
""" Yield all the compile units (CompileUnit objects) in the debug info
"""
"""
return parse_cstring_from_stream(self.debug_str_sec.stream, offset)
+ def get_string_from_linetable(self, offset):
+ """ Obtain a string from the string table section, given an offset
+ relative to the section.
+ """
+ return parse_cstring_from_stream(self.debug_line_str_sec.stream, offset)
+
def line_program_for_CU(self, CU):
""" Given a CU object, fetch the line program it points to from the
.debug_line section.
If the CU doesn't point to a line program, return None.
+
+ Note about directory and file names. They are returned as two collections
+ in the lineprogram object's header - include_directory and file_entry.
+
+ In DWARFv5, they have introduced a different, extensible format for those
+ collections. So in a lineprogram v5+, there are two more collections in
+ the header - directories and file_names. Those might contain extra DWARFv5
+ information that is not exposed in include_directory and file_entry.
"""
# The line program is pointed to by the DW_AT_stmt_list attribute of
# the top DIE of a CU.
the DWARF data, or None if the section doesn't exist
"""
if self.debug_aranges_sec:
- return ARanges(self.debug_aranges_sec.stream,
- self.debug_aranges_sec.size,
+ return ARanges(self.debug_aranges_sec.stream,
+ self.debug_aranges_sec.size,
self.structs)
else:
return None
def location_lists(self):
- """ Get a LocationLists object representing the .debug_loc section of
+ """ Get a LocationLists object representing the .debug_loc/debug_loclists section of
the DWARF data, or None if this section doesn't exist.
+
+ If both sections exist, it returns a LocationListsPair.
"""
- if self.debug_loc_sec:
- return LocationLists(self.debug_loc_sec.stream, self.structs)
+ if self.debug_loclists_sec and self.debug_loc_sec is None:
+ return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self)
+ elif self.debug_loc_sec and self.debug_loclists_sec is None:
+ return LocationLists(self.debug_loc_sec.stream, self.structs, 4, self)
+ elif self.debug_loc_sec and self.debug_loclists_sec:
+ return LocationListsPair(self.debug_loc_sec.stream, self.debug_loclists_sec.stream, self.structs, self)
else:
return None
def range_lists(self):
- """ Get a RangeLists object representing the .debug_ranges section of
+ """ Get a RangeLists object representing the .debug_ranges/.debug_rnglists section of
the DWARF data, or None if this section doesn't exist.
+
+ If both sections exist, it returns a RangeListsPair.
"""
- if self.debug_ranges_sec:
- return RangeLists(self.debug_ranges_sec.stream, self.structs)
+ if self.debug_rnglists_sec and self.debug_ranges_sec is None:
+ return RangeLists(self.debug_rnglists_sec.stream, self.structs, 5, self)
+ elif self.debug_ranges_sec and self.debug_rnglists_sec is None:
+ return RangeLists(self.debug_ranges_sec.stream, self.structs, 4, self)
+ elif self.debug_ranges_sec and self.debug_rnglists_sec:
+ return RangeListsPair(self.debug_ranges_sec.stream, self.debug_rnglists_sec.stream, self.structs, self)
else:
return None
+ def get_addr(self, cu, addr_index):
+ """Provided a CU and an index, retrieves an address from the debug_addr section
+ """
+ if not self.debug_addr_sec:
+ raise DWARFError('The file does not contain a debug_addr section for indirect address access')
+ # Selectors are not supported, but no assert on that. TODO?
+ cu_addr_base = _get_base_offset(cu, 'DW_AT_addr_base')
+ return struct_parse(cu.structs.Dwarf_target_addr(''), self.debug_addr_sec.stream, cu_addr_base + addr_index*cu.header.address_size)
+
#------ PRIVATE ------#
- def _parse_CUs_iter(self):
- """ Parse CU entries from debug_info. Yield CUs in order of appearance.
+ def _parse_CUs_iter(self, offset=0):
+ """ Iterate CU objects in order of appearance in the debug_info section.
+
+ offset:
+ The offset of the first CU to yield. Additional iterations
+ will return the sequential unit objects.
+
+ See .iter_CUs(), .get_CU_containing(), and .get_CU_at().
"""
if self.debug_info_sec is None:
return
- offset = 0
while offset < self.debug_info_sec.size:
- cu = self._parse_CU_at_offset(offset)
+ cu = self._cached_CU_at_offset(offset)
# Compute the offset of the next CU in the section. The unit_length
# field of the CU header contains its size not including the length
# field itself.
cu.structs.initial_length_field_size())
yield cu
+ def _cached_CU_at_offset(self, offset):
+ """ Return the CU with unit header at the given offset into the
+ debug_info section from the cache. If not present, the unit is
+ header is parsed and the object is installed in the cache.
+
+ offset:
+ The offset of the unit header in the .debug_info section
+ to of the unit to fetch from the cache.
+
+ See get_CU_at().
+ """
+ # Find the insert point for the requested offset. With bisect_right,
+ # if this entry is present in the cache it will be the prior entry.
+ i = bisect_right(self._cu_offsets_map, offset)
+ if i >= 1 and offset == self._cu_offsets_map[i - 1]:
+ return self._cu_cache[i - 1]
+
+ # Parse the CU and insert the offset and object into the cache.
+ # The ._cu_offsets_map[] contains just the numeric offsets for the
+ # bisect_right search while the parallel indexed ._cu_cache[] holds
+ # the object references.
+ cu = self._parse_CU_at_offset(offset)
+ self._cu_offsets_map.insert(i, offset)
+ self._cu_cache.insert(i, cu)
+ return cu
+
def _parse_CU_at_offset(self, offset):
""" Parse and return a CU at the given offset in the debug_info stream.
"""
self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset)
dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
- # At this point we still haven't read the whole header, so we don't
- # know the address_size. Therefore, we're going to create structs
- # with a default address_size=4. If, after parsing the header, we
- # find out address_size is actually 8, we just create a new structs
- # object for this CU.
+
+ # Temporary structs for parsing the header
+ # The structs for the rest of the CU depend on the header data.
#
cu_structs = DWARFStructs(
little_endian=self.config.little_endian,
dwarf_format=dwarf_format,
- address_size=4)
+ address_size=4,
+ dwarf_version=2)
cu_header = struct_parse(
cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset)
- if cu_header['address_size'] == 8:
- cu_structs = DWARFStructs(
- little_endian=self.config.little_endian,
- dwarf_format=dwarf_format,
- address_size=8)
+
+ # structs for the rest of the CU, taking into account bitness and DWARF version
+ cu_structs = DWARFStructs(
+ little_endian=self.config.little_endian,
+ dwarf_format=dwarf_format,
+ address_size=cu_header['address_size'],
+ dwarf_version=cu_header['version'])
cu_die_offset = self.debug_info_sec.stream.tell()
dwarf_assert(
def _is_supported_version(self, version):
""" DWARF version supported by this parser
"""
- return 2 <= version <= 4
+ return 2 <= version <= 5
- def _parse_line_program_at_offset(self, debug_line_offset, structs):
+ def _parse_line_program_at_offset(self, offset, structs):
""" Given an offset to the .debug_line section, parse the line program
starting at this offset in the section and return it.
structs is the DWARFStructs object used to do this parsing.
"""
+
+ if offset in self._linetable_cache:
+ return self._linetable_cache[offset]
+
lineprog_header = struct_parse(
structs.Dwarf_lineprog_header,
self.debug_line_sec.stream,
- debug_line_offset)
+ offset)
+
+ # DWARF5: resolve names
+ def resolve_strings(self, lineprog_header, format_field, data_field):
+ if lineprog_header.get(format_field, False):
+ data = lineprog_header[data_field]
+ for field in lineprog_header[format_field]:
+ def replace_value(data, content_type, replacer):
+ for entry in data:
+ entry[content_type] = replacer(entry[content_type])
+
+ if field.form == 'DW_FORM_line_strp':
+ replace_value(data, field.content_type, self.get_string_from_linetable)
+ elif field.form == 'DW_FORM_strp':
+ replace_value(data, field.content_type, self.get_string_from_table)
+ elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_GNU_strp_alt'):
+ if self.supplementary_dwarfinfo:
+ replace_value(data, field.content_type, self.supplementary_dwarfinfo.get_string_fromtable)
+ else:
+ replace_value(data, field.content_type, lambda x: str(x))
+ elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4'):
+ raise NotImplementedError()
+
+ resolve_strings(self, lineprog_header, 'directory_entry_format', 'directories')
+ resolve_strings(self, lineprog_header, 'file_name_entry_format', 'file_names')
+
+ # DWARF5: provide compatible file/directory name arrays for legacy lineprogram consumers
+ if lineprog_header.get('directories', False):
+ lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
+ if lineprog_header.get('file_names', False):
+ lineprog_header.file_entry = tuple(
+ Container(**{
+ 'name':e.get('DW_LNCT_path'),
+ 'dir_index': e.get('DW_LNCT_directory_index'),
+ 'mtime': e.get('DW_LNCT_timestamp'),
+ 'length': e.get('DW_LNCT_size')})
+ for e in lineprog_header.file_names)
# Calculate the offset to the next line program (see DWARF 6.2.4)
- end_offset = ( debug_line_offset + lineprog_header['unit_length'] +
+ end_offset = ( offset + lineprog_header['unit_length'] +
structs.initial_length_field_size())
- return LineProgram(
+ lineprogram = LineProgram(
header=lineprog_header,
stream=self.debug_line_sec.stream,
structs=structs,
program_start_offset=self.debug_line_sec.stream.tell(),
program_end_offset=end_offset)
+ self._linetable_cache[offset] = lineprogram
+ return lineprogram
+
+ def parse_debugsupinfo(self):
+ """
+ Extract a filename from either .debug_sup or .gnu_debualtlink sections.
+ """
+ if self.debug_sup_sec is not None:
+ self.debug_sup_sec.stream.seek(0)
+ suplink = self.structs.Dwarf_debugsup.parse_stream(self.debug_sup_sec.stream)
+ if suplink.is_supplementary == 0:
+ return suplink.sup_filename
+ if self.gnu_debugaltlink_sec is not None:
+ self.gnu_debugaltlink_sec.stream.seek(0)
+ suplink = self.structs.Dwarf_debugaltlink.parse_stream(self.gnu_debugaltlink_sec.stream)
+ return suplink.sup_filename
+ return None
+