self._dielist.insert(0, top)
self._diemap.insert(0, self.cu_die_offset)
+ top._translate_indirect_attributes() # Can't translate indirect attributes until the top DIE has been parsed to the end
+
return top
def has_top_DIE(self):
    """ Returns whether the top DIE in this CU has already been parsed and cached.

        Only the DIE offset map is inspected, so nothing is parsed on demand.
    """
    return bool(self._diemap)
+
@property
def size(self):
    """ The 'unit_length' field of this object plus the size reported by
        structs.initial_length_field_size().
    """
    unit_length = self['unit_length']
    return unit_length + self.structs.initial_length_field_size()
dt.tag = "ptr_to_member_type" # Not a function pointer per se
return dt
elif t.tag == 'array':
- t.dimensions = (sub.attributes['DW_AT_upper_bound'].value + 1 if 'DW_AT_upper_bound' in sub.attributes else -1
+ t.dimensions = (_array_subtype_size(sub)
for sub
in type_die.iter_children()
if sub.tag == 'DW_TAG_subrange_type')
def _strip_type_tag(die):
    """Given a DIE with DW_TAG_foo_type, returns foo"""
    # Drop as many characters as the 'DW_TAG_' prefix and the '_type' suffix occupy
    head, tail = len('DW_TAG_'), len('_type')
    return die.tag[head:-tail]
+
def _array_subtype_size(sub):
    """Given a subrange DIE of an array type, returns the size of that dimension.

    DW_AT_upper_bound wins when present and yields its value plus one;
    otherwise the value of DW_AT_count is returned as is. Returns -1 when
    the DIE carries neither attribute.
    """
    attrs = sub.attributes
    if 'DW_AT_upper_bound' in attrs:
        upper_bound = attrs['DW_AT_upper_bound'].value
        return upper_bound + 1
    if 'DW_AT_count' not in attrs:
        return -1
    return attrs['DW_AT_count'].value
+
from ..common.py3compat import bytes2str, iteritems
from ..common.utils import struct_parse, preserve_stream_pos
from .enums import DW_FORM_raw2name
+from .dwarf_util import _resolve_via_offset_table, _get_base_offset
# AttributeValue - describes an attribute value in the DIE:
def _translate_attr_value(self, form, raw_value):
""" Translate a raw attr value according to the form
"""
+ # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
+ # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
+ # This breaks if there is an indirect encoding in the top DIE itself before the
+ # corresponding _base, and it was seen in the wild.
+ # There is a hook in get_top_DIE() to resolve those lazily.
+ translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset
value = None
if form == 'DW_FORM_strp':
with preserve_stream_pos(self.stream):
self.cu.structs.Dwarf_dw_form[form], self.stream)
# Let's hope this doesn't get too deep :-)
return self._translate_attr_value(form, raw_value)
+ elif form in ('DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4') and translate_indirect:
+ value = self.cu.dwarfinfo.get_addr(self.cu, raw_value)
+ elif form in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4') and translate_indirect:
+ stream = self.dwarfinfo.debug_str_offsets_sec.stream
+ base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base')
+ offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8
+ with preserve_stream_pos(stream):
+ str_offset = struct_parse(self.cu.structs.Dwarf_offset(''), stream, base_offset + raw_value*offset_size)
+ value = self.dwarfinfo.get_string_from_table(str_offset)
+ elif form == 'DW_FORM_loclistx' and translate_indirect:
+ value = _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base')
+ elif form == 'DW_FORM_rnglistx' and translate_indirect:
+ value = _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base')
else:
value = raw_value
return value
+
def _translate_indirect_attributes(self):
    """ Hook that translates the DW_FORM_...x values in the top DIE once
        the top DIE has been parsed to the end.

        They can't be translated while the top DIE is still being parsed,
        because they implicitly refer to a DW_AT_xxx_base attribute of the
        same DIE that may not have been parsed yet.
    """
    indirect_forms = (
        'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4',
        'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4',
        'DW_FORM_loclistx', 'DW_FORM_rnglistx')
    for key, attr in list(self.attributes.items()):
        if attr.form not in indirect_forms:
            continue
        translated = self._translate_attr_value(attr.form, attr.raw_value)
        # Can't change the value in place, so the whole attribute record is replaced
        self.attributes[key] = AttributeValue(
            name=attr.name,
            form=attr.form,
            value=translated,
            raw_value=attr.raw_value,
            offset=attr.offset)
import os
from ..construct.macros import UBInt32, UBInt64, ULInt32, ULInt64, Array
from ..common.exceptions import DWARFError
-from ..common.utils import struct_parse
+from ..common.utils import preserve_stream_pos, struct_parse
def _get_base_offset(cu, base_attribute_name):
"""Retrieves a required, base offset-type attribute
# the offset table for this CU's block in that section, which in turn is indexed by the index.
offset_size = 4 if cu.structs.dwarf_format == 32 else 8
- return base_offset + struct_parse(cu.structs.Dwarf_offset(''), stream, base_offset + index*offset_size, True)
+ with preserve_stream_pos(stream):
+ return base_offset + struct_parse(cu.structs.Dwarf_offset(''), stream, base_offset + index*offset_size)
def _iter_CUs_in_section(stream, structs, parser):
"""Iterates through the list of CU sections in loclists or rangelists. Almost identical structures there.
from .ranges import RangeLists, RangeListsPair
from .aranges import ARanges
from .namelut import NameLUT
+from .dwarf_util import _get_base_offset
# Describes a debug section
self.debug_ranges_sec = debug_ranges_sec
self.debug_line_sec = debug_line_sec
self.debug_addr_sec = debug_addr_sec
+ self.debug_str_offsets_sec = debug_str_offsets_sec
self.debug_line_str_sec = debug_line_str_sec
self.debug_pubtypes_sec = debug_pubtypes_sec
self.debug_pubnames_sec = debug_pubnames_sec
else:
return None
def get_addr(self, cu, addr_index):
    """Provided a CU and an index, retrieves an address from the debug_addr section.

    The entry is read at the CU's DW_AT_addr_base offset plus addr_index
    times the CU's address size. Raises DWARFError if there is no
    debug_addr section.
    """
    addr_sec = self.debug_addr_sec
    if not addr_sec:
        raise DWARFError('The file does not contain a debug_addr section for indirect address access')
    # Selectors are not supported, but no assert on that. TODO?
    table_start = _get_base_offset(cu, 'DW_AT_addr_base')
    entry_pos = table_start + addr_index*cu.header.address_size
    return struct_parse(cu.structs.Dwarf_target_addr(''), addr_sec.stream, entry_pos)
+
#------ PRIVATE ------#
def _parse_CUs_iter(self, offset=0):
DW_INL_declared_not_inlined = 0x02,
DW_INL_declared_inlined = 0x03
)
+
# Calling convention codes (DW_CC_*), keyed by name
ENUM_DW_CC = {
    'DW_CC_normal': 0x01,
    'DW_CC_program': 0x02,
    'DW_CC_nocall': 0x03,
    'DW_CC_pass_by_reference': 0x04,
    'DW_CC_pass_by_value': 0x05,
    'DW_CC_lo_user': 0x40,
    'DW_CC_hi_user': 0xff,
}
BaseAddressEntry = namedtuple('BaseAddressEntry', ['entry_offset', 'entry_length', 'base_address'])
LocationViewPair = namedtuple('LocationViewPair', ['entry_offset', 'begin', 'end'])


def _translate_startx_length(e, cu):
    """Builds a LocationEntry from a parsed DW_LLE_startx_length entry.

    The start address is looked up through cu.dwarfinfo.get_addr() using
    the entry's start_index; the end is that address plus the entry's length.
    """
    begin = cu.dwarfinfo.get_addr(cu, e.start_index)
    return LocationEntry(e.entry_offset, e.entry_length, begin, begin + e.length, e.loc_expr, True)


# Maps parsed entries to the tuples above; LocationViewPair is mapped elsewhere.
# Each translator receives the parsed entry and the CU; only the indexed (...x)
# encodings use the CU, to look addresses up through its dwarfinfo.
entry_translate = {
    'DW_LLE_base_address': lambda e, cu: BaseAddressEntry(
        e.entry_offset, e.entry_length, e.address),
    'DW_LLE_offset_pair': lambda e, cu: LocationEntry(
        e.entry_offset, e.entry_length, e.start_offset, e.end_offset, e.loc_expr, False),
    'DW_LLE_start_length': lambda e, cu: LocationEntry(
        e.entry_offset, e.entry_length, e.start_address, e.start_address + e.length, e.loc_expr, True),
    'DW_LLE_start_end': lambda e, cu: LocationEntry(
        e.entry_offset, e.entry_length, e.start_address, e.end_address, e.loc_expr, True),
    'DW_LLE_default_location': lambda e, cu: LocationEntry(
        e.entry_offset, e.entry_length, -1, -1, e.loc_expr, True),
    'DW_LLE_base_addressx': lambda e, cu: BaseAddressEntry(
        e.entry_offset, e.entry_length, cu.dwarfinfo.get_addr(cu, e.index)),
    'DW_LLE_startx_endx': lambda e, cu: LocationEntry(
        e.entry_offset, e.entry_length,
        cu.dwarfinfo.get_addr(cu, e.start_index), cu.dwarfinfo.get_addr(cu, e.end_index),
        e.loc_expr, True),
    'DW_LLE_startx_length': _translate_startx_length,
}
+
class LocationListsPair(object):
"""For those binaries that contain both a debug_loc and a debug_loclists section,
it holds a LocationLists object for both and forwards API calls to the right one.
location entry encodings that contain references to other sections.
"""
self.stream.seek(offset, os.SEEK_SET)
- return self._parse_location_list_from_stream_v5(die) if self.version >= 5 else self._parse_location_list_from_stream()
+ return self._parse_location_list_from_stream_v5(die.cu) if self.version >= 5 else self._parse_location_list_from_stream()
def iter_location_lists(self):
""" Iterates through location lists and view pairs. Returns lists of
next_offset = all_offsets[offset_index]
if next_offset == stream.tell(): # At an object, either a loc list or a loc view pair
locview_pairs = self._parse_locview_pairs(locviews)
- entries = self._parse_location_list_from_stream_v5()
+ entries = self._parse_location_list_from_stream_v5(cu_map[stream.tell()])
yield locview_pairs + entries
offset_index += 1
else: # We are at a gap - skip the gap to the next object or to the next CU
is_absolute = False))
return lst
- # Also returns an array with BaseAddressEntry and LocationEntry
- # Can't possibly support indexed values, since parsing those requires
- # knowing the DIE context it came from
- def _parse_location_list_from_stream_v5(self, die = None):
- # This won't contain the terminator entry
- lst = [self._translate_entry_v5(entry, die)
def _parse_location_list_from_stream_v5(self, cu=None):
    """ Returns an array with BaseAddressEntry and LocationEntry.
        No terminator entries.

        The cu argument is necessary if the section is a
        DWARFv5 debug_loclists one, and the target loclist
        contains indirect encodings.
    """
    parsed_entries = struct_parse(self.structs.Dwarf_loclists_entries, self.stream)
    translated = []
    for entry in parsed_entries:
        # Pick the translator registered for this entry's type
        translate = entry_translate[entry.entry_type]
        translated.append(translate(entry, cu))
    return translated
- return lst
# From V5 style entries to a LocationEntry/BaseAddressEntry
def _translate_entry_v5(self, entry, die):
return ((dwarf_version < 4 and
attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8') and
not attr.name == 'DW_AT_const_value') or
- attr.form == 'DW_FORM_sec_offset')
+ attr.form in ('DW_FORM_sec_offset', 'DW_FORM_loclistx'))
@staticmethod
def _attribute_is_loclistptr_class(attr):
RangeEntry = namedtuple('RangeEntry', ['entry_offset', 'entry_length', 'begin_offset', 'end_offset', 'is_absolute'])
BaseAddressEntry = namedtuple('BaseAddressEntry', ['entry_offset', 'base_address'])
# If we ever see a list with a base entry at the end, there will be an error
# that entry_length is not a field (BaseAddressEntry does not carry one).


def _translate_startx_length(e, cu):
    """Builds a RangeEntry from a parsed DW_RLE_startx_length entry.

    The start address is looked up through cu.dwarfinfo.get_addr() using
    the entry's start_index; the end is that address plus the entry's length.
    """
    begin = cu.dwarfinfo.get_addr(cu, e.start_index)
    return RangeEntry(e.entry_offset, e.entry_length, begin, begin + e.length, True)


# Maps parsed entry types to RangeEntry/BaseAddressEntry objects.
# Each translator receives the parsed entry and the CU; only the indexed (...x)
# encodings use the CU, to look addresses up through its dwarfinfo.
entry_translate = {
    'DW_RLE_base_address': lambda e, cu: BaseAddressEntry(
        e.entry_offset, e.address),
    'DW_RLE_offset_pair': lambda e, cu: RangeEntry(
        e.entry_offset, e.entry_length, e.start_offset, e.end_offset, False),
    'DW_RLE_start_end': lambda e, cu: RangeEntry(
        e.entry_offset, e.entry_length, e.start_address, e.end_address, True),
    'DW_RLE_start_length': lambda e, cu: RangeEntry(
        e.entry_offset, e.entry_length, e.start_address, e.start_address + e.length, True),
    'DW_RLE_base_addressx': lambda e, cu: BaseAddressEntry(
        e.entry_offset, cu.dwarfinfo.get_addr(cu, e.index)),
    'DW_RLE_startx_endx': lambda e, cu: RangeEntry(
        e.entry_offset, e.entry_length,
        cu.dwarfinfo.get_addr(cu, e.start_index), cu.dwarfinfo.get_addr(cu, e.end_index),
        True),
    'DW_RLE_startx_length': _translate_startx_length,
}
class RangeListsPair(object):
def get_range_list_at_offset(self, offset, cu=None):
    """ Get a range list at the given offset in the section.

        The cu argument is necessary if the ranges section is a
        DWARFv5 debug_rnglists one, and the target rangelist
        contains indirect encodings
    """
    stream = self.stream
    # Position absolutely: the offset counts from the start of the stream
    stream.seek(offset, os.SEEK_SET)
    range_list = self._parse_range_list_from_stream(cu)
    return range_list
def get_range_list_at_offset_ex(self, offset):
"""Get a DWARF v5 range list, addresses and offsets unresolved,
"""
# Calling parse until the stream ends is wrong, because ranges can overlap.
# Need to scan the DIEs to know all range locations
+
+ # This maps list offset to CU
ver5 = self.version >= 5
- all_offsets = list(set(die.attributes['DW_AT_ranges'].value
+ cu_map = {die.attributes['DW_AT_ranges'].value : cu
for cu in self._dwarfinfo.iter_CUs()
for die in cu.iter_DIEs()
- if 'DW_AT_ranges' in die.attributes and (cu.header.version >= 5) == ver5))
+ if 'DW_AT_ranges' in die.attributes and (cu['version'] >= 5) == ver5}
+ all_offsets = list(cu_map.keys())
all_offsets.sort()
for offset in all_offsets:
- yield self.get_range_list_at_offset(offset)
+ yield self.get_range_list_at_offset(offset, cu_map[offset])
def iter_CUs(self):
"""For DWARF5 returns an array of objects, where each one has an array of offsets
#------ PRIVATE ------#
- def _parse_range_list_from_stream(self):
+ def _parse_range_list_from_stream(self, cu):
if self.version >= 5:
- return list(entry_translate[entry.entry_type](entry)
+ return list(entry_translate[entry.entry_type](entry, cu)
for entry
in struct_parse(self.structs.Dwarf_rnglists_entries, self.stream))
else:
DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
+
+ # New forms in DWARFv5
+ DW_FORM_loclistx=self.Dwarf_uleb128(''),
+ DW_FORM_rnglistx=self.Dwarf_uleb128('')
)
def _create_aranges_header(self):
'DW_LLE_start_end' : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
'DW_LLE_start_length' : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
})),
- StreamOffset('entry_end_offset')))
+ StreamOffset('entry_end_offset'),
+ Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
self.Dwarf_locview_pair = Struct('locview_pair',
StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))
#-------------------------------------------------------------------------------
# scripts/dwarfdump.py
#
-# A clone of 'llvm-dwarfdump-11' in Python, based on the pyelftools library
+# A clone of 'llvm-dwarfdump' in Python, based on the pyelftools library
+# Roughly corresponding to v15
+#
+# Sources under https://github.com/llvm/llvm-project/tree/main/llvm/tools/llvm-dwarfdump
+#
+# Utterly incompatible with 64-bit DWARF or DWARFv2 targeting a 64-bit machine.
+# Also incompatible with machines that have a selector/segment in the address.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
import elftools.dwarf.ranges
from elftools.dwarf.enums import *
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp
-from elftools.dwarf.datatype_cpp import describe_cpp_datatype
+from elftools.dwarf.datatype_cpp import DIE_name, describe_cpp_datatype
from elftools.dwarf.descriptions import describe_reg_name
# ------------------------------
len = int(attr.form[12:]) * 2
return "0x%0*x" % (len, attr.value,)
def _desc_strx(attr, die):
    """Describes an indexed string attribute: the raw index as 8 hex digits,
    followed by the attribute's string value with backslashes doubled.
    The die argument is not used.
    """
    escaped = bytes2str(attr.value).replace("\\", "\\\\")
    return "indexed (%08x) string = \"%s\"" % (attr.raw_value, escaped)
+
FORM_DESCRIPTIONS = dict(
DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),),
DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
+ DW_FORM_strx1=_desc_strx,
+ DW_FORM_strx2=_desc_strx,
+ DW_FORM_strx3=_desc_strx,
+ DW_FORM_strx4=_desc_strx,
DW_FORM_line_strp=lambda attr, die: ".debug_line_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
DW_FORM_flag_present=lambda attr, die: "true",
DW_FORM_flag=lambda attr, die: "0x%02x" % int(attr.value),
DW_FORM_addr=lambda attr, die: "0x%0*x" % (_addr_str_length(die), attr.value),
+ DW_FORM_addrx=lambda attr, die: "indexed (%08x) address = 0x%0*x" % (attr.raw_value, _addr_str_length(die), attr.value),
DW_FORM_data1=_desc_data,
DW_FORM_data2=_desc_data,
DW_FORM_data4=_desc_data,
return bytes2str(cu.get_top_DIE().attributes['DW_AT_comp_dir'].value)
def _desc_decl_file(attr, die):
    """Describes a DW_AT_decl_file attribute as a double-quoted source file path.

    The attribute value indexes the file table of the CU's line program.
    The line program is fetched once per CU and cached on the CU object
    as _lineprogram. The directory comes from the include directory table;
    a directory starting with '.' is joined onto the CU's compilation
    directory, and a file with no directory entry falls back to the
    compilation directory.

    Raises DWARFError if the CU has no line program or the file index is
    out of range.
    """
    cu = die.cu
    if not hasattr(cu, "_lineprogram"):
        cu._lineprogram = die.dwarfinfo.line_program_for_CU(cu)
    lineprogram = cu._lineprogram
    # Check for a missing line program before touching its header; reading
    # header.version first would fail with AttributeError instead.
    if not lineprogram:
        raise DWARFError("Invalid source filename entry index in a decl_file attribute")

    lp_header = lineprogram.header
    # Filename/dirname arrays are 0 based in DWARFv5
    ver5 = lp_header.version >= 5
    file_index = attr.value if ver5 else attr.value - 1
    if not 0 <= file_index < len(lp_header.file_entry):
        raise DWARFError("Invalid source filename entry index in a decl_file attribute")

    file_entry = lp_header.file_entry[file_index]
    dir_index = file_entry.dir_index if ver5 else file_entry.dir_index - 1
    if dir_index >= 0:
        dir_name = bytes2str(lp_header.include_directory[dir_index])
        if dir_name.startswith('.'):
            dir_name = posixpath.join(_cu_comp_dir(cu), dir_name)
    else:
        dir_name = _cu_comp_dir(cu)
    file_name = bytes2str(file_entry.name)
    return "\"%s\"" % (posixpath.join(dir_name, file_name),)
+
def _desc_ranges(attr, die):
di = die.cu.dwarfinfo
if not hasattr(di, '_rnglists'):
di._rangelists = di.range_lists()
- rangelist = di._rangelists.get_range_list_at_offset(attr.value)
+ rangelist = di._rangelists.get_range_list_at_offset(attr.value, die.cu)
base_ip = _get_cu_base(die.cu)
lines = []
addr_str_len = die.cu.header.address_size*2
base_ip = entry.base_address
else:
raise NotImplementedError("Unknown object in a range list")
- return ("0x%08x\n" % attr.value) + "\n".join(lines)
+ prefix = "indexed (0x%x) rangelist = " % attr.raw_value if attr.form == 'DW_FORM_rnglistx' else ''
+ return ("%s0x%08x\n" % (prefix, attr.value)) + "\n".join(lines)
def _desc_locations(attr, die):
cu = die.cu
base_ip = entry.base_address
else:
raise NotImplementedError("Unknown object in a location list")
- return ("0x%08x:\n" % attr.value) + "\n".join(lines)
+ prefix = "indexed (0x%x) loclist = " % attr.raw_value if attr.form == 'DW_FORM_loclistx' else ''
+ return ("%s0x%08x:\n" % (prefix, attr.value)) + "\n".join(lines)
# By default, numeric arguments are spelled in hex with a leading 0x
def _desc_operationarg(s, cu):
op_name,
_desc_reg(op - 0x70, cu),
args[0])
- elif op_name in ('DW_OP_fbreg', 'DW_OP_bra', 'DW_OP_skip'): # Argument is decimal with a leading sign
+ elif op_name in ('DW_OP_fbreg', 'DW_OP_bra', 'DW_OP_skip', 'DW_OP_consts', ): # Argument is decimal with a leading sign
return op_name + ' ' + "%+d" % (args[0])
elif op_name in ('DW_OP_const1s', 'DW_OP_const2s'): # Argument is decimal without a leading sign
return op_name + ' ' + "%d" % (args[0])
DW_AT_encoding=lambda attr, die: _desc_enum(attr, ENUM_DW_ATE),
DW_AT_accessibility=lambda attr, die: _desc_enum(attr, ENUM_DW_ACCESS),
DW_AT_inline=lambda attr, die: _desc_enum(attr, ENUM_DW_INL),
+ DW_AT_calling_convention=lambda attr, die: _desc_enum(attr, ENUM_DW_CC),
DW_AT_decl_file=_desc_decl_file,
DW_AT_decl_line=_desc_value,
DW_AT_ranges=_desc_ranges,