--- /dev/null
+#-------------------------------------------------------------------------------
+# elftools: dwarf/datatype_cpp.py
+#
+# First draft at restoring the source-level name of a C/C++ datatype
+# from DWARF data. Aiming at compatibility with llvm-dwarfdump v15.
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from ..common.py3compat import bytes2str
+
+# Maps a TypeDesc modifier name to the token that TypeDesc.__str__ emits
+# for it when rendering a type as C++ source text.
+cpp_symbols = dict(
+ pointer = "*",
+ reference = "&",
+ const = "const")
+
+def describe_cpp_datatype(var_die):
+    """Return the C++ datatype of var_die rendered as a string.
+
+    Parses the DIE with parse_cpp_datatype() and stringifies the
+    resulting TypeDesc.
+    """
+    type_desc = parse_cpp_datatype(var_die)
+    return str(type_desc)
+
+def parse_cpp_datatype(var_die):
+ """Given a DIE that describes a variable, a parameter, or a member
+ with DW_AT_type in it, parses the C++ datatype it refers to.
+
+ Returns a TypeDesc; str() of it gives the type as source text.
+ A DIE without DW_AT_type yields a TypeDesc with an empty tag.
+
+ Does not follow typedefs, doesn't resolve array element types
+ or struct members. Not good for a debugger.
+ """
+ t = TypeDesc()
+
+ if not 'DW_AT_type' in var_die.attributes:
+ t.tag = ''
+ return t
+
+ type_die = var_die.get_DIE_from_attribute('DW_AT_type')
+
+ # Collect the chain of const/pointer/reference DIEs in front of the real type
+ mods = []
+ # Unlike readelf, dwarfdump doesn't chase typedefs
+ while type_die.tag in ('DW_TAG_const_type', 'DW_TAG_pointer_type', 'DW_TAG_reference_type'):
+ modifier = _strip_type_tag(type_die) # const/reference/pointer
+ mods.insert(0, modifier)
+ if not 'DW_AT_type' in type_die.attributes: # void* is encoded as a pointer to nothing
+ t.name = t.tag = "void"
+ t.modifiers = tuple(mods)
+ return t
+ type_die = type_die.get_DIE_from_attribute('DW_AT_type')
+
+ # From this point on, type_die doesn't change
+ t.tag = _strip_type_tag(type_die)
+ t.modifiers = tuple(mods)
+
+ if t.tag in ('ptr_to_member', 'subroutine'):
+ if t.tag == 'ptr_to_member':
+ ptr_prefix = DIE_name(type_die.get_DIE_from_attribute('DW_AT_containing_type')) + "::"
+ type_die = type_die.get_DIE_from_attribute('DW_AT_type')
+ elif "DW_AT_object_pointer" in type_die.attributes: # Older compiler... Subroutine, but with an object pointer
+ ptr_prefix = DIE_name(DIE_type(DIE_type(type_die.get_DIE_from_attribute('DW_AT_object_pointer')))) + "::"
+ else: # Not a pointer to member
+ ptr_prefix = ''
+
+ if t.tag == 'subroutine':
+ # Artificial parameters are left out of the parameter list
+ params = tuple(format_function_param(p, p) for p in type_die.iter_children() if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters") and 'DW_AT_artificial' not in p.attributes)
+ params = ", ".join(params)
+ if 'DW_AT_type' in type_die.attributes:
+ retval_type = parse_cpp_datatype(type_die)
+ is_pointer = retval_type.modifiers and retval_type.modifiers[-1] == 'pointer'
+ retval_type = str(retval_type)
+ # No space between a trailing "*" of the return type and the "("
+ if not is_pointer:
+ retval_type += " "
+ else:
+ retval_type = "void "
+
+ # A trailing pointer modifier is folded into the "(*)" of the function pointer
+ if len(mods) and mods[-1] == 'pointer':
+ mods.pop()
+ t.modifiers = tuple(mods)
+ t.name = "%s(%s*)(%s)" % (retval_type, ptr_prefix, params)
+ else:
+ t.name = "%s(%s)" % (retval_type, params)
+ return t
+ elif DIE_is_ptr_to_member_struct(type_die):
+ dt = parse_cpp_datatype(next(type_die.iter_children())) # The first element is pfn, a function pointer with a this
+ dt.modifiers = tuple(dt.modifiers[:-1]) # Pop the extra pointer
+ dt.tag = "ptr_to_member_type" # Not a function pointer per se
+ return dt
+ elif t.tag == 'array':
+ # Materialized as a tuple: a bare generator is always truthy and would
+ # be exhausted after the first str() of the TypeDesc.
+ # A subrange without DW_AT_upper_bound is recorded as -1 (unknown).
+ t.dimensions = tuple(sub.attributes['DW_AT_upper_bound'].value + 1 if 'DW_AT_upper_bound' in sub.attributes else -1
+ for sub
+ in type_die.iter_children()
+ if sub.tag == 'DW_TAG_subrange_type')
+ t.name = describe_cpp_datatype(type_die)
+ return t
+
+ # Now the nonfunction types
+ # Blank name is sometimes legal (unnamed unions, etc)
+
+ t.name = safe_DIE_name(type_die, t.tag + " ")
+
+ # Check the nesting - important for parameters
+ parent = type_die.get_parent()
+ scopes = list()
+ while parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'):
+ scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " "))
+ # If unnamed scope, fall back to scope type - like "structure "
+ parent = parent.get_parent()
+ t.scopes = tuple(scopes)
+
+ return t
+
+#--------------------------------------------------
+
+class TypeDesc(object):
+    """Description of a datatype, as parsed from DWARF DIEs.
+
+    Not enough to display a variable in a debugger, but enough to produce
+    a type description string similar to those of llvm-dwarfdump.
+
+    name - name for primitive datatypes, element name for arrays, the
+        whole name for functions and function pointers
+
+    modifiers - a collection of "const"/"pointer"/"reference", from the
+        chain of DIEs preceding the real type DIE
+
+    scopes - a collection of struct/class/namespace names, parents of the
+        real type DIE
+
+    tag - the tag of the real type DIE, stripped of initial DW_TAG_ and
+        final _type
+
+    dimensions - the collection of array dimensions, if the type is an
+        array. -1 means an array of unknown dimension.
+    """
+    def __init__(self):
+        self.tag = None
+        self.name = None
+        self.scopes = ()     # Reads left to right
+        self.modifiers = ()  # Reads left to right
+        self.dimensions = None
+
+    def __str__(self):
+        # Some reference points from dwarfdump:
+        # const->pointer->const->char = const char *const
+        # const->reference->const->int = const const int &
+        # const->reference->int = const int &
+        qualified = str(self.name)
+        remaining = self.modifiers
+        pieces = []
+
+        # A leading const applies to the variable itself, the other
+        # consts apply to the pointee
+        if len(remaining) and remaining[0] == 'const':
+            pieces.append("const")
+            remaining = remaining[1:]
+
+        # reference->const at the tail: that const is spelled in front
+        if remaining[-2:] == ("reference", "const"):
+            pieces.append("const")
+            remaining = remaining[:-1]
+
+        if self.scopes:
+            qualified = '::'.join(self.scopes) + '::' + qualified
+        pieces.append(qualified)
+
+        if len(remaining):
+            pieces.append("".join([cpp_symbols[mod] for mod in remaining]))
+
+        suffix = ''
+        if self.dimensions:
+            # Non-positive dimensions are printed as empty brackets
+            suffix = "".join(['[%s]' % (str(dim) if dim > 0 else '',)
+                              for dim in self.dimensions])
+
+        return " ".join(pieces) + suffix
+
+def DIE_name(die):
+    """Return the DW_AT_name of the DIE as a string.
+
+    The attribute is looked up unconditionally; a DIE without DW_AT_name
+    makes the lookup in die.attributes fail.
+    """
+    name_attr = die.attributes['DW_AT_name']
+    return bytes2str(name_attr.value)
+
+def safe_DIE_name(die, default = ''):
+    """Return the DW_AT_name of the DIE as a string, or default if the
+    DIE has no DW_AT_name attribute.
+    """
+    if 'DW_AT_name' not in die.attributes:
+        return default
+    return bytes2str(die.attributes['DW_AT_name'].value)
+
+def DIE_type(die):
+    """Return the DIE that the DW_AT_type attribute of die refers to."""
+    type_die = die.get_DIE_from_attribute("DW_AT_type")
+    return type_die
+
+class ClassDesc(object):
+    """Describes the scope that a function belongs to.
+
+    scopes - tuple of enclosing scope names, outermost first
+
+    const_member - set by get_class_spec_if_member when the type of the
+        object pointer contains a const followed by a pointer
+    """
+    def __init__(self):
+        self.const_member = False
+        self.scopes = ()
+
+def get_class_spec_if_member(func_spec, the_func):
+    """Return a ClassDesc for the scope that owns a function, or None.
+
+    If the_func has DW_AT_object_pointer, the scope is derived from the
+    parsed type of that parameter. Otherwise the parents of func_spec are
+    walked for as long as they are class/structure/namespace DIEs.
+    Returns None when neither approach finds an enclosing scope.
+    """
+    if 'DW_AT_object_pointer' not in the_func.attributes:
+        # Check the parent element chain - could be a class
+        enclosing = []
+        ancestor = func_spec.get_parent()
+        while ancestor.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"):
+            enclosing.insert(0, DIE_name(ancestor))
+            ancestor = ancestor.get_parent()
+        if not enclosing:
+            return None
+        result = ClassDesc()
+        result.scopes = tuple(enclosing)
+        return result
+
+    this_type = parse_cpp_datatype(the_func.get_DIE_from_attribute('DW_AT_object_pointer'))
+    result = ClassDesc()
+    result.scopes = this_type.scopes + (this_type.name,)
+    # const -> pointer -> const for this arg of const
+    mods = this_type.modifiers
+    result.const_member = any(mods[start:start+2] == ("const", "pointer")
+                              for start in range(len(mods)))
+    return result
+
+def format_function_param(param_spec, param):
+    """Return a function parameter rendered as a C++ type string.
+
+    param_spec - the parameter DIE that supplies the tag and the type.
+        A DW_TAG_formal_parameter is rendered as its datatype; anything
+        else (unspecified_parameters AKA variadic) as "..."
+
+    param - kept for interface compatibility. Only the parameter type is
+        printed, so the parameter name is no longer looked up.
+    """
+    if param_spec.tag != 'DW_TAG_formal_parameter':
+        return "..."
+    return str(parse_cpp_datatype(param_spec))
+
+def DIE_is_ptr_to_member_struct(type_die):
+    """Tell whether type_die is a structure whose DW_TAG_member children
+    are exactly two, named __pfn and __delta, in that order.
+    """
+    if type_die.tag != 'DW_TAG_structure_type':
+        return False
+    members = [child for child in type_die.iter_children() if child.tag == "DW_TAG_member"]
+    if len(members) != 2:
+        return False
+    first, second = members
+    return safe_DIE_name(first) == "__pfn" and safe_DIE_name(second) == "__delta"
+
+def _strip_type_tag(die):
+    """Given a DIE with DW_TAG_foo_type, returns foo"""
+    prefix_len = len('DW_TAG_')
+    suffix_len = len('_type')
+    return die.tag[prefix_len:-suffix_len]
--- /dev/null
+#-------------------------------------------------------------------------------
+# elftools: dwarf/dwarf_util.py
+#
+# Minor, shared DWARF helpers
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+
+import os
+from ..construct.macros import UBInt32, UBInt64, ULInt32, ULInt64, Array
+from ..common.exceptions import DWARFError
+from ..common.utils import struct_parse
+
+def _get_base_offset(cu, base_attribute_name):
+    """Retrieves a required, base offset-type attribute
+    from the top DIE in the CU. Applies to several indirectly
+    encoded objects - range lists, location lists, strings, addresses.
+
+    Raises DWARFError if the top DIE does not have the attribute.
+    """
+    top_attrs = cu.get_top_DIE().attributes
+    if base_attribute_name in top_attrs:
+        return top_attrs[base_attribute_name].value
+    raise DWARFError("The CU at offset 0x%x needs %s" % (cu.cu_offset, base_attribute_name))
+
+def _resolve_via_offset_table(stream, cu, index, base_attribute_name):
+    """Given an index in the offset table and directions where to find it,
+    retrieves an offset. Works for loclists, rnglists.
+
+    The DWARF offset bitness of the CU block in the section matches that
+    of the CU record in dwarf_info. See DWARFv5 standard, section 7.4.
+
+    This is used for translating DW_FORM_loclistx, DW_FORM_rnglistx
+    via the offset table in the respective section.
+
+    The returned value is the table's base offset plus the entry read
+    from the table at the given index.
+    """
+    # Offset (within the rnglists/loclists/str_offsets section) of the
+    # offset table for this CU's block in that section; the table is in
+    # turn indexed by the index.
+    table_start = _get_base_offset(cu, base_attribute_name)
+
+    entry_size = 4 if cu.structs.dwarf_format == 32 else 8
+    entry_pos = table_start + index*entry_size
+    entry = struct_parse(cu.structs.Dwarf_offset(''), stream, entry_pos, True)
+    return table_start + entry
+
+def _iter_CUs_in_section(stream, structs, parser):
+    """Iterates through the list of CU sections in loclists or rangelists.
+    Almost identical structures there.
+
+    stream - the section stream; its length is found by seeking to the end
+    structs - supplies Dwarf_uint32/Dwarf_uint64 for parsing the offset table
+    parser - the construct that parses one CU header
+
+    Every yielded header gets an 'offsets' entry: the parsed offset table,
+    or False if the header's offset_count is not positive.
+    """
+    stream.seek(0, os.SEEK_END)
+    section_end = stream.tell()
+    stream.seek(0, os.SEEK_SET)
+
+    pos = 0
+    while pos < section_end:
+        header = struct_parse(parser, stream, pos)
+        offsets = False
+        if header.offset_count > 0:
+            entry_parser = structs.Dwarf_uint64 if header.is64 else structs.Dwarf_uint32
+            # The offset table is read from the current stream position
+            offsets = struct_parse(Array(header.offset_count, entry_parser('')), stream)
+        header['offsets'] = offsets
+        yield header
+        pos = header.offset_after_length + header.unit_length
+
DW_RLE_start_end = 0x06,
DW_RLE_start_length = 0x07
)
+
+# DW_LANG_* source language names mapped to their numeric codes.
+ENUM_DW_LANG = dict(
+ DW_LANG_C89 = 0x0001,
+ DW_LANG_C = 0x0002,
+ DW_LANG_Ada83 = 0x0003,
+ DW_LANG_C_plus_plus = 0x0004,
+ DW_LANG_Cobol74 = 0x0005,
+ DW_LANG_Cobol85 = 0x0006,
+ DW_LANG_Fortran77 = 0x0007,
+ DW_LANG_Fortran90 = 0x0008,
+ DW_LANG_Pascal83 = 0x0009,
+ DW_LANG_Modula2 = 0x000a,
+ DW_LANG_Java = 0x000b,
+ DW_LANG_C99 = 0x000c,
+ DW_LANG_Ada95 = 0x000d,
+ DW_LANG_Fortran95 = 0x000e,
+ DW_LANG_PLI = 0x000f,
+ DW_LANG_ObjC = 0x0010,
+ DW_LANG_ObjC_plus_plus = 0x0011,
+ DW_LANG_UPC = 0x0012,
+ DW_LANG_D = 0x0013,
+ DW_LANG_Python = 0x0014,
+ DW_LANG_OpenCL = 0x0015,
+ DW_LANG_Go = 0x0016,
+ DW_LANG_Modula3 = 0x0017,
+ DW_LANG_Haskell = 0x0018,
+ DW_LANG_C_plus_plus_03 = 0x0019,
+ DW_LANG_C_plus_plus_11 = 0x001a,
+ DW_LANG_OCaml = 0x001b,
+ DW_LANG_Rust = 0x001c,
+ DW_LANG_C11 = 0x001d,
+ DW_LANG_Swift = 0x001e,
+ DW_LANG_Julia = 0x001f,
+ DW_LANG_Dylan = 0x0020,
+ DW_LANG_C_plus_plus_14 = 0x0021,
+ DW_LANG_Fortran03 = 0x0022,
+ DW_LANG_Fortran08 = 0x0023,
+ DW_LANG_RenderScript = 0x0024,
+ DW_LANG_BLISS = 0x0025,
+ DW_LANG_lo_user = 0x8000,
+ DW_LANG_hi_user = 0xffff
+)
+
+# DW_ATE_* type encoding names mapped to their numeric codes.
+ENUM_DW_ATE = dict(
+ DW_ATE_address = 0x01,
+ DW_ATE_boolean = 0x02,
+ DW_ATE_complex_float = 0x03,
+ DW_ATE_float = 0x04,
+ DW_ATE_signed = 0x05,
+ DW_ATE_signed_char = 0x06,
+ DW_ATE_unsigned = 0x07,
+ DW_ATE_unsigned_char = 0x08,
+ DW_ATE_imaginary_float = 0x09,
+ DW_ATE_packed_decimal = 0x0a,
+ DW_ATE_numeric_string = 0x0b,
+ DW_ATE_edited = 0x0c,
+ DW_ATE_signed_fixed = 0x0d,
+ DW_ATE_unsigned_fixed = 0x0e,
+ DW_ATE_decimal_float = 0x0f,
+ DW_ATE_UTF = 0x10,
+ DW_ATE_UCS = 0x11,
+ DW_ATE_ASCII = 0x12,
+ DW_ATE_lo_user = 0x80,
+ DW_ATE_hi_user = 0xff
+)
+
+# DW_ACCESS_* accessibility names mapped to their numeric codes.
+ENUM_DW_ACCESS = dict(
+ DW_ACCESS_public = 0x01,
+ DW_ACCESS_protected = 0x02,
+ DW_ACCESS_private = 0x03
+)
+
+# DW_INL_* inline code names mapped to their numeric codes.
+ENUM_DW_INL = dict(
+ DW_INL_not_inlined = 0x00,
+ DW_INL_inlined = 0x01,
+ DW_INL_declared_not_inlined = 0x02,
+ DW_INL_declared_inlined = 0x03
+)
offset_index = 0
while stream.tell() < endpos:
# We are at the start of the CU block in the loclists now
- unit_length = struct_parse(self.structs.Dwarf_initial_length(''), stream)
- offset_past_len = stream.tell()
cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream)
assert(cu_header.version == 5)
# We don't have a binary for the former yet. On an off chance that we one day might,
# let's parse the header anyway.
- cu_end_offset = offset_past_len + unit_length
+ cu_end_offset = cu_header.offset_after_length + cu_header.unit_length
# Unit_length includes the header but doesn't include the length
while stream.tell() < cu_end_offset:
 @staticmethod
 def _attribute_has_loc_list(attr, dwarf_version):
+ # True when the form is DW_FORM_sec_offset, or - for DWARF versions
+ # below 4 - when the form is DW_FORM_data1/2/4/8 and the attribute
+ # is not DW_AT_const_value.
 return ((dwarf_version < 4 and
- attr.form in ('DW_FORM_data4', 'DW_FORM_data8') and
+ attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8') and
 not attr.name == 'DW_AT_const_value') or
 attr.form == 'DW_FORM_sec_offset')
'DW_AT_frame_base', 'DW_AT_segment',
'DW_AT_static_link', 'DW_AT_use_location',
'DW_AT_vtable_elem_location',
+ 'DW_AT_call_value',
'DW_AT_GNU_call_site_value',
'DW_AT_GNU_call_site_target',
'DW_AT_GNU_call_site_data_value'))
from collections import namedtuple
from ..common.utils import struct_parse
+from ..common.exceptions import DWARFError
+from .dwarf_util import _iter_CUs_in_section
RangeEntry = namedtuple('RangeEntry', 'entry_offset entry_length begin_offset end_offset is_absolute')
self.stream.seek(offset, os.SEEK_SET)
return self._parse_range_list_from_stream()
+ def get_range_list_at_offset_ex(self, offset):
+     """Get a DWARF v5 range list, addresses and offsets unresolved,
+     at the given offset in the section
+     """
+     entries_parser = self.structs.Dwarf_rnglists_entries
+     return struct_parse(entries_parser, self.stream, offset)
+
def iter_range_lists(self):
""" Yield all range lists found in the section.
"""
for offset in all_offsets:
yield self.get_range_list_at_offset(offset)
+ def iter_CUs(self):
+     """For DWARF5, iterates over the CU blocks of the section; each
+     yielded header object has an array of offsets.
+
+     Raises DWARFError if the section version is below 5.
+     """
+     if self.version < 5:
+         raise DWARFError("CU iteration in rnglists is not supported with DWARF<5")
+
+     # Just pick one CU - only its structs are needed
+     first_cu = next(self._dwarfinfo.iter_CUs())
+     cu_structs = first_cu.structs
+     return _iter_CUs_in_section(self.stream, cu_structs, cu_structs.Dwarf_rnglists_CU_header)
+
+ def iter_CU_range_lists_ex(self, cu):
+     """For DWARF5, yields untranslated rangelists in the CU, where CU
+     comes from iter_CUs above.
+
+     Parsing starts right after the CU's offset table and continues until
+     the end of the CU block.
+     """
+     stream = self.stream
+     # Skip the offset table. Its entries are 4 bytes each in 32-bit DWARF
+     # and 8 bytes each in 64-bit DWARF - bytes, not bits.
+     offset_size = 8 if cu.is64 else 4
+     stream.seek(cu.offset_table_offset + offset_size * cu.offset_count)
+     cu_end = cu.offset_after_length + cu.unit_length
+     while stream.tell() < cu_end:
+         yield struct_parse(self.structs.Dwarf_rnglists_entries, stream)
+
+
#------ PRIVATE ------#
def _parse_range_list_from_stream(self):
""" Create a struct for debug_loclists CU header, DWARFv5, 7,29
"""
self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
- # Unit_length parsed separately
+ StreamOffset('cu_offset'),
+ self.Dwarf_initial_length('unit_length'),
+ Value('is64', lambda ctx: ctx.is64),
+ StreamOffset('offset_after_length'),
self.Dwarf_uint16('version'),
self.Dwarf_uint8('address_size'),
self.Dwarf_uint8('segment_selector_size'),
- PrefixedArray(
- self.Dwarf_offset('offsets'),
- self.Dwarf_uint32('')))
+ self.Dwarf_uint32('offset_count'),
+ StreamOffset('offset_table_offset'))
cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))
StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))
def _create_rnglists_parsers(self):
+ self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
+ StreamOffset('cu_offset'),
+ self.Dwarf_initial_length('unit_length'),
+ Value('is64', lambda ctx: ctx.is64),
+ StreamOffset('offset_after_length'),
+ self.Dwarf_uint16('version'),
+ self.Dwarf_uint8('address_size'),
+ self.Dwarf_uint8('segment_selector_size'),
+ self.Dwarf_uint32('offset_count'),
+ StreamOffset('offset_table_offset'))
+
self.Dwarf_rnglists_entries = RepeatUntilExcluding(
lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
Struct('entry',
"""
def _decode(self, obj, context):
if obj.first < 0xFFFFFF00:
+ context['is64'] = False
return obj.first
else:
if obj.first == 0xFFFFFFFF:
+ context['is64'] = True
return obj.second
else:
raise ConstructError("Failed decoding initial length for %X" % (
--- /dev/null
+#!/usr/bin/env python
+#-------------------------------------------------------------------------------
+# scripts/dwarfdump.py
+#
+# A clone of 'llvm-dwarfdump-11' in Python, based on the pyelftools library
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import argparse
+import os, sys, posixpath
+import traceback
+
+# For running from development directory. It should take precedence over the
+# installed pyelftools.
+sys.path.insert(0, '.')
+
+from elftools import __version__
+from elftools.common.exceptions import DWARFError, ELFError
+from elftools.common.py3compat import bytes2str
+from elftools.elf.elffile import ELFFile
+from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationExpr, LocationViewPair, BaseAddressEntry as LocBaseAddressEntry
+from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above
+import elftools.dwarf.ranges
+from elftools.dwarf.enums import *
+from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp
+from elftools.dwarf.datatype_cpp import describe_cpp_datatype
+from elftools.dwarf.descriptions import describe_reg_name
+
+# ------------------------------
+# ------------------------------
+
+def _get_cu_base(cu):
+    """Return the base address of a CU: DW_AT_low_pc of its top DIE,
+    falling back to DW_AT_entry_pc.
+
+    Raises ValueError if the top DIE has neither attribute.
+    """
+    top_attrs = cu.get_top_DIE().attributes
+    for key in ('DW_AT_low_pc', 'DW_AT_entry_pc'):
+        if key in top_attrs:
+            return top_attrs[key].value
+    raise ValueError("Can't find the base IP (low_pc) for a CU")
+
+def _addr_str_length(die):
+    """Return twice the address size of the DIE's CU - the number of hex
+    digits used when printing an address.
+    """
+    address_bytes = die.cu.header.address_size
+    return address_bytes*2
+
+def _DIE_name(die):
+    """Return DW_AT_name of the DIE as a string, falling back to
+    DW_AT_linkage_name. Raises DWARFError if the DIE has neither.
+    """
+    for key in ('DW_AT_name', 'DW_AT_linkage_name'):
+        if key in die.attributes:
+            return bytes2str(die.attributes[key].value)
+    raise DWARFError()
+
+def _DIE_linkage_name(die):
+    """Return DW_AT_linkage_name of the DIE as a string, falling back to
+    DW_AT_name. Raises DWARFError if the DIE has neither.
+    """
+    for key in ('DW_AT_linkage_name', 'DW_AT_name'):
+        if key in die.attributes:
+            return bytes2str(die.attributes[key].value)
+    raise DWARFError()
+
+def _safe_DIE_name(die, default=None):
+    """Return DW_AT_name of the DIE as a string, falling back to
+    DW_AT_linkage_name, then to default.
+    """
+    for key in ('DW_AT_name', 'DW_AT_linkage_name'):
+        if key in die.attributes:
+            return bytes2str(die.attributes[key].value)
+    return default
+
+def _safe_DIE_linkage_name(die, default=None):
+    """Return DW_AT_linkage_name of the DIE as a string, falling back to
+    DW_AT_name, then to default.
+    """
+    for key in ('DW_AT_linkage_name', 'DW_AT_name'):
+        if key in die.attributes:
+            return bytes2str(die.attributes[key].value)
+    return default
+
+def _desc_ref(attr, die, extra=''):
+    """Describe a CU-relative reference: the raw offset, that offset
+    added to the CU offset, and - if extra is given - extra in quotes.
+    """
+    suffix = (" \"%s\"" % extra) if extra else extra
+    target = die.cu.cu_offset + attr.raw_value
+    return "cu + 0x%04x => {0x%08x}%s" % (attr.raw_value, target, suffix)
+
+def _desc_data(attr, die):
+    """ Hex with length driven by form: the number that follows
+    DW_FORM_data in the form name is the byte count, two digits per byte
+    """
+    byte_count = int(attr.form[len('DW_FORM_data'):])
+    return "0x%0*x" % (byte_count * 2, attr.value,)
+
+# Formatters keyed by attribute form. Each one takes (attr, die) and returns
+# the description string. ReadElf.describe_attr_value falls back to this
+# table when the attribute name has no entry in ATTR_DESCRIPTIONS.
+FORM_DESCRIPTIONS = dict(
+ DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),),
+ DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
+ DW_FORM_line_strp=lambda attr, die: ".debug_line_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
+ DW_FORM_flag_present=lambda attr, die: "true",
+ DW_FORM_flag=lambda attr, die: "0x%02x" % int(attr.value),
+ DW_FORM_addr=lambda attr, die: "0x%0*x" % (_addr_str_length(die), attr.value),
+ DW_FORM_data1=_desc_data,
+ DW_FORM_data2=_desc_data,
+ DW_FORM_data4=_desc_data,
+ DW_FORM_data8=_desc_data,
+ # Blocks: the length in angle brackets, then the bytes in hex
+ DW_FORM_block1=lambda attr, die: "<0x%02x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)),
+ DW_FORM_block2=lambda attr, die: "<0x%04x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)),
+ DW_FORM_block4=lambda attr, die: "<0x%08x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)),
+ DW_FORM_ref=_desc_ref,
+ DW_FORM_ref1=_desc_ref, DW_FORM_ref2=_desc_ref,
+ DW_FORM_ref4=_desc_ref, DW_FORM_ref8=_desc_ref,
+ DW_FORM_sec_offset=lambda attr,die: "0x%08x" % (attr.value,),
+ DW_FORM_exprloc=lambda attr, die: _desc_expression(attr.value, die)
+)
+
+def _desc_enum(attr, enum):
+    """For attributes like DW_AT_language, physically
+    int, logically an enum.
+
+    Returns the first name in enum whose value equals attr.value, or
+    the value itself as a string if there is no such name.
+    """
+    for name, code in enum.items():
+        if code == attr.value:
+            return name
+    return str(attr.value)
+
+def _cu_comp_dir(cu):
+    """Return the DW_AT_comp_dir of the CU's top DIE as a string."""
+    top_die = cu.get_top_DIE()
+    return bytes2str(top_die.attributes['DW_AT_comp_dir'].value)
+
+def _desc_decl_file(attr, die):
+    """Describe a file index attribute as a quoted file path.
+
+    The index is looked up (1-based) in the file table of the CU's line
+    program, which is cached on the CU object as _lineprogram. A
+    directory that starts with '.' is joined to the CU's compilation
+    directory; an entry with no directory index uses the compilation
+    directory itself. Returns '(N/A)' if there is no line program or
+    the index is outside of the file table.
+    """
+    cu = die.cu
+    if not hasattr(cu, "_lineprogram"):
+        cu._lineprogram = die.dwarfinfo.line_program_for_CU(cu)
+    file_index = attr.value
+    if not (cu._lineprogram and file_index > 0 and file_index <= len(cu._lineprogram.header.file_entry)):
+        return '(N/A)'
+
+    entry = cu._lineprogram.header.file_entry[file_index-1]
+    include_dirs = cu._lineprogram.header.include_directory
+    if entry.dir_index > 0:
+        directory = bytes2str(include_dirs[entry.dir_index - 1])
+        if directory.startswith('.'):
+            directory = posixpath.join(_cu_comp_dir(cu), directory)
+    else:
+        directory = _cu_comp_dir(cu)
+    return "\"%s\"" % (posixpath.join(directory, bytes2str(entry.name)),)
+
+def _desc_ranges(attr, die):
+    """Describe a DW_AT_ranges attribute: the offset of the range list,
+    followed by one line per address range in it.
+
+    Offsets of non-absolute entries are relative to the CU base address
+    until a base address entry in the list replaces it.
+
+    Raises NotImplementedError for an unknown entry type in the list.
+    """
+    di = die.cu.dwarfinfo
+    # Cache the range lists on the dwarfinfo object. The check has to test
+    # the same attribute name that is assigned, or the cache never hits.
+    if not hasattr(di, '_rangelists'):
+        di._rangelists = di.range_lists()
+    rangelist = di._rangelists.get_range_list_at_offset(attr.value)
+    base_ip = _get_cu_base(die.cu)
+    lines = []
+    addr_str_len = die.cu.header.address_size*2
+    for entry in rangelist:
+        if isinstance(entry, RangeEntry):
+            lines.append(" [0x%0*x, 0x%0*x)" % (
+                addr_str_len,
+                (0 if entry.is_absolute else base_ip) + entry.begin_offset,
+                addr_str_len,
+                (0 if entry.is_absolute else base_ip) + entry.end_offset))
+        elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
+            base_ip = entry.base_address
+        else:
+            raise NotImplementedError("Unknown object in a range list")
+    return ("0x%08x\n" % attr.value) + "\n".join(lines)
+
+def _desc_locations(attr, die):
+    """Describe a location attribute: either a single expression, or a
+    location list printed as its offset followed by one line per entry.
+
+    The location lists object and the LocationParser are cached on the
+    dwarfinfo object as _loclists and _locparser.
+
+    Raises NotImplementedError for an unknown entry type in the list.
+    """
+    cu = die.cu
+    di = cu.dwarfinfo
+    if not hasattr(di, '_loclists'):
+        di._loclists = di.location_lists()
+    if not hasattr(di, '_locparser'):
+        di._locparser = LocationParser(di._loclists)
+    loclist = di._locparser.parse_from_attribute(attr, cu.header.version, die)
+    if isinstance(loclist, LocationExpr):
+        return _desc_expression(loclist.loc_expr, die)
+
+    base_ip = _get_cu_base(cu)
+    lines = []
+    width = die.cu.header.address_size*2
+    for entry in loclist:
+        if isinstance(entry, LocationEntry):
+            # Non-absolute entries are relative to the current base address
+            bias = 0 if entry.is_absolute else base_ip
+            lines.append(" [0x%0*x, 0x%0*x): %s" % (
+                width, bias + entry.begin_offset,
+                width, bias + entry.end_offset,
+                _desc_expression(entry.loc_expr, die)))
+        elif isinstance(entry, LocBaseAddressEntry):
+            base_ip = entry.base_address
+        else:
+            raise NotImplementedError("Unknown object in a location list")
+    return ("0x%08x:\n" % attr.value) + "\n".join(lines)
+
+# By default, numeric arguments are spelled in hex with a leading 0x
+def _desc_operationarg(s, cu):
+    """Describe one argument of a DWARF expression operation.
+
+    Strings are returned as is, ints in hex. A list is either a
+    subexpression (a list of DWARFExprOp), printed in parentheses, or a
+    blob, printed as its length followed by its bytes. Any other type
+    yields None.
+    """
+    if isinstance(s, str):
+        return s
+    if isinstance(s, int):
+        return hex(s)
+    if not isinstance(s, list):
+        return None
+    # Could be a blob (list of ints), could be a subexpression
+    if len(s) > 0 and isinstance(s[0], DWARFExprOp):
+        return '(' + '; '.join(_desc_operation(op.op, op.op_name, op.args, cu) for op in s) + ')'
+    return " ".join((hex(len(s)),) + tuple("0x%02x" % b for b in s))
+
+def _arch(cu):
+    """Return the machine architecture from the config of the CU's dwarfinfo."""
+    config = cu.dwarfinfo.config
+    return config.machine_arch
+
+def _desc_reg(reg_no, cu):
+    """Return the upper-cased name of register reg_no for the
+    architecture of the CU.
+    """
+    reg_name = describe_reg_name(reg_no, _arch(cu), True)
+    return reg_name.upper()
+
+def _desc_operation(op, op_name, args, cu):
+    """Describe a single DWARF expression operation.
+
+    op - the numeric opcode
+    op_name - the DW_OP_ name of the operation
+    args - the arguments of the operation
+    cu - the CU the expression belongs to; used for register names and
+        for resolving CU-relative DIE offsets
+    """
+    # Not sure about regx(regno) and bregx(regno, offset)
+    if 0x50 <= op <= 0x6f:
+        # reg0...reg31 - decode reg name
+        return op_name + " " + _desc_reg(op - 0x50, cu)
+    if 0x70 <= op <= 0x8f:
+        # breg0...breg31(offset) - also decode reg name
+        return '%s %s%+d' % (op_name, _desc_reg(op - 0x70, cu), args[0])
+    if op_name in ('DW_OP_fbreg', 'DW_OP_bra', 'DW_OP_skip'):
+        # Argument is decimal with a leading sign
+        return op_name + ' ' + "%+d" % (args[0])
+    if op_name in ('DW_OP_const1s', 'DW_OP_const2s'):
+        # Argument is decimal without a leading sign
+        return op_name + ' ' + "%d" % (args[0])
+    if op_name in ('DW_OP_entry_value', 'DW_OP_GNU_entry_value'):
+        # No space between opcode and args
+        return op_name + _desc_operationarg(args[0], cu)
+    if op_name == 'DW_OP_regval_type':
+        # Arg is a DIE pointer
+        reg = _desc_reg(args[0], cu)
+        target = args[1] + cu.cu_offset
+        return "%s %s (0x%08x -> 0x%08x) \"%s\"" % (
+            op_name, reg, args[1], target,
+            _DIE_name(cu._get_cached_DIE(target)))
+    if op_name == 'DW_OP_convert':
+        # Arg is a DIE pointer
+        target = args[0] + cu.cu_offset
+        return "%s (0x%08x -> 0x%08x) \"%s\"" % (
+            op_name, args[0], target,
+            _DIE_name(cu._get_cached_DIE(target)))
+    if args:
+        return op_name + ' ' + ', '.join(_desc_operationarg(s, cu) for s in args)
+    return op_name
+
+# Operation names at which _desc_expression stops decoding: everything from
+# the first such operation on is emitted as "<decoding error>" plus raw bytes.
+# TODO: remove this once dwarfdump catches up
+UNSUPPORTED_OPS = (
+ 'DW_OP_implicit_pointer',
+ 'DW_OP_deref_type',
+ 'DW_OP_GNU_parameter_ref',
+ 'DW_OP_GNU_deref_type',
+ 'DW_OP_GNU_implicit_pointer',
+ 'DW_OP_GNU_convert',
+ 'DW_OP_GNU_regval_type')
+
+def _desc_expression(expr, die):
+    """Describe a DWARF expression as a comma-separated list of operations.
+
+    The expression parser is cached on the CU object as _exprparser.
+    Decoding stops at the first operation listed in UNSUPPORTED_OPS; the
+    bytes from that operation to the end of the expression are appended
+    in hex after a "<decoding error>" marker.
+    """
+    cu = die.cu
+    if not hasattr(cu, '_exprparser'):
+        cu._exprparser = DWARFExprParser(cu.structs)
+
+    parsed = cu._exprparser.parse_expr(expr)
+    # TODO: remove this once dwarfdump catches up
+    cutoff = None
+    for index, op in enumerate(parsed):
+        if op.op_name in UNSUPPORTED_OPS:
+            cutoff = index
+            break
+
+    decodable = parsed if cutoff is None else parsed[0:cutoff]
+    lines = [_desc_operation(op.op, op.op_name, op.args, cu) for op in decodable]
+    if cutoff is not None:
+        start_of_unparsed = parsed[cutoff].offset
+        lines.append("<decoding error> " + " ".join("%02x" % b for b in expr[start_of_unparsed:]))
+    return ", ".join(lines)
+
+def _desc_datatype(attr, die):
+    """Describe a DW_AT_type attribute: the reference, annotated with
+    the C++ name of the datatype of the DIE.
+    """
+    type_name = describe_cpp_datatype(die)
+    return _desc_ref(attr, die, type_name)
+
+def _get_origin_name(die):
+    """Return the name of the DIE that DW_AT_abstract_origin of die
+    refers to, preferring the linkage name.
+
+    If that DIE has no name, the name is taken from its
+    DW_AT_specification, or else found by following its own
+    DW_AT_abstract_origin. Returns an empty name if all of that fails.
+    """
+    origin = die.get_DIE_from_attribute('DW_AT_abstract_origin')
+    name = _safe_DIE_linkage_name(origin, '')
+    if name:
+        return name
+    if 'DW_AT_specification' in origin.attributes:
+        return _DIE_linkage_name(origin.get_DIE_from_attribute('DW_AT_specification'))
+    if 'DW_AT_abstract_origin' in origin.attributes:
+        return _get_origin_name(origin)
+    return name
+
+def _desc_origin(attr, die):
+    """Describe a DW_AT_abstract_origin attribute: the reference,
+    annotated with the name of the origin.
+    """
+    origin_name = _get_origin_name(die)
+    return _desc_ref(attr, die, origin_name)
+
+def _desc_spec(attr, die):
+    """Describe a DW_AT_specification attribute: the reference,
+    annotated with the linkage name of the specification DIE.
+    """
+    spec_die = die.get_DIE_from_attribute('DW_AT_specification')
+    return _desc_ref(attr, die, _DIE_linkage_name(spec_die))
+
+def _desc_value(attr, die):
+    """Describe an attribute as the plain string form of its value."""
+    value = attr.value
+    return str(value)
+
+# Formatters keyed by attribute name. Each one takes (attr, die) and returns
+# the description string. ReadElf.describe_attr_value consults this table
+# first, before the per-form FORM_DESCRIPTIONS.
+ATTR_DESCRIPTIONS = dict(
+ DW_AT_language=lambda attr, die: _desc_enum(attr, ENUM_DW_LANG),
+ DW_AT_encoding=lambda attr, die: _desc_enum(attr, ENUM_DW_ATE),
+ DW_AT_accessibility=lambda attr, die: _desc_enum(attr, ENUM_DW_ACCESS),
+ DW_AT_inline=lambda attr, die: _desc_enum(attr, ENUM_DW_INL),
+ DW_AT_decl_file=_desc_decl_file,
+ DW_AT_decl_line=_desc_value,
+ DW_AT_ranges=_desc_ranges,
+ DW_AT_location=_desc_locations,
+ # A DW_FORM_data* member location is printed as data, any other form as a location
+ DW_AT_data_member_location=lambda attr, die: _desc_data(attr, die) if attr.form.startswith('DW_FORM_data') else _desc_locations(attr, die),
+ DW_AT_frame_base=_desc_locations,
+ DW_AT_type=_desc_datatype,
+ DW_AT_call_line=_desc_value,
+ DW_AT_call_file=_desc_decl_file,
+ DW_AT_abstract_origin=_desc_origin,
+ DW_AT_specification=_desc_spec
+)
+
+class ReadElf(object):
+ """ dump_xxx is used to dump the respective section.
+ Mimics the output of dwarfdump with --verbose
+ """
+ def __init__(self, filename, file, output):
+     """ filename:
+             name printed in the "file format" line
+
+         file:
+             stream object with the ELF file to read
+
+         output:
+             output stream to write to
+
+         Emits the "file format" line right away. Only EM_386 and
+         EM_X86_64 have an architecture name here; any other e_machine
+         fails the lookup.
+     """
+     self.elffile = ELFFile(file)
+     self.output = output
+     self._dwarfinfo = self.elffile.get_dwarf_info()
+     machine_names = {"EM_386": "i386", "EM_X86_64": "x86-64"}
+     arch_name = machine_names[self.elffile['e_machine']]
+     elf_bits = self.elffile.elfclass
+     self._emitline("%s: file format elf%d-%s" % (filename, elf_bits, arch_name))
+
+ def _emit(self, s=''):
+     """ Write the string form of an object to output, as is
+     """
+     text = str(s)
+     self.output.write(text)
+
+ def _emitline(self, s=''):
+     """ Write the string form of an object to output, with trailing
+         whitespace stripped, followed by a newline
+     """
+     line = str(s).rstrip()
+     self.output.write(line + '\n')
+
+ def dump_info(self):
+ # Dumps .debug_info: a header line per CU, then a line per DIE followed
+ # by a line per attribute of that DIE. Null DIEs are printed as NULL.
+ # TODO: DWARF64 will cause discrepancies in hex offset sizes
+ self._emitline(".debug_info contents:")
+ for cu in self._dwarfinfo.iter_CUs():
+ # The unit type is only reported for DWARF version 5 and above
+ if cu.header.version >= 5:
+ ut = next(k for (k,v) in ENUM_DW_UT.items() if v == cu.header.unit_type)
+ unit_type_str = " unit_type = %s," % ut
+ else:
+ unit_type_str = ''
+
+ # The next unit is at this unit's offset + 4 or 12 (presumably the size
+ # of the initial length field in 32/64-bit DWARF) + the unit length
+ self._emitline("0x%08x: Compile Unit: length = 0x%08x, format = DWARF%d, version = 0x%04x,%s abbr_offset = 0x%04x, addr_size = 0x%02x (next unit at 0x%08x)" %(
+ cu.cu_offset,
+ cu.header.unit_length,
+ cu.structs.dwarf_format,
+ cu.header.version,
+ unit_type_str,
+ cu.header.debug_abbrev_offset,
+ cu.header.address_size,
+ cu.cu_offset + (4 if cu.structs.dwarf_format == 32 else 12) + cu.header.unit_length))
+ self._emitline()
+ parent = cu.get_top_DIE()
+ for die in cu.iter_DIEs():
+ if die.get_parent() == parent:
+ parent = die
+ if not die.is_null():
+ # DIE line: offset, tag, abbrev code, '*' if it has children, parent offset
+ self._emitline("0x%08x: %s [%d] %s %s" % (
+ die.offset,
+ die.tag,
+ die.abbrev_code,
+ '*' if die.has_children else '',
+ '(0x%08x)' % die.get_parent().offset if die.get_parent() is not None else ''))
+ for attr_name in die.attributes:
+ attr = die.attributes[attr_name]
+ self._emitline(" %s [%s] (%s)" % (attr_name, attr.form, self.describe_attr_value(die, attr)))
+ else:
+ self._emitline("0x%08x: NULL" % (die.offset,))
+ parent = die.get_parent()
+ self._emitline()
+
+ def describe_attr_value(self, die, attr):
+     """This describes the attribute value in the way that's compatible
+     with llvm_dwarfdump. Somewhat duplicates the work of describe_attr_value() in descriptions
+
+     A formatter registered for the attribute name wins over one for the
+     attribute form; with neither, the value is printed with str().
+     """
+     lookups = ((attr.name, ATTR_DESCRIPTIONS), (attr.form, FORM_DESCRIPTIONS))
+     for key, table in lookups:
+         if key in table:
+             return table[key](attr, die)
+     return str(attr.value)
+
+ def dump_loc(self):
+     """Placeholder: does nothing and emits no output."""
+     return None
+
+ def dump_loclists(self):
+     """Placeholder: does nothing and emits no output."""
+     return None
+
+ def dump_ranges(self):
+     """Placeholder: does nothing and emits no output."""
+     return None
+
+ def dump_v4_rangelist(self, rangelist, cu_map):
+     """Emit one line per address range of a range list.
+
+     rangelist - the entries of the list; must not be empty
+     cu_map - dict from the offset of a range list to its home CU
+
+     Offsets of non-absolute entries are relative to the CU base address
+     until a base address entry in the list replaces it. Raises
+     NotImplementedError for an unknown entry type.
+     """
+     cu = cu_map[rangelist[0].entry_offset]
+     width = cu.header.address_size*2
+     base_ip = _get_cu_base(cu)
+     for entry in rangelist:
+         if isinstance(entry, RangeEntry):
+             bias = 0 if entry.is_absolute else base_ip
+             self._emitline("[0x%0*x, 0x%0*x)" % (
+                 width, bias + entry.begin_offset,
+                 width, bias + entry.end_offset))
+             continue
+         if not isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
+             raise NotImplementedError("Unknown object in a range list")
+         base_ip = entry.base_address
+
+ def dump_rnglists(self):
+     """Dump the .debug_rnglists section: for each CU block in it, the
+     header line followed by the range lists of the block.
+
+     Only the section title is emitted if the section version is below 5.
+     """
+     self._emitline(".debug_rnglists contents:")
+     ranges_sec = self._dwarfinfo.range_lists()
+     if ranges_sec.version < 5:
+         return
+
+     # Dict from range offset to home CU
+     cu_map = {}
+     for info_cu in self._dwarfinfo.iter_CUs():
+         for die in info_cu.iter_DIEs():
+             if 'DW_AT_ranges' in die.attributes:
+                 cu_map[die.attributes['DW_AT_ranges'].value] = info_cu
+
+     for rng_cu in ranges_sec.iter_CUs():
+         self._emitline("0x%08x: range list header: length = 0x%08x, format = DWARF%d, version = 0x%04x, addr_size = 0x%02x, seg_size = 0x%02x, offset_entry_count = 0x%08x" % (
+             rng_cu.cu_offset,
+             rng_cu.unit_length,
+             64 if rng_cu.is64 else 32,
+             rng_cu.version,
+             rng_cu.address_size,
+             rng_cu.segment_selector_size,
+             rng_cu.offset_count))
+         self._emitline("ranges:")
+         # With an offset table, follow it; without one, scan the block
+         if rng_cu.offset_count > 0:
+             rangelists = [ranges_sec.get_range_list_at_offset_ex(offset) for offset in rng_cu.offsets]
+         else:
+             rangelists = list(ranges_sec.iter_CU_range_lists_ex(rng_cu))
+         # We have to parse it completely before dumping, because dwarfdump
+         # aligns columns, no way to do that without some lookahead
+         max_type_len = max(len(entry.entry_type) for rangelist in rangelists for entry in rangelist)
+         for rangelist in rangelists:
+             self.dump_v5_rangelist(rangelist, cu_map, max_type_len)
+
def dump_v5_rangelist(self, rangelist, cu_map, max_type_len):
    """Emit one DWARFv5 range list, one line per entry, followed by a
    DW_RLE_end_of_list line.

    rangelist: sequence of the entries of a single range list.
    cu_map: dict from the offset of the first entry of a range list to
        the CU that list belongs to.
    max_type_len: width that entry type names are padded to, so that the
        columns line up.

    An empty rangelist emits nothing. Raises NotImplementedError on an
    entry type other than DW_RLE_base_address, DW_RLE_offset_pair,
    DW_RLE_start_length and DW_RLE_start_end.
    """
    if not rangelist:
        # No first entry to find the home CU by, and no last entry to
        # derive the offset of the end-of-list line from
        return

    cu = cu_map[rangelist[0].entry_offset]
    addr_str_len = cu.header.address_size*2  # Two hex digits per address byte
    base_ip = _get_cu_base(cu)
    # Raw operands of the entry, then the address range they stand for
    range_format = "0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)"
    for entry in rangelist:
        entry_type = entry.entry_type
        self._emit("0x%08x: [%s]: " % (entry.entry_offset, entry_type.ljust(max_type_len)))
        if entry_type == 'DW_RLE_base_address':
            # Applies to the offset pairs that follow
            base_ip = entry.address
            self._emitline("0x%0*x" % (addr_str_len, base_ip))
        elif entry_type == 'DW_RLE_offset_pair':
            self._emitline(range_format % (
                addr_str_len, entry.start_offset,
                addr_str_len, entry.end_offset,
                addr_str_len, entry.start_offset + base_ip,
                addr_str_len, entry.end_offset + base_ip))
        elif entry_type == 'DW_RLE_start_length':
            self._emitline(range_format % (
                addr_str_len, entry.start_address,
                addr_str_len, entry.length,
                addr_str_len, entry.start_address,
                addr_str_len, entry.start_address + entry.length))
        elif entry_type == 'DW_RLE_start_end':
            self._emitline(range_format % (
                addr_str_len, entry.start_address,
                addr_str_len, entry.end_address,
                addr_str_len, entry.start_address,
                addr_str_len, entry.end_address))
        else:
            raise NotImplementedError("Unsupported range list entry type %s" % (entry_type,))
    # The terminator is not among the entries; its line is synthesized
    # from the end of the last one
    last = rangelist[-1]
    self._emitline("0x%08x: [DW_RLE_end_of_list ]" % (last.entry_offset + last.entry_length,))
+
# Passed to the argument parser in main() as the description of the script
SCRIPT_DESCRIPTION = 'Display information about the contents of ELF format files'
# The %(prog)s placeholder is deliberately left unexpanded here; only the
# elftools version is filled in
VERSION_STRING = '%(prog)s: based on pyelftools ' + ('%s' % __version__)
+
def main(stream=None):
    """Command-line entry point: parse the arguments in sys.argv and dump
    the requested DWARF sections of the given ELF file.

    stream: file-like object that the dump is written to; sys.stdout is
        used if it's None (or otherwise falsy).

    Exits the process with status 0 after printing the help (on request,
    or if no file was given), and with status 1 on an ELFError.
    """
    # Only needed on the error path; imported locally to keep the function
    # self-contained
    import traceback

    argparser = argparse.ArgumentParser(
            usage='usage: %(prog)s [options] <elf-file>',
            description=SCRIPT_DESCRIPTION,
            add_help=False,
            prog='dwarfdump.py')
    argparser.add_argument('file',
            nargs='?', default=None,
            help='ELF file to parse')
    argparser.add_argument('-H', '--help',
            action='store_true', dest='help',
            help='Display this information')
    argparser.add_argument('--verbose',
            action='store_true', dest='verbose',
            help=('For compatibility with dwarfdump. Non-verbose mode is not implemented.'))
    # The error handler at the bottom reads args.show_traceback, so the
    # option has to exist - or the handler itself raises AttributeError
    argparser.add_argument('--traceback',
            action='store_true', dest='show_traceback',
            help='Dump the Python traceback on ELFError exceptions from elftools')

    # Section dumpers
    sections = ('info', 'loclists', 'rnglists') # 'loc', 'ranges' not implemented yet
    for section in sections:
        argparser.add_argument('--debug-%s' % section,
            action='store_true', dest=section,
            help=('Display the contents of DWARF debug_%s section.' % section))

    args = argparser.parse_args()

    if args.help or not args.file:
        argparser.print_help()
        sys.exit(0)

    # A compatibility hack on top of a compatibility hack :(
    # Renames two tags in the global tag table, presumably to llvm-dwarfdump's
    # spelling. pop() rather than del, so that calling main() again in the
    # same process doesn't fail on the keys already being gone.
    ENUM_DW_TAG.pop("DW_TAG_template_type_param", None)
    ENUM_DW_TAG.pop("DW_TAG_template_value_param", None)
    ENUM_DW_TAG['DW_TAG_template_type_parameter'] = 0x2f
    ENUM_DW_TAG['DW_TAG_template_value_parameter'] = 0x30

    with open(args.file, 'rb') as file:
        try:
            readelf = ReadElf(args.file, file, stream or sys.stdout)
            if args.info:
                readelf.dump_info()
            if args.loclists:
                readelf.dump_loclists()
            if args.rnglists:
                readelf.dump_rnglists()
            # dump_loc() and dump_ranges() get hooked up here once the
            # 'loc' and 'ranges' sections are added to the options above
        except ELFError as ex:
            # Flush first, so that the error lands after the regular output
            sys.stdout.flush()
            sys.stderr.write('ELF error: %s\n' % ex)
            if args.show_traceback:
                traceback.print_exc()
            sys.exit(1)
+
+#-------------------------------------------------------------------------------
if __name__ == '__main__':
    # Executed as a script rather than imported: run the command-line tool
    main()
run_test_script('test/run_all_unittests.py')
run_test_script('test/run_examples_test.py')
run_test_script('test/run_readelf_tests.py', '--parallel')
+ run_test_script('test/run_dwarfdump_tests.py', '--parallel')
if __name__ == '__main__':
sys.exit(main())
* git co binutils-<VERSION>-branch
* Run configure, then make
* Built on a 64-bit Ubuntu machine
+
+llvm-dwarfdump is built as follows:
+
+* Used Debian v10 on x86_64
+* install gcc, git, cmake
+* git clone https://github.com/llvm/llvm-project.git llvm
+* cd llvm
+* cmake -S llvm -B build -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release
+* cmake --build build -- llvm-dwarfdump
--- /dev/null
+#!/usr/bin/env python
+#-------------------------------------------------------------------------------
+# test/run_dwarfdump_tests.py
+#
+# Automatic test runner for elftools & llvm-dwarfdump-11
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import argparse
+from difflib import SequenceMatcher
+import logging
+from multiprocessing import Pool
+import os
+import platform
+import re
+import sys
+import time
+
+from utils import run_exe, is_in_rootdir, dump_output_to_temp_files
+
# Put the current directory first on the import path, so that this file can
# be run from the root dir of pyelftools without installing pyelftools;
# useful for CI testing, etc.
sys.path.insert(0, '.')

# The logger shared by everything in this module: all levels, to stdout
testlog = logging.getLogger('run_tests')
testlog.setLevel(logging.DEBUG)
testlog.addHandler(logging.StreamHandler(sys.stdout))

# Following the readelf example, we ship our own llvm-dwarfdump. There is
# none for MacOS or Windows, so the tests refuse to even load there.
_host_system = platform.system()
if _host_system == "Darwin": # MacOS
    raise NotImplementedError("Not supported on MacOS")
if _host_system == "Windows":
    raise NotImplementedError("Not supported on Windows")
DWARFDUMP_PATH = 'test/external_tools/llvm-dwarfdump'
+
def discover_testfiles(rootdir):
    """ Generate the paths of the test files found directly inside rootdir:
        the entries with the '.elf' extension, each joined with rootdir.
    """
    elf_names = (name for name in os.listdir(rootdir)
                 if os.path.splitext(name)[1] == '.elf')
    for name in elf_names:
        yield os.path.join(rootdir, name)
+
+
def run_test_on_file(filename, verbose=False, opt=None):
    """ Compare the output of llvm-dwarfdump with the output of
        scripts/dwarfdump.py on the given input file.

        filename: path of the file that both executables are run on
        verbose: log the progress and the timings, not just the failures
        opt: if specified, the one option to test, rather than the whole
            set of supported options (currently just --debug-info)

        Return True if the outputs matched for every option tested. Return
        False if any differed, or right away, without trying the remaining
        options, if either executable had a nonzero return code.
    """
    testlog.info("Test file '%s'" % filename)
    options = ['--debug-info'] if opt is None else [opt]
    executables = (DWARFDUMP_PATH, 'scripts/dwarfdump.py')
    all_matched = True

    for option in options:
        if verbose:
            testlog.info("..option='%s'" % option)

        # Collected in the order of executables: the output of
        # llvm-dwarfdump first, then the output of scripts/dwarfdump.py
        outputs = []
        for exe_path in executables:
            exe_args = [option, '--verbose', filename]
            if verbose:
                testlog.info("....executing: '%s %s'" % (
                    exe_path, ' '.join(exe_args)))
            started = time.time()
            rc, stdout = run_exe(exe_path, exe_args)
            if verbose:
                testlog.info("....elapsed: %s" % (time.time() - started,))
            if rc != 0:
                testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc))
                return False
            outputs.append(stdout)

        if verbose:
            testlog.info('....comparing output...')
        started = time.time()
        matched, errmsg = compare_output(*outputs)
        if verbose:
            testlog.info("....elapsed: %s" % (time.time() - started,))

        if matched:
            if verbose:
                testlog.info('.......................SUCCESS')
            continue

        # Report the mismatch and keep both outputs for a post-mortem
        all_matched = False
        testlog.info('.......................FAIL')
        testlog.info('....for file %s' % filename)
        testlog.info('....for option "%s"' % option)
        testlog.info('....Output #1 is llvm-dwarfdump, Output #2 is pyelftools')
        testlog.info('@@ ' + errmsg)
        dump_output_to_temp_files(testlog, filename, option, *outputs)
    return all_matched
+
+
def compare_output(s1, s2):
    """ Check that two dumps are the same, disregarding the letter case,
        the blank lines and any whitespace within the lines.

        s1: stdout of llvm-dwarfdump
        s2: stdout of elftools dwarfdump.py

        Return a (success, errmsg) pair: (True, '') if the dumps match,
        otherwise False and a description of the first mismatch found.
        The line number in that description is zero-based and doesn't
        count blank lines.
    """
    def significant_lines(text):
        # Lowercased, with the whitespace-only lines dropped
        return [line for line in text.lower().splitlines() if line.strip()]

    theirs = significant_lines(s1)
    ours = significant_lines(s2)

    if len(theirs) != len(ours):
        return False, 'Number of lines different: %s vs %s' % (
            len(theirs), len(ours))

    for index, (their_line, our_line) in enumerate(zip(theirs, ours)):
        # Compare ignoring whitespace
        if ''.join(their_line.split()) == ''.join(our_line.split()):
            continue

        # Spell out how the two lines differ, to make the mismatch
        # easier to spot
        opcodes = SequenceMatcher(None, their_line, our_line).get_opcodes()
        return False, 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n (%r)' % (
            index, their_line, our_line, opcodes)
    return True, ''
+
def main():
    """ Command-line entry point of the test runner: compare the output of
        llvm-dwarfdump and of scripts/dwarfdump.py on the files given on
        the command line, or on all the test files discovered in
        test/testfiles_for_dwarfdump if none are given.

        Return the exit code for the process: 0 if all the tests that ran
        succeeded, 1 if any failed or if not run from the root dir of
        pyelftools.
    """
    if not is_in_rootdir():
        testlog.error('Error: Please run me from the root dir of pyelftools!')
        return 1

    argparser = argparse.ArgumentParser(
        usage='usage: %(prog)s [options] [file] [file] ...',
        prog='run_dwarfdump_tests.py')
    argparser.add_argument('files', nargs='*', help='files to run tests on')
    argparser.add_argument(
        '--parallel', action='store_true',
        help='run tests in parallel; always runs all tests w/o verbose')
    argparser.add_argument('-V', '--verbose',
                           action='store_true', dest='verbose',
                           help='verbose output')
    argparser.add_argument(
        '-k', '--keep-going',
        action='store_true', dest='keep_going',
        help="Run all tests, don't stop at the first failure")
    argparser.add_argument('--opt',
        action='store', dest='opt', metavar='<dwarfdump-option>',
        help='Limit the test to one dwarfdump option.')
    args = argparser.parse_args()

    if args.parallel:
        if args.verbose or not args.keep_going:
            print('WARNING: parallel mode disables verbosity and always keeps going')

    if args.verbose:
        testlog.info('Running in verbose mode')
        testlog.info('Python executable = %s' % sys.executable)
        testlog.info('dwarfdump path = %s' % DWARFDUMP_PATH)
        testlog.info('Given list of files: %s' % args.files)

    # If file names are given as command-line arguments, only these files
    # are taken as inputs. Otherwise, autodiscovery is performed.
    if args.files:
        filenames = args.files
    else:
        filenames = sorted(discover_testfiles('test/testfiles_for_dwarfdump'))

    if len(filenames) > 1 and args.parallel:
        # Local import: only the parallel mode needs it
        from functools import partial
        # Every file gets tested, whatever the outcome of the others
        ran_all = True
        pool = Pool()
        try:
            # partial() carries --opt over to the workers; map() alone
            # can only pass the file name
            results = pool.map(partial(run_test_on_file, opt=args.opt), filenames)
        finally:
            # Don't leave the worker processes behind
            pool.close()
            pool.join()
        failures = results.count(False)
    else:
        ran_all = args.keep_going
        failures = 0
        for filename in filenames:
            if not run_test_on_file(filename, args.verbose, args.opt):
                failures += 1
                if not args.keep_going:
                    break

    if failures == 0:
        testlog.info('\nConclusion: SUCCESS')
        return 0
    elif ran_all:
        # The tally is only meaningful if no test was skipped
        testlog.info('\nConclusion: FAIL ({}/{})'.format(
            failures, len(filenames)))
        return 1
    else:
        testlog.info('\nConclusion: FAIL')
        return 1
+
+
if __name__ == '__main__':
    # The return value of main() becomes the exit status of the process
    exit_code = main()
    sys.exit(exit_code)
return True
else:
testlog.info('.......FAIL comparison')
- dump_output_to_temp_files(testlog, example_out, ref_str)
+ dump_output_to_temp_files(testlog, example_path, '', example_out, ref_str)
return False
testlog.info('....for option "%s"' % option)
testlog.info('....Output #1 is readelf, Output #2 is pyelftools')
testlog.info('@@ ' + errmsg)
- dump_output_to_temp_files(testlog, *stdouts)
+ dump_output_to_temp_files(testlog, filename, option, *stdouts)
return success
return os.path.isdir('test') and os.path.isdir('elftools')
-def dump_output_to_temp_files(testlog, *args):
+def dump_output_to_temp_files(testlog, filename, option, *args):
""" Dumps the output strings given in 'args' to temp files: one for each
- arg.
+ arg. The filename and option arguments contribute to the file name,
+ so that one knows which test did the output dump come from.
"""
for i, s in enumerate(args):
fd, path = tempfile.mkstemp(
- prefix='out' + str(i + 1) + '_',
+ prefix='out-%d-%s-%s-' % (i + 1, os.path.split(filename)[-1], option),
suffix='.stdout')
file = os.fdopen(fd, 'w')
file.write(s)