From: Seva Alekseyev Date: Wed, 6 Jul 2022 17:06:37 +0000 (-0400) Subject: Autotest against llvm-dwarfdump (#428) X-Git-Tag: v0.29~12 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6795bc62ebf01d1df8a4c6a4b1e342e5edc5201b;p=pyelftools.git Autotest against llvm-dwarfdump (#428) * Descriptive output dump file names on autotest * Dwarfdump.py * Test and test files for dwarfdump * Loclist dump fix * Permissions * Fixes --- diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py new file mode 100644 index 0000000..fea7e9c --- /dev/null +++ b/elftools/dwarf/datatype_cpp.py @@ -0,0 +1,232 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/datatype_cpp.py +# +# First draft at restoring the source level name a C/C++ datatype +# from DWARF data. Aiming at compatibility with llvm-dwarfdump v15. +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from ..common.py3compat import bytes2str + +cpp_symbols = dict( + pointer = "*", + reference = "&", + const = "const") + +def describe_cpp_datatype(var_die): + return str(parse_cpp_datatype(var_die)) + +def parse_cpp_datatype(var_die): + """Given a DIE that describes a variable, a parameter, or a member + with DW_AT_type in it, tries to return the C++ datatype as a string + + Returns a TypeDesc. + + Does not follow typedefs, doesn't resolve array element types + or struct members. Not good for a debugger. + """ + t = TypeDesc() + + if not 'DW_AT_type' in var_die.attributes: + t.tag = '' + return t + + type_die = var_die.get_DIE_from_attribute('DW_AT_type') + + mods = [] + # Unlike readelf, dwarfdump doesn't chase typedefs + while type_die.tag in ('DW_TAG_const_type', 'DW_TAG_pointer_type', 'DW_TAG_reference_type'): + modifier = _strip_type_tag(type_die) # const/reference/pointer + mods.insert(0, modifier) + if not 'DW_AT_type' in type_die.attributes: # void* is encoded as a pointer to nothing + t.name = t.tag = "void" + t.modifiers = tuple(mods) + return t + type_die = type_die.get_DIE_from_attribute('DW_AT_type') + + # From this point on, type_die doesn't change + t.tag = _strip_type_tag(type_die) + t.modifiers = tuple(mods) + + if t.tag in ('ptr_to_member', 'subroutine'): + if t.tag == 'ptr_to_member': + ptr_prefix = DIE_name(type_die.get_DIE_from_attribute('DW_AT_containing_type')) + "::" + type_die = type_die.get_DIE_from_attribute('DW_AT_type') + elif "DW_AT_object_pointer" in type_die.attributes: # Older compiler... Subroutine, but with an object pointer + ptr_prefix = DIE_name(DIE_type(DIE_type(type_die.get_DIE_from_attribute('DW_AT_object_pointer')))) + "::" + else: # Not a pointer to member + ptr_prefix = '' + + if t.tag == 'subroutine': + params = tuple(format_function_param(p, p) for p in type_die.iter_children() if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters") and 'DW_AT_artificial' not in p.attributes) + params = ", ".join(params) + if 'DW_AT_type' in type_die.attributes: + retval_type = parse_cpp_datatype(type_die) + is_pointer = retval_type.modifiers and retval_type.modifiers[-1] == 'pointer' + retval_type = str(retval_type) + if not is_pointer: + retval_type += " " + else: + retval_type = "void " + + if len(mods) and mods[-1] == 'pointer': + mods.pop() + t.modifiers = tuple(mods) + t.name = "%s(%s*)(%s)" % (retval_type, ptr_prefix, params) + else: + t.name = "%s(%s)" % (retval_type, params) + return t + elif DIE_is_ptr_to_member_struct(type_die): + dt = parse_cpp_datatype(next(type_die.iter_children())) # The first element is pfn, a function pointer with a this + dt.modifiers = tuple(dt.modifiers[:-1]) # Pop the extra pointer + dt.tag = "ptr_to_member_type" # Not a function pointer per se + return dt + elif t.tag == 'array': + t.dimensions = (sub.attributes['DW_AT_upper_bound'].value + 1 if 'DW_AT_upper_bound' in sub.attributes else -1 + for sub + in type_die.iter_children() + if sub.tag == 'DW_TAG_subrange_type') + t.name = describe_cpp_datatype(type_die) + return t + + # Now the nonfunction types + # Blank name is sometimes legal (unnamed unions, etc) + + t.name = safe_DIE_name(type_die, t.tag + " ") + + # Check the nesting - important for parameters + parent = type_die.get_parent() + scopes = list() + while parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'): + scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " ")) + # If unnamed scope, fall back to scope type - like "structure " + parent = parent.get_parent() + t.scopes = tuple(scopes) + + return t + +#-------------------------------------------------- + +class TypeDesc(object): + """ Encapsulates a description of a datatype, as parsed from DWARF DIEs. + Not enough to display the variable in the debugger, but enough + to produce a type description string similar to those of llvm-dwarfdump. + + name - name for primitive datatypes, element name for arrays, the + whole name for functions and function pouinters + + modifiers - a collection of "const"/"pointer"/"reference", from the + chain of DIEs preceeding the real type DIE + + scopes - a collection of struct/class/namespace names, parents of the + real type DIE + + tag - the tag of the real type DIE, stripped of initial DW_TAG_ and + final _type + + dimensions - the collection of array dimensions, if the type is an + array. -1 means an array of unknown dimension. + + """ + def __init__(self): + self.name = None + self.modifiers = () # Reads left to right + self.scopes = () # Reads left to right + self.tag = None + self.dimensions = None + + def __str__(self): + # Some reference points from dwarfdump: + # const->pointer->const->char = const char *const + # const->reference->const->int = const const int & + # const->reference->int = const int & + name = str(self.name) + mods = self.modifiers + + parts = [] + # Initial const applies to the var ifself, other consts apply to the pointee + if len(mods) and mods[0] == 'const': + parts.append("const") + mods = mods[1:] + + # ref->const in the end, const goes in front + if mods[-2:] == ("reference", "const"): + parts.append("const") + mods = mods[0:-1] + + if self.scopes: + name = '::'.join(self.scopes)+'::' + name + parts.append(name) + + if len(mods): + parts.append("".join(cpp_symbols[mod] for mod in mods)) + + if self.dimensions: + dims = "".join('[%s]' % (str(dim) if dim > 0 else '',) + for dim in self.dimensions) + else: + dims = '' + + return " ".join(parts)+dims + +def DIE_name(die): + return bytes2str(die.attributes['DW_AT_name'].value) + +def safe_DIE_name(die, default = ''): + return bytes2str(die.attributes['DW_AT_name'].value) if 'DW_AT_name' in die.attributes else default + +def DIE_type(die): + return die.get_DIE_from_attribute("DW_AT_type") + +class ClassDesc(object): + def __init__(self): + self.scopes = () + self.const_member = False + +def get_class_spec_if_member(func_spec, the_func): + if 'DW_AT_object_pointer' in the_func.attributes: + this_param = the_func.get_DIE_from_attribute('DW_AT_object_pointer') + this_type = parse_cpp_datatype(this_param) + class_spec = ClassDesc() + class_spec.scopes = this_type.scopes + (this_type.name,) + class_spec.const_member = any(("const", "pointer") == this_type.modifiers[i:i+2] + for i in range(len(this_type.modifiers))) # const -> pointer -> const for this arg of const + return class_spec + + # Check the parent element chain - could be a class + parent = func_spec.get_parent() + + scopes = [] + while parent.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"): + scopes.insert(0, DIE_name(parent)) + parent = parent.get_parent() + if scopes: + cs = ClassDesc() + cs.scopes = tuple(scopes) + return cs + + return None + +def format_function_param(param_spec, param): + if param_spec.tag == 'DW_TAG_formal_parameter': + if 'DW_AT_name' in param.attributes: + name = DIE_name(param) + elif 'DW_AT_name' in param_spec.attributes: + name = DIE_name(param_spec) + else: + name = None + type = parse_cpp_datatype(param_spec) + return str(type) + else: # unspecified_parameters AKA variadic + return "..." + +def DIE_is_ptr_to_member_struct(type_die): + if type_die.tag == 'DW_TAG_structure_type': + members = tuple(die for die in type_die.iter_children() if die.tag == "DW_TAG_member") + return len(members) == 2 and safe_DIE_name(members[0]) == "__pfn" and safe_DIE_name(members[1]) == "__delta" + return False + +def _strip_type_tag(die): + """Given a DIE with DW_TAG_foo_type, returns foo""" + return die.tag[7:-5] diff --git a/elftools/dwarf/dwarf_util.py b/elftools/dwarf/dwarf_util.py new file mode 100644 index 0000000..5d942e1 --- /dev/null +++ b/elftools/dwarf/dwarf_util.py @@ -0,0 +1,61 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/dwarf_utils.py +# +# Minor, shared DWARF helpers +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- + +import os +from ..construct.macros import UBInt32, UBInt64, ULInt32, ULInt64, Array +from ..common.exceptions import DWARFError +from ..common.utils import struct_parse + +def _get_base_offset(cu, base_attribute_name): + """Retrieves a required, base offset-type atribute + from the top DIE in the CU. Applies to several indirectly + encoded objects - range lists, location lists, strings, addresses. + """ + cu_top_die = cu.get_top_DIE() + if not base_attribute_name in cu_top_die.attributes: + raise DWARFError("The CU at offset 0x%x needs %s" % (cu.cu_offset, base_attribute_name)) + return cu_top_die.attributes[base_attribute_name].value + +def _resolve_via_offset_table(stream, cu, index, base_attribute_name): + """Given an index in the offset table and directions where to find it, + retrieves an offset. Works for loclists, rnglists. + + The DWARF offset bitness of the CU block in the section matches that + of the CU record in dwarf_info. See DWARFv5 standard, section 7.4. + + This is used for translating DW_FORM_loclistx, DW_FORM_rnglistx + via the offset table in the respective section. + """ + base_offset = _get_base_offset(cu, base_attribute_name) + # That's offset (within the rnglists/loclists/str_offsets section) of + # the offset table for this CU's block in that section, which in turn is indexed by the index. + + offset_size = 4 if cu.structs.dwarf_format == 32 else 8 + return base_offset + struct_parse(cu.structs.Dwarf_offset(''), stream, base_offset + index*offset_size, True) + +def _iter_CUs_in_section(stream, structs, parser): + """Iterates through the list of CU sections in loclists or rangelists. Almost identical structures there. + + get_parser is a lambda that takes structs, returns the parser + """ + stream.seek(0, os.SEEK_END) + endpos = stream.tell() + stream.seek(0, os.SEEK_SET) + + offset = 0 + while offset < endpos: + header = struct_parse(parser, stream, offset) + if header.offset_count > 0: + offset_parser = structs.Dwarf_uint64 if header.is64 else structs.Dwarf_uint32 + header['offsets'] = struct_parse(Array(header.offset_count, offset_parser('')), stream) + else: + header['offsets'] = False + yield header + offset = header.offset_after_length + header.unit_length + diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index 97630f7..9eaaf26 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -440,3 +440,81 @@ ENUM_DW_RLE = dict( DW_RLE_start_end = 0x06, DW_RLE_start_length = 0x07 ) + +ENUM_DW_LANG = dict( + DW_LANG_C89 = 0x0001, + DW_LANG_C = 0x0002, + DW_LANG_Ada83 = 0x0003, + DW_LANG_C_plus_plus = 0x0004, + DW_LANG_Cobol74 = 0x0005, + DW_LANG_Cobol85 = 0x0006, + DW_LANG_Fortran77 = 0x0007, + DW_LANG_Fortran90 = 0x0008, + DW_LANG_Pascal83 = 0x0009, + DW_LANG_Modula2 = 0x000a, + DW_LANG_Java = 0x000b, + DW_LANG_C99 = 0x000c, + DW_LANG_Ada95 = 0x000d, + DW_LANG_Fortran95 = 0x000e, + DW_LANG_PLI = 0x000f, + DW_LANG_ObjC = 0x0010, + DW_LANG_ObjC_plus_plus = 0x0011, + DW_LANG_UPC = 0x0012, + DW_LANG_D = 0x0013, + DW_LANG_Python = 0x0014, + DW_LANG_OpenCL = 0x0015, + DW_LANG_Go = 0x0016, + DW_LANG_Modula3 = 0x0017, + DW_LANG_Haskell = 0x0018, + DW_LANG_C_plus_plus_03 = 0x0019, + DW_LANG_C_plus_plus_11 = 0x001a, + DW_LANG_OCaml = 0x001b, + DW_LANG_Rust = 0x001c, + DW_LANG_C11 = 0x001d, + DW_LANG_Swift = 0x001e, + DW_LANG_Julia = 0x001f, + DW_LANG_Dylan = 0x0020, + DW_LANG_C_plus_plus_14 = 0x0021, + DW_LANG_Fortran03 = 0x0022, + DW_LANG_Fortran08 = 0x0023, + DW_LANG_RenderScript = 0x0024, + DW_LANG_BLISS = 0x0025, + DW_LANG_lo_user = 0x8000, + DW_LANG_hi_user = 0xffff +) + +ENUM_DW_ATE = dict( + DW_ATE_address = 0x01, + DW_ATE_boolean = 0x02, + DW_ATE_complex_float = 0x03, + DW_ATE_float = 0x04, + DW_ATE_signed = 0x05, + DW_ATE_signed_char = 0x06, + DW_ATE_unsigned = 0x07, + DW_ATE_unsigned_char = 0x08, + DW_ATE_imaginary_float = 0x09, + DW_ATE_packed_decimal = 0x0a, + DW_ATE_numeric_string = 0x0b, + DW_ATE_edited = 0x0c, + DW_ATE_signed_fixed = 0x0d, + DW_ATE_unsigned_fixed = 0x0e, + DW_ATE_decimal_float = 0x0f, + DW_ATE_UTF = 0x10, + DW_ATE_UCS = 0x11, + DW_ATE_ASCII = 0x12, + DW_ATE_lo_user = 0x80, + DW_ATE_hi_user = 0xff +) + +ENUM_DW_ACCESS = dict( + DW_ACCESS_public = 0x01, + DW_ACCESS_protected = 0x02, + DW_ACCESS_private = 0x03 +) + +ENUM_DW_INL = dict( + DW_INL_not_inlined = 0x00, + DW_INL_inlined = 0x01, + DW_INL_declared_not_inlined = 0x02, + DW_INL_declared_inlined = 0x03 +) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index c23cee9..eae55c5 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -110,8 +110,6 @@ class LocationLists(object): offset_index = 0 while stream.tell() < endpos: # We are at the start of the CU block in the loclists now - unit_length = struct_parse(self.structs.Dwarf_initial_length(''), stream) - offset_past_len = stream.tell() cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream) assert(cu_header.version == 5) @@ -119,7 +117,7 @@ class LocationLists(object): # We don't have a binary for the former yet. On an off chance that we one day might, # let's parse the header anyway. - cu_end_offset = offset_past_len + unit_length + cu_end_offset = cu_header.offset_after_length + cu_header.unit_length # Unit_length includes the header but doesn't include the length while stream.tell() < cu_end_offset: @@ -263,7 +261,7 @@ class LocationParser(object): @staticmethod def _attribute_has_loc_list(attr, dwarf_version): return ((dwarf_version < 4 and - attr.form in ('DW_FORM_data4', 'DW_FORM_data8') and + attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8') and not attr.name == 'DW_AT_const_value') or attr.form == 'DW_FORM_sec_offset') @@ -275,6 +273,7 @@ class LocationParser(object): 'DW_AT_frame_base', 'DW_AT_segment', 'DW_AT_static_link', 'DW_AT_use_location', 'DW_AT_vtable_elem_location', + 'DW_AT_call_value', 'DW_AT_GNU_call_site_value', 'DW_AT_GNU_call_site_target', 'DW_AT_GNU_call_site_data_value')) diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index e5476de..e5c9fde 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -10,6 +10,8 @@ import os from collections import namedtuple from ..common.utils import struct_parse +from ..common.exceptions import DWARFError +from .dwarf_util import _iter_CUs_in_section RangeEntry = namedtuple('RangeEntry', 'entry_offset entry_length begin_offset end_offset is_absolute') @@ -54,6 +56,12 @@ class RangeLists(object): self.stream.seek(offset, os.SEEK_SET) return self._parse_range_list_from_stream() + def get_range_list_at_offset_ex(self, offset): + """Get a DWARF v5 range list, addresses and offsets unresolved, + at the given offset in the section + """ + return struct_parse(self.structs.Dwarf_rnglists_entries, self.stream, offset) + def iter_range_lists(self): """ Yield all range lists found in the section. """ @@ -68,6 +76,24 @@ class RangeLists(object): for offset in all_offsets: yield self.get_range_list_at_offset(offset) + def iter_CUs(self): + """For DWARF5 returns an array of objects, where each one has an array of offsets + """ + if self.version < 5: + raise DWARFError("CU iteration in rnglists is not supported with DWARF<5") + + structs = next(self._dwarfinfo.iter_CUs()).structs # Just pick one + return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_rnglists_CU_header) + + def iter_CU_range_lists_ex(self, cu): + """For DWARF5, returns untranslated rangelists in the CU, where CU comes from iter_CUs above + """ + stream = self.stream + stream.seek(cu.offset_table_offset + (64 if cu.is64 else 32) * cu.offset_count) + while stream.tell() < cu.offset_after_length + cu.unit_length: + yield struct_parse(self.structs.Dwarf_rnglists_entries, stream); + + #------ PRIVATE ------# def _parse_range_list_from_stream(self): diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 5aa4a12..b479399 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -403,13 +403,15 @@ class DWARFStructs(object): """ Create a struct for debug_loclists CU header, DWARFv5, 7,29 """ self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header', - # Unit_length parsed separately + StreamOffset('cu_offset'), + self.Dwarf_initial_length('unit_length'), + Value('is64', lambda ctx: ctx.is64), + StreamOffset('offset_after_length'), self.Dwarf_uint16('version'), self.Dwarf_uint8('address_size'), self.Dwarf_uint8('segment_selector_size'), - PrefixedArray( - self.Dwarf_offset('offsets'), - self.Dwarf_uint32(''))) + self.Dwarf_uint32('offset_count'), + StreamOffset('offset_table_offset')) cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128('')) @@ -436,6 +438,17 @@ class DWARFStructs(object): StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end')) def _create_rnglists_parsers(self): + self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header', + StreamOffset('cu_offset'), + self.Dwarf_initial_length('unit_length'), + Value('is64', lambda ctx: ctx.is64), + StreamOffset('offset_after_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_selector_size'), + self.Dwarf_uint32('offset_count'), + StreamOffset('offset_table_offset')) + self.Dwarf_rnglists_entries = RepeatUntilExcluding( lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list', Struct('entry', @@ -462,9 +475,11 @@ class _InitialLengthAdapter(Adapter): """ def _decode(self, obj, context): if obj.first < 0xFFFFFF00: + context['is64'] = False return obj.first else: if obj.first == 0xFFFFFFFF: + context['is64'] = True return obj.second else: raise ConstructError("Failed decoding initial length for %X" % ( diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py new file mode 100644 index 0000000..ac86da8 --- /dev/null +++ b/scripts/dwarfdump.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python +#------------------------------------------------------------------------------- +# scripts/dwarfdump.py +# +# A clone of 'llvm-dwarfdump-11' in Python, based on the pyelftools library +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import argparse +import os, sys, posixpath +import traceback + +# For running from development directory. It should take precedence over the +# installed pyelftools. +sys.path.insert(0, '.') + +from elftools import __version__ +from elftools.common.exceptions import DWARFError, ELFError +from elftools.common.py3compat import bytes2str +from elftools.elf.elffile import ELFFile +from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationExpr, LocationViewPair, BaseAddressEntry as LocBaseAddressEntry +from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above +import elftools.dwarf.ranges +from elftools.dwarf.enums import * +from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp +from elftools.dwarf.datatype_cpp import describe_cpp_datatype +from elftools.dwarf.descriptions import describe_reg_name + +# ------------------------------ +# ------------------------------ + +def _get_cu_base(cu): + top_die = cu.get_top_DIE() + attr = top_die.attributes + if 'DW_AT_low_pc' in attr: + return attr['DW_AT_low_pc'].value + elif 'DW_AT_entry_pc' in attr: + return attr['DW_AT_entry_pc'].value + else: + raise ValueError("Can't find the base IP (low_pc) for a CU") + +def _addr_str_length(die): + return die.cu.header.address_size*2 + +def _DIE_name(die): + if 'DW_AT_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_name'].value) + elif 'DW_AT_linkage_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_linkage_name'].value) + else: + raise DWARFError() + +def _DIE_linkage_name(die): + if 'DW_AT_linkage_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_linkage_name'].value) + elif 'DW_AT_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_name'].value) + else: + raise DWARFError() + +def _safe_DIE_name(die, default=None): + if 'DW_AT_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_name'].value) + elif 'DW_AT_linkage_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_linkage_name'].value) + else: + return default + +def _safe_DIE_linkage_name(die, default=None): + if 'DW_AT_linkage_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_linkage_name'].value) + elif 'DW_AT_name' in die.attributes: + return bytes2str(die.attributes['DW_AT_name'].value) + else: + return default + +def _desc_ref(attr, die, extra=''): + if extra: + extra = " \"%s\"" % extra + return "cu + 0x%04x => {0x%08x}%s" % ( + attr.raw_value, + die.cu.cu_offset + attr.raw_value, + extra) + +def _desc_data(attr, die): + """ Hex with length driven by form + """ + len = int(attr.form[12:]) * 2 + return "0x%0*x" % (len, attr.value,) + +FORM_DESCRIPTIONS = dict( + DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),), + DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")), + DW_FORM_line_strp=lambda attr, die: ".debug_line_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")), + DW_FORM_flag_present=lambda attr, die: "true", + DW_FORM_flag=lambda attr, die: "0x%02x" % int(attr.value), + DW_FORM_addr=lambda attr, die: "0x%0*x" % (_addr_str_length(die), attr.value), + DW_FORM_data1=_desc_data, + DW_FORM_data2=_desc_data, + DW_FORM_data4=_desc_data, + DW_FORM_data8=_desc_data, + DW_FORM_block1=lambda attr, die: "<0x%02x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)), + DW_FORM_block2=lambda attr, die: "<0x%04x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)), + DW_FORM_block4=lambda attr, die: "<0x%08x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)), + DW_FORM_ref=_desc_ref, + DW_FORM_ref1=_desc_ref, DW_FORM_ref2=_desc_ref, + DW_FORM_ref4=_desc_ref, DW_FORM_ref8=_desc_ref, + DW_FORM_sec_offset=lambda attr,die: "0x%08x" % (attr.value,), + DW_FORM_exprloc=lambda attr, die: _desc_expression(attr.value, die) +) + +def _desc_enum(attr, enum): + """For attributes like DW_AT_language, physically + int, logically an enum + """ + return next((k for (k, v) in enum.items() if v == attr.value), str(attr.value)) + +def _cu_comp_dir(cu): + return bytes2str(cu.get_top_DIE().attributes['DW_AT_comp_dir'].value) + +def _desc_decl_file(attr, die): + cu = die.cu + if not hasattr(cu, "_lineprogram"): + cu._lineprogram = die.dwarfinfo.line_program_for_CU(cu) + val = attr.value + if cu._lineprogram and val > 0 and val <= len(cu._lineprogram.header.file_entry): + file_entry = cu._lineprogram.header.file_entry[val-1] + includes = cu._lineprogram.header.include_directory + if file_entry.dir_index > 0: + dir = bytes2str(includes[file_entry.dir_index - 1]) + if dir.startswith('.'): + dir = posixpath.join(_cu_comp_dir(cu), dir) + else: + dir = _cu_comp_dir(cu) + return "\"%s\"" % (posixpath.join(dir, bytes2str(file_entry.name)),) + else: + return '(N/A)' + +def _desc_ranges(attr, die): + di = die.cu.dwarfinfo + if not hasattr(di, '_rnglists'): + di._rangelists = di.range_lists() + rangelist = di._rangelists.get_range_list_at_offset(attr.value) + base_ip = _get_cu_base(die.cu) + lines = [] + addr_str_len = die.cu.header.address_size*2 + for entry in rangelist: + if isinstance(entry, RangeEntry): + lines.append(" [0x%0*x, 0x%0*x)" % ( + addr_str_len, + (0 if entry.is_absolute else base_ip) + entry.begin_offset, + addr_str_len, + (0 if entry.is_absolute else base_ip) + entry.end_offset)) + elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry): + base_ip = entry.base_address + else: + raise NotImplementedError("Unknown object in a range list") + return ("0x%08x\n" % attr.value) + "\n".join(lines) + +def _desc_locations(attr, die): + cu = die.cu + di = cu.dwarfinfo + if not hasattr(di, '_loclists'): + di._loclists = di.location_lists() + if not hasattr(di, '_locparser'): + di._locparser = LocationParser(di._loclists) + loclist = di._locparser.parse_from_attribute(attr, cu.header.version, die) + if isinstance(loclist, LocationExpr): + return _desc_expression(loclist.loc_expr, die) + else: + base_ip = _get_cu_base(cu) + lines = [] + addr_str_len = die.cu.header.address_size*2 + for entry in loclist: + if isinstance(entry, LocationEntry): + lines.append(" [0x%0*x, 0x%0*x): %s" % ( + addr_str_len, + (0 if entry.is_absolute else base_ip) + entry.begin_offset, + addr_str_len, + (0 if entry.is_absolute else base_ip) + entry.end_offset, + _desc_expression(entry.loc_expr, die))) + elif isinstance(entry, LocBaseAddressEntry): + base_ip = entry.base_address + else: + raise NotImplementedError("Unknown object in a location list") + return ("0x%08x:\n" % attr.value) + "\n".join(lines) + +# By default, numeric arguments are spelled in hex with a leading 0x +def _desc_operationarg(s, cu): + if isinstance(s, str): + return s + elif isinstance(s, int): + return hex(s) + elif isinstance(s, list): # Could be a blob (list of ints), could be a subexpression + if len(s) > 0 and isinstance(s[0], DWARFExprOp): # Subexpression + return '(' + '; '.join(_desc_operation(op.op, op.op_name, op.args, cu) for op in s) + ')' + else: + return " ".join((hex(len(s)),) + tuple("0x%02x" % b for b in s)) + +def _arch(cu): + return cu.dwarfinfo.config.machine_arch + +def _desc_reg(reg_no, cu): + return describe_reg_name(reg_no, _arch(cu), True).upper() + +def _desc_operation(op, op_name, args, cu): + # Not sure about regx(regno) and bregx(regno, offset) + if 0x50 <= op <= 0x6f: # reg0...reg31 - decode reg name + return op_name + " " + _desc_reg(op - 0x50, cu) + elif 0x70 <= op <= 0x8f: # breg0...breg31(offset) - also decode reg name + return '%s %s%+d' % ( + op_name, + _desc_reg(op - 0x70, cu), + args[0]) + elif op_name in ('DW_OP_fbreg', 'DW_OP_bra', 'DW_OP_skip'): # Argument is decimal with a leading sign + return op_name + ' ' + "%+d" % (args[0]) + elif op_name in ('DW_OP_const1s', 'DW_OP_const2s'): # Argument is decimal without a leading sign + return op_name + ' ' + "%d" % (args[0]) + elif op_name in ('DW_OP_entry_value', 'DW_OP_GNU_entry_value'): # No space between opcode and args + return op_name + _desc_operationarg(args[0], cu) + elif op_name == 'DW_OP_regval_type': # Arg is a DIE pointer + return "%s %s (0x%08x -> 0x%08x) \"%s\"" % ( + op_name, + _desc_reg(args[0], cu), + args[1], + args[1] + cu.cu_offset, + _DIE_name(cu._get_cached_DIE(args[1] + cu.cu_offset))) + elif op_name == 'DW_OP_convert': # Arg is a DIE pointer + return "%s (0x%08x -> 0x%08x) \"%s\"" % ( + op_name, + args[0], + args[0] + cu.cu_offset, + _DIE_name(cu._get_cached_DIE(args[0] + cu.cu_offset))) + elif args: + return op_name + ' ' + ', '.join(_desc_operationarg(s, cu) for s in args) + else: + return op_name + +# TODO: remove this once dwarfdump catches up +UNSUPPORTED_OPS = ( + 'DW_OP_implicit_pointer', + 'DW_OP_deref_type', + 'DW_OP_GNU_parameter_ref', + 'DW_OP_GNU_deref_type', + 'DW_OP_GNU_implicit_pointer', + 'DW_OP_GNU_convert', + 'DW_OP_GNU_regval_type') + +def _desc_expression(expr, die): + cu = die.cu + if not hasattr(cu, '_exprparser'): + cu._exprparser = DWARFExprParser(cu.structs) + + parsed = cu._exprparser.parse_expr(expr) + # TODO: remove this once dwarfdump catches up + first_unsupported = next((i for (i, op) in enumerate(parsed) if op.op_name in UNSUPPORTED_OPS), None) + if first_unsupported is None: + lines = [_desc_operation(op.op, op.op_name, op.args, cu) for op in parsed] + else: + lines = [_desc_operation(op.op, op.op_name, op.args, cu) for op in parsed[0:first_unsupported]] + start_of_unparsed = parsed[first_unsupported].offset + lines.append(" " + " ".join("%02x" % b for b in expr[start_of_unparsed:])) + return ", ".join(lines) + +def _desc_datatype(attr, die): + """Oy vey + """ + return _desc_ref(attr, die, describe_cpp_datatype(die)) + +def _get_origin_name(die): + func_die = die.get_DIE_from_attribute('DW_AT_abstract_origin') + name = _safe_DIE_linkage_name(func_die, '') + if not name: + if 'DW_AT_specification' in func_die.attributes: + name = _DIE_linkage_name(func_die.get_DIE_from_attribute('DW_AT_specification')) + elif 'DW_AT_abstract_origin' in func_die.attributes: + return _get_origin_name(func_die) + return name + +def _desc_origin(attr, die): + return _desc_ref(attr, die, _get_origin_name(die)) + +def _desc_spec(attr, die): + return _desc_ref(attr, die, + _DIE_linkage_name(die.get_DIE_from_attribute('DW_AT_specification'))) + +def _desc_value(attr, die): + return str(attr.value) + +ATTR_DESCRIPTIONS = dict( + DW_AT_language=lambda attr, die: _desc_enum(attr, ENUM_DW_LANG), + DW_AT_encoding=lambda attr, die: _desc_enum(attr, ENUM_DW_ATE), + DW_AT_accessibility=lambda attr, die: _desc_enum(attr, ENUM_DW_ACCESS), + DW_AT_inline=lambda attr, die: _desc_enum(attr, ENUM_DW_INL), + DW_AT_decl_file=_desc_decl_file, + DW_AT_decl_line=_desc_value, + DW_AT_ranges=_desc_ranges, + DW_AT_location=_desc_locations, + DW_AT_data_member_location=lambda attr, die: _desc_data(attr, die) if attr.form.startswith('DW_FORM_data') else _desc_locations(attr, die), + DW_AT_frame_base=_desc_locations, + DW_AT_type=_desc_datatype, + DW_AT_call_line=_desc_value, + DW_AT_call_file=_desc_decl_file, + DW_AT_abstract_origin=_desc_origin, + DW_AT_specification=_desc_spec +) + +class ReadElf(object): + """ dump_xxx is used to dump the respective section. + Mimics the output of dwarfdump with --verbose + """ + def __init__(self, filename, file, output): + """ file: + stream object with the ELF file to read + + output: + output stream to write to + """ + self.elffile = ELFFile(file) + self.output = output + self._dwarfinfo = self.elffile.get_dwarf_info() + arches = {"EM_386": "i386", "EM_X86_64": "x86-64"} + arch = arches[self.elffile['e_machine']] + bits = self.elffile.elfclass + self._emitline("%s: file format elf%d-%s" % (filename, bits, arch)) + + def _emit(self, s=''): + """ Emit an object to output + """ + self.output.write(str(s)) + + def _emitline(self, s=''): + """ Emit an object to output, followed by a newline + """ + self.output.write(str(s).rstrip() + '\n') + + def dump_info(self): + # TODO: DWARF64 will cause discrepancies in hex offset sizes + self._emitline(".debug_info contents:") + for cu in self._dwarfinfo.iter_CUs(): + if cu.header.version >= 5: + ut = next(k for (k,v) in ENUM_DW_UT.items() if v == cu.header.unit_type) + unit_type_str = " unit_type = %s," % ut + else: + unit_type_str = '' + + self._emitline("0x%08x: Compile Unit: length = 0x%08x, format = DWARF%d, version = 0x%04x,%s abbr_offset = 0x%04x, addr_size = 0x%02x (next unit at 0x%08x)" %( + cu.cu_offset, + cu.header.unit_length, + cu.structs.dwarf_format, + cu.header.version, + unit_type_str, + cu.header.debug_abbrev_offset, + cu.header.address_size, + cu.cu_offset + (4 if cu.structs.dwarf_format == 32 else 12) + cu.header.unit_length)) + self._emitline() + parent = cu.get_top_DIE() + for die in cu.iter_DIEs(): + if die.get_parent() == parent: + parent = die + if not die.is_null(): + self._emitline("0x%08x: %s [%d] %s %s" % ( + die.offset, + die.tag, + die.abbrev_code, + '*' if die.has_children else '', + '(0x%08x)' % die.get_parent().offset if die.get_parent() is not None else '')) + for attr_name in die.attributes: + attr = die.attributes[attr_name] + self._emitline(" %s [%s] (%s)" % (attr_name, attr.form, self.describe_attr_value(die, attr))) + else: + self._emitline("0x%08x: NULL" % (die.offset,)) + parent = die.get_parent() + self._emitline() + + def describe_attr_value(self, die, attr): + """This describes the attribute value in the way that's compatible + with llvm_dwarfdump. Somewhat duplicates the work of describe_attr_value() in descriptions + """ + if attr.name in ATTR_DESCRIPTIONS: + return ATTR_DESCRIPTIONS[attr.name](attr, die) + elif attr.form in FORM_DESCRIPTIONS: + return FORM_DESCRIPTIONS[attr.form](attr, die) + else: + return str(attr.value) + + def dump_loc(self): + pass + + def dump_loclists(self): + pass + + def dump_ranges(self): + pass + + def dump_v4_rangelist(self, rangelist, cu_map): + cu = cu_map[rangelist[0].entry_offset] + addr_str_len = cu.header.address_size*2 + base_ip = _get_cu_base(cu) + for entry in rangelist: + if isinstance(entry, RangeEntry): + self._emitline("[0x%0*x, 0x%0*x)" % ( + addr_str_len, + (0 if entry.is_absolute else base_ip) + entry.begin_offset, + addr_str_len, + (0 if entry.is_absolute else base_ip) + entry.end_offset)) + elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry): + base_ip = entry.base_address + else: + raise NotImplementedError("Unknown object in a range list") + + def dump_rnglists(self): + self._emitline(".debug_rnglists contents:") + ranges_sec = self._dwarfinfo.range_lists() + if ranges_sec.version < 5: + return + + cu_map = {die.attributes['DW_AT_ranges'].value : cu # Dict from range offset to home CU + for cu in self._dwarfinfo.iter_CUs() + for die in cu.iter_DIEs() + if 'DW_AT_ranges' in die.attributes} + + for cu in ranges_sec.iter_CUs(): + self._emitline("0x%08x: range list header: length = 0x%08x, format = DWARF%d, version = 0x%04x, addr_size = 0x%02x, seg_size = 0x%02x, offset_entry_count = 0x%08x" % ( + cu.cu_offset, + cu.unit_length, + 64 if cu.is64 else 32, + cu.version, + cu.address_size, + cu.segment_selector_size, + cu.offset_count)) + self._emitline("ranges:") + if cu.offset_count > 0: + rangelists = [ranges_sec.get_range_list_at_offset_ex(offset) for offset in cu.offsets] + else: + rangelists = list(ranges_sec.iter_CU_range_lists_ex(cu)) + # We have to parse it completely before dumping, because dwarfdump aligns columns, + # no way to do that without some lookahead + max_type_len = max(len(entry.entry_type) for rangelist in rangelists for entry in rangelist) + for rangelist in rangelists: + self.dump_v5_rangelist(rangelist, cu_map, max_type_len) + + def dump_v5_rangelist(self, rangelist, cu_map, max_type_len): + cu = cu_map[rangelist[0].entry_offset] + addr_str_len = cu.header.address_size*2 + base_ip = _get_cu_base(cu) + for entry in rangelist: + type = entry.entry_type + self._emit("0x%08x: [%s]: " % (entry.entry_offset, type.ljust(max_type_len))) + if type == 'DW_RLE_base_address': + base_ip = entry.address + self._emitline("0x%0*x" % (addr_str_len, base_ip)) + elif type == 'DW_RLE_offset_pair': + self._emitline("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % ( + addr_str_len, entry.start_offset, + addr_str_len, entry.end_offset, + addr_str_len, entry.start_offset + base_ip, + addr_str_len, entry.end_offset + base_ip)) + elif type == 'DW_RLE_start_length': + self._emitline("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % ( + addr_str_len, entry.start_address, + addr_str_len, entry.length, + addr_str_len, entry.start_address, + addr_str_len, entry.start_address + entry.length)) + elif type == 'DW_RLE_start_end': + self._emitline("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % ( + addr_str_len, entry.start_address, + addr_str_len, entry.end_address, + addr_str_len, entry.start_address, + addr_str_len, entry.end_address)) + else: + raise NotImplementedError() + last = rangelist[-1] + self._emitline("0x%08x: [DW_RLE_end_of_list ]" % (last.entry_offset + last.entry_length,)) + +SCRIPT_DESCRIPTION = 'Display information about the contents of ELF format files' +VERSION_STRING = '%%(prog)s: based on pyelftools %s' % __version__ + +def main(stream=None): + # parse the command-line arguments and invoke ReadElf + argparser = argparse.ArgumentParser( + usage='usage: %(prog)s [options] ', + description=SCRIPT_DESCRIPTION, + add_help=False, + prog='readelf.py') + argparser.add_argument('file', + nargs='?', default=None, + help='ELF file to parse') + argparser.add_argument('-H', '--help', + action='store_true', dest='help', + help='Display this information') + argparser.add_argument('--verbose', + action='store_true', dest='verbose', + help=('For compatibility with dwarfdump. Non-verbose mode is not implemented.')) + + # Section dumpers + sections = ('info', 'loclists', 'rnglists') # 'loc', 'ranges' not implemented yet + for section in sections: + argparser.add_argument('--debug-%s' % section, + action='store_true', dest=section, + help=('Display the contents of DWARF debug_%s section.' % section)) + + args = argparser.parse_args() + + if args.help or not args.file: + argparser.print_help() + sys.exit(0) + + # A compatibility hack on top of a compatibility hack :( + del ENUM_DW_TAG["DW_TAG_template_type_param"] + del ENUM_DW_TAG["DW_TAG_template_value_param"] + ENUM_DW_TAG['DW_TAG_template_type_parameter'] = 0x2f + ENUM_DW_TAG['DW_TAG_template_value_parameter'] = 0x30 + + with open(args.file, 'rb') as file: + try: + readelf = ReadElf(args.file, file, stream or sys.stdout) + if args.info: + readelf.dump_info() + if args.loclists: + readelf.dump_loclists() + if args.rnglists: + readelf.dump_rnglists() + #if args.loc: + # readelf.dump_loc() + #if args.ranges: + # readelf.dump_ranges() + except ELFError as ex: + sys.stdout.flush() + sys.stderr.write('ELF error: %s\n' % ex) + if args.show_traceback: + traceback.print_exc() + sys.exit(1) + +#------------------------------------------------------------------------------- +if __name__ == '__main__': + main() + #profile_main() diff --git a/test/all_tests.py b/test/all_tests.py index 289a0fd..b7f60fc 100755 --- a/test/all_tests.py +++ b/test/all_tests.py @@ -23,6 +23,7 @@ def main(): run_test_script('test/run_all_unittests.py') run_test_script('test/run_examples_test.py') run_test_script('test/run_readelf_tests.py', '--parallel') + run_test_script('test/run_dwarfdump_tests.py', '--parallel') if __name__ == '__main__': sys.exit(main()) diff --git a/test/external_tools/README.txt b/test/external_tools/README.txt index 31feddf..968c677 100644 --- a/test/external_tools/README.txt +++ b/test/external_tools/README.txt @@ -7,3 +7,12 @@ readelf is built as follows: * git co binutils--branch * Run configure, then make * Built on a 64-bit Ubuntu machine + +llvm-dwarfdump is built as follows: + +* Used Debian v10 on x86_64 +* install gcc, git, cmake +* git clone https://github.com/llvm/llvm-project.git llvm +* cd llvm +* cmake -S llvm -B build -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release +* cmake --build build -- llvm-dwarfdump diff --git a/test/external_tools/llvm-dwarfdump b/test/external_tools/llvm-dwarfdump new file mode 100755 index 0000000..02bc4df Binary files /dev/null and b/test/external_tools/llvm-dwarfdump differ diff --git a/test/run_dwarfdump_tests.py b/test/run_dwarfdump_tests.py new file mode 100644 index 0000000..8209e80 --- /dev/null +++ b/test/run_dwarfdump_tests.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +#------------------------------------------------------------------------------- +# test/run_dwarfdump_tests.py +# +# Automatic test runner for elftools & llvm-dwarfdump-11 +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import argparse +from difflib import SequenceMatcher +import logging +from multiprocessing import Pool +import os +import platform +import re +import sys +import time + +from utils import run_exe, is_in_rootdir, dump_output_to_temp_files + +# Make it possible to run this file from the root dir of pyelftools without +# installing pyelftools; useful for CI testing, etc. +sys.path[0:0] = ['.'] + +# Create a global logger object +testlog = logging.getLogger('run_tests') +testlog.setLevel(logging.DEBUG) +testlog.addHandler(logging.StreamHandler(sys.stdout)) + +# Following the readelf example, we ship our own. +if platform.system() == "Darwin": # MacOS + raise NotImplementedError("Not supported on MacOS") +elif platform.system() == "Windows": + raise NotImplementedError("Not supported on Windows") +else: + DWARFDUMP_PATH = 'test/external_tools/llvm-dwarfdump' + +def discover_testfiles(rootdir): + """ Discover test files in the given directory. Yield them one by one. + """ + for filename in os.listdir(rootdir): + _, ext = os.path.splitext(filename) + if ext == '.elf': + yield os.path.join(rootdir, filename) + + +def run_test_on_file(filename, verbose=False, opt=None): + """ Runs a test on the given input filename. Return True if all test + runs succeeded. + If opt is specified, rather that going over the whole + set of supported options, the test will only + run for one option. + """ + success = True + testlog.info("Test file '%s'" % filename) + if opt is None: + options = [ + '--debug-info' + ] + else: + options = [opt] + + for option in options: + if verbose: testlog.info("..option='%s'" % option) + + # stdouts will be a 2-element list: output of llvm-dwarfdump and output + # of scripts/dwarfdump.py + stdouts = [] + for exe_path in [DWARFDUMP_PATH, 'scripts/dwarfdump.py']: + args = [option, '--verbose', filename] + if verbose: testlog.info("....executing: '%s %s'" % ( + exe_path, ' '.join(args))) + t1 = time.time() + rc, stdout = run_exe(exe_path, args) + if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,)) + if rc != 0: + testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc)) + return False + stdouts.append(stdout) + if verbose: testlog.info('....comparing output...') + t1 = time.time() + rc, errmsg = compare_output(*stdouts) + if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,)) + if rc: + if verbose: testlog.info('.......................SUCCESS') + else: + success = False + testlog.info('.......................FAIL') + testlog.info('....for file %s' % filename) + testlog.info('....for option "%s"' % option) + testlog.info('....Output #1 is llvm-dwarfdump, Output #2 is pyelftools') + testlog.info('@@ ' + errmsg) + dump_output_to_temp_files(testlog, filename, option, *stdouts) + return success + + +def compare_output(s1, s2): + """ Compare stdout strings s1 and s2. + s1 is from llvm-dwarfdump, s2 from elftools dwarfdump.py + Return pair success, errmsg. If comparison succeeds, success is True + and errmsg is empty. Otherwise success is False and errmsg holds a + description of the mismatch. + """ + def prepare_lines(s): + return [line for line in s.lower().splitlines() if line.strip() != ''] + + lines1 = prepare_lines(s1) + lines2 = prepare_lines(s2) + + if len(lines1) != len(lines2): + return False, 'Number of lines different: %s vs %s' % ( + len(lines1), len(lines2)) + + for (i, (line1, line2)) in enumerate(zip(lines1, lines2)): + # Compare ignoring whitespace + lines1_parts = line1.split() + lines2_parts = line2.split() + + if ''.join(lines1_parts) != ''.join(lines2_parts): + sm = SequenceMatcher() + sm.set_seqs(lines1[i], lines2[i]) + changes = sm.get_opcodes() + + errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n (%r)' % ( + i, line1, line2, changes) + return False, errmsg + return True, '' + +def main(): + if not is_in_rootdir(): + testlog.error('Error: Please run me from the root dir of pyelftools!') + return 1 + + argparser = argparse.ArgumentParser( + usage='usage: %(prog)s [options] [file] [file] ...', + prog='run_dwarfdump_tests.py') + argparser.add_argument('files', nargs='*', help='files to run tests on') + argparser.add_argument( + '--parallel', action='store_true', + help='run tests in parallel; always runs all tests w/o verbose') + argparser.add_argument('-V', '--verbose', + action='store_true', dest='verbose', + help='verbose output') + argparser.add_argument( + '-k', '--keep-going', + action='store_true', dest='keep_going', + help="Run all tests, don't stop at the first failure") + argparser.add_argument('--opt', + action='store', dest='opt', metavar='', + help= 'Limit the test one one dwarfdump option.') + args = argparser.parse_args() + + if args.parallel: + if args.verbose or args.keep_going == False: + print('WARNING: parallel mode disables verbosity and always keeps going') + + if args.verbose: + testlog.info('Running in verbose mode') + testlog.info('Python executable = %s' % sys.executable) + testlog.info('dwarfdump path = %s' % DWARFDUMP_PATH) + testlog.info('Given list of files: %s' % args.files) + + # If file names are given as command-line arguments, only these files + # are taken as inputs. Otherwise, autodiscovery is performed. + if len(args.files) > 0: + filenames = args.files + else: + filenames = sorted(discover_testfiles('test/testfiles_for_dwarfdump')) + + if len(filenames) > 1 and args.parallel: + pool = Pool() + results = pool.map(run_test_on_file, filenames) + failures = results.count(False) + else: + failures = 0 + for filename in filenames: + if not run_test_on_file(filename, args.verbose, args.opt): + failures += 1 + if not args.keep_going: + break + + if failures == 0: + testlog.info('\nConclusion: SUCCESS') + return 0 + elif args.keep_going: + testlog.info('\nConclusion: FAIL ({}/{})'.format( + failures, len(filenames))) + return 1 + else: + testlog.info('\nConclusion: FAIL') + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/test/run_examples_test.py b/test/run_examples_test.py index c5268f3..2335f88 100755 --- a/test/run_examples_test.py +++ b/test/run_examples_test.py @@ -63,7 +63,7 @@ def run_example_and_compare(example_path): return True else: testlog.info('.......FAIL comparison') - dump_output_to_temp_files(testlog, example_out, ref_str) + dump_output_to_temp_files(testlog, example_path, '', example_out, ref_str) return False diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 4c3df98..ad56f4e 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -117,7 +117,7 @@ def run_test_on_file(filename, verbose=False, opt=None): testlog.info('....for option "%s"' % option) testlog.info('....Output #1 is readelf, Output #2 is pyelftools') testlog.info('@@ ' + errmsg) - dump_output_to_temp_files(testlog, *stdouts) + dump_output_to_temp_files(testlog, filename, option, *stdouts) return success diff --git a/test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf b/test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf new file mode 100644 index 0000000..d9ffe9b Binary files /dev/null and b/test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf differ diff --git a/test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf b/test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf new file mode 100644 index 0000000..33f051b Binary files /dev/null and b/test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf differ diff --git a/test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf b/test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf new file mode 100644 index 0000000..9da7825 Binary files /dev/null and b/test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf differ diff --git a/test/utils.py b/test/utils.py index 8eedacf..4669cf8 100644 --- a/test/utils.py +++ b/test/utils.py @@ -32,13 +32,14 @@ def is_in_rootdir(): return os.path.isdir('test') and os.path.isdir('elftools') -def dump_output_to_temp_files(testlog, *args): +def dump_output_to_temp_files(testlog, filename, option, *args): """ Dumps the output strings given in 'args' to temp files: one for each - arg. + arg. The filename and option arguments contribute to the file name, + so that one knows which test did the output dump come from. """ for i, s in enumerate(args): fd, path = tempfile.mkstemp( - prefix='out' + str(i + 1) + '_', + prefix='out-%d-%s-%s-' % (i + 1, os.path.split(filename)[-1], option), suffix='.stdout') file = os.fdopen(fd, 'w') file.write(s)