if PY3:
import io
+ from pathlib import Path
StringIO = io.StringIO
BytesIO = io.BytesIO
ifilter = filter
maxint = sys.maxsize
+
+ def path_to_posix(s):
+ return Path(s).as_posix()
+
else:
import cStringIO
+ import os
+ import posixpath
+
StringIO = BytesIO = cStringIO.StringIO
def bytes2hex(b, sep=''):
maxint = sys.maxint
+ def path_to_posix(s):
+ return posixpath.join(*os.path.split(s))
+
def iterkeys(d):
"""Return an iterator over the keys of a dictionary."""
return '(indirect string, offset: 0x%x): %s' % (
attr.raw_value, bytes2str(attr.value))
+def _describe_attr_line_strp(attr, die, section_offset):
+ return '(indirect line string, offset: 0x%x): %s' % (
+ attr.raw_value, bytes2str(attr.value))
+
def _describe_attr_string(attr, die, section_offset):
return bytes2str(attr.value)
DW_FORM_udata=_describe_attr_value_passthrough,
DW_FORM_string=_describe_attr_string,
DW_FORM_strp=_describe_attr_strp,
+ DW_FORM_line_strp=_describe_attr_line_strp,
DW_FORM_block1=_describe_attr_block,
DW_FORM_block2=_describe_attr_block,
DW_FORM_block4=_describe_attr_block,
DW_LANG_UPC: '(Unified Parallel C)',
DW_LANG_D: '(D)',
DW_LANG_Python: '(Python)',
+ DW_LANG_OpenCL: '(OpenCL)',
+ DW_LANG_Go: '(Go)',
+ DW_LANG_Modula3: '(Modula 3)',
+ DW_LANG_Haskell: '(Haskell)',
+ DW_LANG_C_plus_plus_03: '(C++03)',
+ DW_LANG_C_plus_plus_11: '(C++11)',
+ DW_LANG_OCaml: '(OCaml)',
+ DW_LANG_Rust: '(Rust)',
+ DW_LANG_C11: '(C11)',
+ DW_LANG_Swift: '(Swift)',
+ DW_LANG_Julia: '(Julia)',
+ DW_LANG_Dylan: '(Dylan)',
+ DW_LANG_C_plus_plus_14: '(C++14)',
+ DW_LANG_Fortran03: '(Fortran 03)',
+ DW_LANG_Fortran08: '(Fortran 08)',
+ DW_LANG_RenderScript: '(RenderScript)',
+ DW_LANG_BLISS: '(Bliss)', # Not in binutils
DW_LANG_Mips_Assembler: '(MIPS assembler)',
DW_LANG_HP_Bliss: '(HP Bliss)',
DW_LANG_HP_Basic91: '(HP Basic 91)',
DW_LANG_HP_Pascal91: '(HP Pascal 91)',
DW_LANG_HP_IMacro: '(HP IMacro)',
- DW_LANG_HP_Assembler: '(HP assembler)',
+ DW_LANG_HP_Assembler: '(HP assembler)'
}
_DESCR_DW_ATE = {
""" Given a CU object, fetch the line program it points to from the
.debug_line section.
If the CU doesn't point to a line program, return None.
+
+ Note about directory and file names. They are returned as two collections
+ in the lineprogram object's header - include_directory and file_entry.
+
+ DWARFv5 introduced a different, extensible format for those
+ collections. So in a lineprogram v5+, there are two more collections in
+ the header - directories and file_names. Those might contain extra DWARFv5
+ information that is not exposed in include_directory and file_entry.
"""
# The line program is pointed to by the DW_AT_stmt_list attribute of
# the top DIE of a CU.
self.debug_line_sec.stream,
debug_line_offset)
+ # DWARF5: resolve names
+ def resolve_strings(self, lineprog_header, format_field, data_field):
+ if lineprog_header.get(format_field, False):
+ data = lineprog_header[data_field]
+ for field in lineprog_header[format_field]:
+ def replace_value(data, content_type, replacer):
+ for entry in data:
+ entry[content_type] = replacer(entry[content_type])
+
+ if field.form == 'DW_FORM_line_strp':
+ replace_value(data, field.content_type, self.get_string_from_linetable)
+ elif field.form == 'DW_FORM_strp':
+ replace_value(data, field.content_type, self.get_string_from_table)
+ elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4'):
+ raise NotImplementedError()
+
+ resolve_strings(self, lineprog_header, 'directory_entry_format', 'directories')
+ resolve_strings(self, lineprog_header, 'file_name_entry_format', 'file_names')
+
+ # DWARF5: provide compatible file/directory name arrays for legacy lineprogram consumers
+ if lineprog_header.get('directories', False):
+ lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
+ if lineprog_header.get('file_names', False):
+ translate = namedtuple("file_entry", "name dir_index mtime length")
+ lineprog_header.file_entry = tuple(
+ translate(e.get('DW_LNCT_path'), e.get('DW_LNCT_directory_index'), e.get('DW_LNCT_timestamp'), e.get('DW_LNCT_size'))
+ for e in lineprog_header.file_names)
+
# Calculate the offset to the next line program (see DWARF 6.2.4)
end_offset = ( debug_line_offset + lineprog_header['unit_length'] +
structs.initial_length_field_size())
DW_EH_PE_omit = 0xff,
)
+
+ENUM_DW_LNCT = dict(
+ DW_LNCT_path = 0x1,
+ DW_LNCT_directory_index = 0x2,
+ DW_LNCT_timestamp = 0x3,
+ DW_LNCT_size = 0x4,
+ DW_LNCT_MD5 = 0x5,
+ DW_LNCT_lo_user = 0x2000,
+ DW_LNCT_hi_user = 0x3fff
+)
+
+ENUM_DW_UT = dict(
+ DW_UT_compile = 0x01,
+ DW_UT_type = 0x02,
+ DW_UT_partial = 0x03,
+ DW_UT_skeleton = 0x04,
+ DW_UT_split_compile = 0x05,
+ DW_UT_split_type = 0x06,
+ DW_UT_lo_user = 0x80,
+ DW_UT_hi_user = 0xff
+)
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
+from elftools.construct.core import Subconstruct
+from elftools.construct.macros import Embedded
from ..construct import (
UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
- CString, Embed, StaticField, IfThenElse
+ CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence
)
from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
from .enums import *
self.Dwarf_uleb128('mtime'),
self.Dwarf_uleb128('length')))))
+ class FormattedEntry(Construct):
+ # Generates a parser based on a previously parsed piece,
+ # similar to deprecared Dynamic.
+ # Strings are resolved later, since it potentially requires
+ # looking at another section.
+ def __init__(self, name, structs, format_field):
+ Construct.__init__(self, name)
+ self.structs = structs
+ self.format_field = format_field
+
+ def _parse(self, stream, context):
+ # Somewhat tricky technique here, explicitly writing back to the context
+ if self.format_field + "_parser" in context:
+ parser = context[self.format_field + "_parser"]
+ else:
+ fields = tuple(
+ Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
+ for f in context[self.format_field])
+ parser = Struct('formatted_entry', *fields)
+ context[self.format_field + "_parser"] = parser
+ return parser._parse(stream, context)
+
+ ver5 = lambda ctx: ctx.version >= 5
+
self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
self.Dwarf_initial_length('unit_length'),
self.Dwarf_uint16('version'),
+ If(ver5,
+ self.Dwarf_uint8("address_size"),
+ None),
+ If(ver5,
+ self.Dwarf_uint8("segment_selector_size"),
+ None),
self.Dwarf_offset('header_length'),
self.Dwarf_uint8('minimum_instruction_length'),
- If(lambda ctx: ctx['version'] >= 4,
+ If(lambda ctx: ctx.version >= 4,
self.Dwarf_uint8("maximum_operations_per_instruction"),
1),
self.Dwarf_uint8('default_is_stmt'),
self.Dwarf_int8('line_base'),
self.Dwarf_uint8('line_range'),
self.Dwarf_uint8('opcode_base'),
- Array(lambda ctx: ctx['opcode_base'] - 1,
+ Array(lambda ctx: ctx.opcode_base - 1,
self.Dwarf_uint8('standard_opcode_lengths')),
- RepeatUntilExcluding(
- lambda obj, ctx: obj == b'',
- CString('include_directory')),
- RepeatUntilExcluding(
- lambda obj, ctx: len(obj.name) == 0,
- self.Dwarf_lineprog_file_entry),
- )
+ If(ver5,
+ PrefixedArray(
+ Struct('directory_entry_format',
+ Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
+ Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
+ self.Dwarf_uint8("directory_entry_format_count"))),
+ If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
+ PrefixedArray(
+ FormattedEntry('directories', self, "directory_entry_format"),
+ self.Dwarf_uleb128('directories_count'))),
+ If(ver5,
+ PrefixedArray(
+ Struct('file_name_entry_format',
+ Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
+ Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
+ self.Dwarf_uint8("file_name_entry_format_count"))),
+ If(ver5,
+ PrefixedArray(
+ FormattedEntry('file_names', self, "file_name_entry_format"),
+ self.Dwarf_uleb128('file_names_count'))),
+ # Legacy directories/files - DWARF < 5 only
+ If(lambda ctx: ctx.version < 5,
+ RepeatUntilExcluding(
+ lambda obj, ctx: obj == b'',
+ CString('include_directory'))),
+ If(lambda ctx: ctx.version < 5,
+ RepeatUntilExcluding(
+ lambda obj, ctx: len(obj.name) == 0,
+ self.Dwarf_lineprog_file_entry)) # array name is file_entry
+ )
def _create_callframe_entry_headers(self):
self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
descs.append(desc)
return 'x86 feature: ' + ', '.join(descs)
+def describe_note_gnu_property_x86_isa_1(value):
+ descs = []
+ for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS:
+ if value & mask:
+ descs.append(desc)
+ return 'x86 ISA needed: ' + ', '.join(descs)
+
def describe_note_gnu_properties(properties):
descriptions = []
for prop in properties:
prop_desc = ' <corrupt length: 0x%x>' % sz
else:
prop_desc = describe_note_gnu_property_x86_feature_1(d)
+ elif t == 'GNU_PROPERTY_X86_ISA_1_NEEDED':
+ if sz != 4:
+ prop_desc = ' <corrupt length: 0x%x>' % sz
+ else:
+ prop_desc = describe_note_gnu_property_x86_isa_1(d)
elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
(8, 'LAM_U57'),
)
+# Same for GNU_PROPERTY_X86_SET_1_xxx
+_DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS = (
+ (1, 'x86-64-baseline'),
+ # TODO: the full list of flags is long; only the first one is described so far
+)
+
def _reverse_dict(d, low_priority=()):
"""
GNU_PROPERTY_STACK_SIZE=1,
GNU_PROPERTY_NO_COPY_ON_PROTECTED=2,
GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002,
+ GNU_PROPERTY_X86_ISA_1_NEEDED=0xc0008002,
_default_=Pass,
)
sys.path[0:0] = ['.', '..']
from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import path_to_posix
def process_file(filename):
print(' Top DIE with tag=%s' % top_DIE.tag)
# We're interested in the filename...
- print(' name=%s' % top_DIE.get_full_path())
+ print(' name=%s' % path_to_posix(top_DIE.get_full_path()))
# Display DIEs recursively starting with top_DIE
die_info_rec(top_DIE)
from collections import defaultdict
import os
import sys
+import posixpath
# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
return file_entry.name.decode()
directory = lp_header["include_directory"][dir_index - 1]
- return os.path.join(directory, file_entry.name).decode()
+ return posixpath.join(directory, file_entry.name).decode()
if __name__ == '__main__':
sys.path[0:0] = ['.', '..']
from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import path_to_posix
def process_file(filename):
print(' Top DIE with tag=%s' % top_DIE.tag)
# We're interested in the filename...
- print(' name=%s' % top_DIE.get_full_path())
+ print(' name=%s' % path_to_posix(top_DIE.get_full_path()))
if __name__ == '__main__':
if sys.argv[1] == '--test':
from elftools.dwarf.locationlists import LocationParser, LocationEntry
from elftools.dwarf.callframe import CIE, FDE, ZERO
from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
+from elftools.dwarf.enums import ENUM_DW_UT
class ReadElf(object):
self._emitline(' Length: %s (%s)' % (
self._format_hex(cu['unit_length']),
'%s-bit' % cu.dwarf_format()))
- self._emitline(' Version: %s' % cu['version']),
+ self._emitline(' Version: %s' % cu['version'])
+ if cu.header.get("unit_type", False):
+ ut = next((key for key, value in ENUM_DW_UT.items() if value == cu.header.unit_type), '?')
+ self._emitline(' Unit Type: %s (%d)' % (ut, cu.header.unit_type))
self._emitline(' Abbrev Offset: %s' % (
self._format_hex(cu['debug_abbrev_offset']))),
self._emitline(' Pointer Size: %s' % cu['address_size'])
for cu in self._dwarfinfo.iter_CUs():
lineprogram = self._dwarfinfo.line_program_for_CU(cu)
+ ver5 = lineprogram.header.version >= 5
cu_filename = bytes2str(lineprogram['file_entry'][0].name)
if len(lineprogram['include_directory']) > 0:
cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
self._emitline('CU: %s:' % cu_filename)
- self._emitline('File name Line number Starting address Stmt')
+ self._emitline('File name Line number Starting address View Stmt' if ver5
+ else 'File name Line number Starting address Stmt')
+ # What goes into View on V5? To be seen...
# Print each state's file, line and address information. For some
# instructions other output is needed to be compatible with
'0' if state.address == 0 else self._format_hex(state.address),
'x' if state.is_stmt and not state.end_sequence else ''))
else:
- self._emitline('%-35s %11d %18s[%d] %s' % (
+ # What's the deal with op_index after address on DWARF 5? Is omitting it
+ # a function of DWARF version, or ISA, or what?
+ # Used to be unconditional, even on non-VLIW machines.
+ self._emitline('%-35s %s %18s%s %s' % (
bytes2str(lineprogram['file_entry'][state.file - 1].name),
- state.line if not state.end_sequence else '-',
+ "%11d" % (state.line,) if not state.end_sequence else '-',
'0' if state.address == 0 else self._format_hex(state.address),
- state.op_index,
+ '' if ver5 else '[%d]' % (state.op_index,),
'x' if state.is_stmt and not state.end_sequence else ''))
if entry.command == DW_LNS_copy:
# Another readelf oddity...
testlog.info('.......................SKIPPED')
continue
+ # sevaa says: there is another shorted-out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
+ # patched from 0x07 0x10 to 0x00 0x00.
+ # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
+ # from "DW_CFA_undefined 16" to two NOPs.
+ # GNU readelf had a bug here, so it had to be worked around. See PR #411.
+
# stdouts will be a 2-element list: output of readelf and output
# of scripts/readelf.py
stdouts = []