From db6df4b89083d9832362a67dd9da434cb412fd55 Mon Sep 17 00:00:00 2001 From: Pierre-Marie de Rodat Date: Mon, 28 Aug 2017 22:05:58 -0400 Subject: [PATCH] Add parsing and readelf dumping for .eh_frame (#155) * Fix the byte size for R_X86_64_PC32 relocations * Describe the address for DW_CFA_advance_loc with only 8 columns * Add .eh_frame section decoding and dump in readelf.py --- elftools/common/py3compat.py | 5 + elftools/dwarf/callframe.py | 254 +++++++++++++++++++++++++++++++-- elftools/dwarf/descriptions.py | 2 +- elftools/dwarf/dwarfinfo.py | 20 ++- elftools/dwarf/enums.py | 23 +++ elftools/dwarf/structs.py | 19 +-- elftools/elf/elffile.py | 20 +-- elftools/elf/relocation.py | 2 +- scripts/readelf.py | 99 ++++++++++--- test/run_readelf_tests.py | 13 -- test/test_callframe.py | 4 +- 11 files changed, 389 insertions(+), 72 deletions(-) diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py index 4878bf3..f481043 100644 --- a/elftools/common/py3compat.py +++ b/elftools/common/py3compat.py @@ -23,6 +23,9 @@ if PY3: def str2bytes(s): return s.encode('latin-1') def int2byte(i):return bytes((i,)) def byte2int(b): return b + def iterbytes(b): + for i in range(len(b)): + yield b[i:i+1] ifilter = filter @@ -39,6 +42,8 @@ else: def str2bytes(s): return s int2byte = chr byte2int = ord + def iterbytes(b): + return iter(b) from itertools import ifilter diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 22f6a71..127e2d8 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -9,7 +9,9 @@ import copy from collections import namedtuple from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos) -from ..common.py3compat import iterkeys +from ..common.py3compat import iterbytes, iterkeys +from ..construct import Struct, Switch +from .enums import DW_EH_encoding_flags from .structs import DWARFStructs from .constants import * @@ -17,10 +19,18 @@ from .constants import * class CallFrameInfo(object): 
""" DWARF CFI (Call Frame Info) + Note that this also supports unwinding information as found in .eh_frame + sections: its format differs slightly from the one in .debug_frame. See + . + stream, size: A stream holding the .debug_frame section, and the size of the section in it. + address: + Virtual address for this section. This is used to decode relative + addresses. + base_structs: The structs to be used as the base for parsing this section. Eventually, each entry gets its own structs based on the initial @@ -34,9 +44,11 @@ class CallFrameInfo(object): such as guessing which CU contains which FDEs (based on their address ranges) and taking the address_size from those CUs. """ - def __init__(self, stream, size, base_structs): + def __init__(self, stream, size, address, base_structs, + for_eh_frame=False): self.stream = stream self.size = size + self.address = address self.base_structs = base_structs self.entries = None @@ -45,6 +57,11 @@ class CallFrameInfo(object): # header field which contains a stream offset. self._entry_cache = {} + # The .eh_frame and .debug_frame section use almost the same CFI + # encoding, but there are tiny variations we need to handle during + # parsing. + self.for_eh_frame = for_eh_frame + def get_entries(self): """ Get a list of entries that constitute this CFI. 
The list consists of CIE or FDE objects, in the order of their appearance in the @@ -74,6 +91,10 @@ class CallFrameInfo(object): entry_length = struct_parse( self.base_structs.Dwarf_uint32(''), self.stream, offset) + + if self.for_eh_frame and entry_length == 0: + return ZERO(offset) + dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32 entry_structs = DWARFStructs( @@ -85,28 +106,42 @@ class CallFrameInfo(object): CIE_id = struct_parse( entry_structs.Dwarf_offset(''), self.stream) - is_CIE = ( - (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or - CIE_id == 0xFFFFFFFFFFFFFFFF) + if self.for_eh_frame: + is_CIE = CIE_id == 0 + else: + is_CIE = ( + (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or + CIE_id == 0xFFFFFFFFFFFFFFFF) + # Parse the header, which goes up to and excluding the sequence of + # instructions. if is_CIE: - header_struct = entry_structs.Dwarf_CIE_header + header_struct = (entry_structs.EH_CIE_header + if self.for_eh_frame else + entry_structs.Dwarf_CIE_header) + header = struct_parse( + header_struct, self.stream, offset) else: - header_struct = entry_structs.Dwarf_FDE_header + header = self._parse_fde_header(entry_structs, offset) - # Parse the header, which goes up to and including the - # return_address_register field - header = struct_parse( - header_struct, self.stream, offset) # If this is DWARF version 4 or later, we can have a more precise # address size, read from the CIE header. - if entry_structs.dwarf_version >= 4: + if not self.for_eh_frame and entry_structs.dwarf_version >= 4: entry_structs = DWARFStructs( little_endian=entry_structs.little_endian, dwarf_format=entry_structs.dwarf_format, address_size=header.address_size) + # If the augmentation string is not empty, hope to find a length field + # in order to skip the data specified augmentation. 
+ if is_CIE: + aug_bytes, aug_dict = self._parse_cie_augmentation( + header, entry_structs) + else: + cie = self._parse_cie_for_fde(offset, header, entry_structs) + aug_bytes = self._read_augmentation_data(entry_structs) + # For convenience, compute the end offset for this entry end_offset = ( offset + header.length + @@ -120,12 +155,15 @@ class CallFrameInfo(object): if is_CIE: self._entry_cache[offset] = CIE( header=header, instructions=instructions, offset=offset, + augmentation_dict=aug_dict, + augmentation_bytes=aug_bytes, structs=entry_structs) + else: # FDE - with preserve_stream_pos(self.stream): - cie = self._parse_entry_at(header['CIE_pointer']) + cie = self._parse_cie_for_fde(offset, header, entry_structs) self._entry_cache[offset] = FDE( header=header, instructions=instructions, offset=offset, + augmentation_bytes=aug_bytes, structs=entry_structs, cie=cie) return self._entry_cache[offset] @@ -193,6 +231,172 @@ class CallFrameInfo(object): offset = self.stream.tell() return instructions + def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs): + """ Parse the CIE that corresponds to an FDE. + """ + # Determine the offset of the CIE that corresponds to this FDE + if self.for_eh_frame: + # CIE_pointer contains the offset for a reverse displacement from + # the section offset of the CIE_pointer field itself (not from the + # FDE header offset). + cie_displacement = fde_header['CIE_pointer'] + cie_offset = (fde_offset + entry_structs.dwarf_format // 8 + - cie_displacement) + else: + cie_offset = fde_header['CIE_pointer'] + + # Then read it + with preserve_stream_pos(self.stream): + return self._parse_entry_at(cie_offset) + + def _parse_cie_augmentation(self, header, entry_structs): + """ Parse CIE augmentation data from the augmentation string in `header`. + + Return a tuple that contains 1) the augmentation data as a string + (without the length field) and 2) the augmentation data as a dict.
+ """ + augmentation = header.get('augmentation') + if not augmentation: + return ('', {}) + + # Augmentation parsing works in minimal mode here: we need the length + # field to be able to skip unhandled augmentation fields. + assert augmentation.startswith(b'z'), ( + 'Unhandled augmentation string: {}'.format(repr(augmentation))) + + available_fields = { + b'z': entry_structs.Dwarf_uleb128('length'), + b'L': entry_structs.Dwarf_uint8('LSDA_encoding'), + b'R': entry_structs.Dwarf_uint8('FDE_encoding'), + b'S': True, + b'P': Struct( + 'personality', + entry_structs.Dwarf_uint8('encoding'), + Switch('function', lambda ctx: ctx.encoding & 0x0f, { + enc: fld_cons('function') + for enc, fld_cons + in self._eh_encoding_to_field(entry_structs).items()})), + } + + # Build the Struct we will be using to parse the augmentation data. + # Stop as soon as we are not able to match the augmentation string. + fields = [] + aug_dict = {} + + for b in iterbytes(augmentation): + try: + fld = available_fields[b] + except KeyError: + break + + if fld is True: + aug_dict[fld] = True + else: + fields.append(fld) + + # Read the augmentation twice: once with the Struct, once for the raw + # bytes. Read the raw bytes last so we are sure we leave the stream + # pointing right after the augmentation: the Struct may be incomplete + # (missing trailing fields) due to an unknown char: see the KeyError + # above. + offset = self.stream.tell() + struct = Struct('Augmentation_Data', *fields) + aug_dict.update(struct_parse(struct, self.stream, offset)) + self.stream.seek(offset) + aug_bytes = self._read_augmentation_data(entry_structs) + return (aug_bytes, aug_dict) + + def _read_augmentation_data(self, entry_structs): + """ Read augmentation data. + + This assumes that the augmentation string starts with 'z', i.e. that + augmentation data is prefixed by a length field, which is not returned. 
+ """ + if not self.for_eh_frame: + return b'' + + augmentation_data_length = struct_parse( + Struct('Dummy_Augmentation_Data', + entry_structs.Dwarf_uleb128('length')), + self.stream)['length'] + return self.stream.read(augmentation_data_length) + + def _parse_fde_header(self, entry_structs, offset): + """ Compute a struct to parse the header of the current FDE. + """ + if not self.for_eh_frame: + return struct_parse(entry_structs.Dwarf_FDE_header, self.stream, + offset) + + fields = [entry_structs.Dwarf_initial_length('length'), + entry_structs.Dwarf_offset('CIE_pointer')] + + # Parse the couple of header fields that are always here so we can + # fetch the corresponding CIE. + minimal_header = struct_parse(Struct('eh_frame_minimal_header', + *fields), self.stream, offset) + cie = self._parse_cie_for_fde(offset, minimal_header, entry_structs) + initial_location_offset = self.stream.tell() + + # Try to parse the initial location. We need the initial location in + # order to create a meaningful FDE, so assume it's there. Omission does + # not seem to happen in practice. + encoding = cie.augmentation_dict['FDE_encoding'] + assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit'] + basic_encoding = encoding & 0x0f + encoding_modifier = encoding & 0xf0 + + # Depending on the specified encoding, complete the header Struct + formats = self._eh_encoding_to_field(entry_structs) + fields.append(formats[basic_encoding]('initial_location')) + fields.append(formats[basic_encoding]('address_range')) + + result = struct_parse(Struct('Dwarf_FDE_header', *fields), + self.stream, offset) + + if encoding_modifier == 0: + pass + + elif encoding_modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']: + # Start address is relative to the address of the + # "initial_location" field. 
+ result['initial_location'] += ( + self.address + initial_location_offset) + else: + assert False, 'Unsupported encoding: {:#x}'.format(encoding) + + return result + + def _eh_encoding_to_field(self, entry_structs): + """ + Return a mapping from basic encodings (DW_EH_encoding_flags) to the + corresponding field constructors (for instance + entry_structs.Dwarf_uint32). + """ + return { + DW_EH_encoding_flags['DW_EH_PE_absptr']: + entry_structs.Dwarf_uint32 + if entry_structs.dwarf_format == 32 else + entry_structs.Dwarf_uint64, + DW_EH_encoding_flags['DW_EH_PE_uleb128']: + entry_structs.Dwarf_uleb128, + DW_EH_encoding_flags['DW_EH_PE_udata2']: + entry_structs.Dwarf_uint16, + DW_EH_encoding_flags['DW_EH_PE_udata4']: + entry_structs.Dwarf_uint32, + DW_EH_encoding_flags['DW_EH_PE_udata8']: + entry_structs.Dwarf_uint64, + + DW_EH_encoding_flags['DW_EH_PE_sleb128']: + entry_structs.Dwarf_sleb128, + DW_EH_encoding_flags['DW_EH_PE_sdata2']: + entry_structs.Dwarf_int16, + DW_EH_encoding_flags['DW_EH_PE_sdata4']: + entry_structs.Dwarf_int32, + DW_EH_encoding_flags['DW_EH_PE_sdata8']: + entry_structs.Dwarf_int64, + } + def instruction_name(opcode): """ Given an opcode, return the instruction name. @@ -224,14 +428,23 @@ class CFIEntry(object): Contains a header and a list of instructions (CallFrameInstruction). offset: the offset of this entry from the beginning of the section cie: for FDEs, a CIE pointer is required + augmentation_dict: Augmentation data as a parsed struct (dict): see + CallFrameInfo._parse_cie_augmentation and + http://www.airs.com/blog/archives/460. + augmentation_bytes: Augmentation data as a chain of bytes: see + CallFrameInfo._parse_cie_augmentation and + http://www.airs.com/blog/archives/460.
""" - def __init__(self, header, structs, instructions, offset, cie=None): + def __init__(self, header, structs, instructions, offset, + augmentation_dict={}, augmentation_bytes=b'', cie=None): self.header = header self.structs = structs self.instructions = instructions self.offset = offset self.cie = cie self._decoded_table = None + self.augmentation_dict = augmentation_dict + self.augmentation_bytes = augmentation_bytes def get_decoded(self): """ Decode the CFI contained in this entry and return a @@ -374,6 +587,17 @@ class FDE(CFIEntry): pass +class ZERO(object): + """ End marker for the sequence of CIE/FDE. + + This is specific to `.eh_frame` sections: this kind of entry does not exist + in pure DWARF. `readelf` displays these as "ZERO terminator", hence the + class name. + """ + def __init__(self, offset): + self.offset = offset + + class RegisterRule(object): """ Register rules are used to find registers in call frames. Each rule consists of a type (enumeration following DWARFv3 section 6.4.1) diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 2dbd699..3a07607 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -86,7 +86,7 @@ def describe_CFI_instructions(entry): 'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'): _assert_FDE_instruction(instr) factored_offset = instr.args[0] * cie['code_alignment_factor'] - s += ' %s: %s to %016x\n' % ( + s += ' %s: %s to %08x\n' % ( name, factored_offset, factored_offset + pc) pc += factored_offset elif name in ( 'DW_CFA_remember_state', 'DW_CFA_restore_state', diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 330a238..b8faf9d 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -27,12 +27,14 @@ from .aranges import ARanges # name: section name in the container file # global_offset: the global offset of the section in its container file # size: the size of the section's data, in bytes +# address: the virtual address 
for the section's data # # 'name' and 'global_offset' are for descriptional purposes only and -# aren't strictly required for the DWARF parsing to work. +# aren't strictly required for the DWARF parsing to work. 'address' is required +# to properly decode the special '.eh_frame' format. # DebugSectionDescriptor = namedtuple('DebugSectionDescriptor', - 'stream name global_offset size') + 'stream name global_offset size address') # Some configuration parameters for the DWARF reader. This exists to allow @@ -96,6 +98,15 @@ class DWARFInfo(object): # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} + @property + def has_debug_info(self): + """ Return whether this contains debug information. + + It can be not the case when the ELF only contains .eh_frame, which is + encoded DWARF but not actually for debugging. + """ + return bool(self.debug_info_sec) + def iter_CUs(self): """ Yield all the compile units (CompileUnit objects) in the debug info """ @@ -154,6 +165,7 @@ class DWARFInfo(object): cfi = CallFrameInfo( stream=self.debug_frame_sec.stream, size=self.debug_frame_sec.size, + address=self.debug_frame_sec.address, base_structs=self.structs) return cfi.get_entries() @@ -168,7 +180,9 @@ class DWARFInfo(object): cfi = CallFrameInfo( stream=self.eh_frame_sec.stream, size=self.eh_frame_sec.size, - base_structs=self.structs) + address=self.eh_frame_sec.address, + base_structs=self.structs, + for_eh_frame=True) return cfi.get_entries() def get_aranges(self): diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index 9140f91..903e7d5 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -283,3 +283,26 @@ ENUM_DW_FORM = dict( # Inverse mapping for ENUM_DW_FORM DW_FORM_raw2name = dict((v, k) for k, v in iteritems(ENUM_DW_FORM)) +# See http://www.airs.com/blog/archives/460 +DW_EH_encoding_flags = dict( + DW_EH_PE_absptr = 0x00, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 
= 0x04, + + DW_EH_PE_signed = 0x08, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0a, + DW_EH_PE_sdata4 = 0x0b, + DW_EH_PE_sdata8 = 0x0c, + + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80, + + DW_EH_PE_omit = 0xff, +) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index b21bd93..9234cf8 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -253,6 +253,16 @@ class DWARFStructs(object): ) def _create_callframe_entry_headers(self): + self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', + self.Dwarf_initial_length('length'), + self.Dwarf_offset('CIE_id'), + self.Dwarf_uint8('version'), + CString('augmentation'), + self.Dwarf_uleb128('code_alignment_factor'), + self.Dwarf_sleb128('data_alignment_factor'), + self.Dwarf_uleb128('return_address_register')) + self.EH_CIE_header = self.Dwarf_CIE_header + # The CIE header was modified in DWARFv4. if self.dwarf_version == 4: self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', @@ -265,15 +275,6 @@ class DWARFStructs(object): self.Dwarf_uleb128('code_alignment_factor'), self.Dwarf_sleb128('data_alignment_factor'), self.Dwarf_uleb128('return_address_register')) - else: - self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', - self.Dwarf_initial_length('length'), - self.Dwarf_offset('CIE_id'), - self.Dwarf_uint8('version'), - CString('augmentation'), - self.Dwarf_uleb128('code_alignment_factor'), - self.Dwarf_sleb128('data_alignment_factor'), - self.Dwarf_uleb128('return_address_register')) self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', self.Dwarf_initial_length('length'), diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index fedd14d..02a279c 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -145,8 +145,9 @@ class ELFFile(object): We assume that if it has the .debug_info or .zdebug_info section, it has all the other required sections as well. 
""" - return bool(self.get_section_by_name('.debug_info')) or \ - bool(self.get_section_by_name('.zdebug_info')) + return (self.get_section_by_name('.debug_info') or + self.get_section_by_name('.zdebug_info') or + self.get_section_by_name('.eh_frame')) def get_dwarf_info(self, relocate_dwarf_sections=True): """ Return a DWARFInfo object representing the debugging information in @@ -158,7 +159,6 @@ class ELFFile(object): # Expect that has_dwarf_info was called, so at least .debug_info is # present. # Sections that aren't found will be passed as None to DWARFInfo. - # section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', @@ -168,9 +168,13 @@ class ELFFile(object): if compressed: section_names = tuple(map(lambda x: '.z' + x[1:], section_names)) + # As it is loaded in the process image, .eh_frame cannot be compressed + section_names += ('.eh_frame', ) + (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, - debug_loc_sec_name, debug_ranges_sec_name) = section_names + debug_loc_sec_name, debug_ranges_sec_name, + eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -181,7 +185,7 @@ class ELFFile(object): dwarf_section = self._read_dwarf_section( section, relocate_dwarf_sections) - if compressed: + if compressed and secname.startswith('.z'): dwarf_section = self._decompress_dwarf_section(dwarf_section) debug_sections[secname] = dwarf_section @@ -194,8 +198,7 @@ class ELFFile(object): debug_aranges_sec=debug_sections[debug_aranges_sec_name], debug_abbrev_sec=debug_sections[debug_abbrev_sec_name], debug_frame_sec=debug_sections[debug_frame_sec_name], - # TODO(eliben): reading of eh_frame is not hooked up yet - eh_frame_sec=None, + eh_frame_sec=debug_sections[eh_frame_sec_name], debug_str_sec=debug_sections[debug_str_sec_name], debug_loc_sec=debug_sections[debug_loc_sec_name], 
debug_ranges_sec=debug_sections[debug_ranges_sec_name], @@ -413,7 +416,8 @@ class ELFFile(object): stream=section_stream, name=section.name, global_offset=section['sh_offset'], - size=section['sh_size']) + size=section['sh_size'], + address=section['sh_addr']) @staticmethod def _decompress_dwarf_section(section): diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index c202bf9..633bbf5 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -240,7 +240,7 @@ class RelocationHandler(object): ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE( bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE( - bytesize=8, has_addend=True, + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel), ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), diff --git a/scripts/readelf.py b/scripts/readelf.py index 8f50e22..b19bf18 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -19,7 +19,7 @@ sys.path.insert(0, '.') from elftools import __version__ from elftools.common.exceptions import ELFError from elftools.common.py3compat import ( - ifilter, byte2int, bytes2str, itervalues, str2bytes) + ifilter, byte2int, bytes2str, itervalues, str2bytes, iterbytes) from elftools.elf.elffile import ELFFile from elftools.elf.dynamic import DynamicSection, DynamicSegment from elftools.elf.enums import ENUM_D_TAG @@ -48,7 +48,7 @@ from elftools.dwarf.descriptions import ( ) from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) -from elftools.dwarf.callframe import CIE, FDE +from elftools.dwarf.callframe import CIE, FDE, ZERO class ReadElf(object): @@ -852,6 +852,8 @@ class ReadElf(object): def _dump_debug_info(self): """ Dump the debugging info section. 
""" + if not self._dwarfinfo.has_debug_info: + return self._emitline('Contents of the %s section:\n' % self._dwarfinfo.debug_info_sec.name) # Offset of the .debug_info section in the stream @@ -905,6 +907,8 @@ class ReadElf(object): """ Dump the (decoded) line programs from .debug_line The programs are dumped in the order of the CUs they belong to. """ + if not self._dwarfinfo.has_debug_info: + return self._emitline('Decoded dump of debug contents of section %s:\n' % self._dwarfinfo.debug_line_sec.name) for cu in self._dwarfinfo.iter_CUs(): @@ -963,14 +967,16 @@ class ReadElf(object): # Another readelf oddity... self._emitline() - def _dump_debug_frames(self): - """ Dump the raw frame information from .debug_frame + def _dump_frames_info(self, section, cfi_entries): + """ Dump the raw call frame info in a section. + + `section` is the Section instance that contains the call frame info + while `cfi_entries` must be an iterable that yields the sequence of + CIE or FDE instances. """ - if not self._dwarfinfo.has_CFI(): - return - self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_frame_sec.name) + self._emitline('Contents of the %s section:' % section.name) - for entry in self._dwarfinfo.CFI_entries(): + for entry in cfi_entries: if isinstance(entry, CIE): self._emitline('\n%08x %s %s CIE' % ( entry.offset, @@ -981,8 +987,14 @@ class ReadElf(object): self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor']) self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor']) self._emitline(' Return address column: %d' % entry['return_address_register']) + if entry.augmentation_bytes: + self._emitline(' Augmentation data: {}'.format(' '.join( + '{:02x}'.format(ord(b)) + for b in iterbytes(entry.augmentation_bytes) + ))) self._emitline() - else: # FDE + + elif isinstance(entry, FDE): self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % ( entry.offset, self._format_hex(entry['length'], fullhex=True, lead0x=False), @@ 
-992,10 +1004,34 @@ class ReadElf(object): self._format_hex( entry['initial_location'] + entry['address_range'], fullhex=True, lead0x=False))) + if entry.augmentation_bytes: + self._emitline(' Augmentation data: {}'.format(' '.join( + '{:02x}'.format(ord(b)) + for b in iterbytes(entry.augmentation_bytes) + ))) + + else: # ZERO terminator + assert isinstance(entry, ZERO) + self._emitline('\n%08x ZERO terminator' % entry.offset) + continue self._emit(describe_CFI_instructions(entry)) self._emitline() + def _dump_debug_frames(self): + """ Dump the raw frame info from .debug_frame and .eh_frame sections. + """ + if self._dwarfinfo.has_EH_CFI(): + self._dump_frames_info( + self._dwarfinfo.eh_frame_sec, + self._dwarfinfo.EH_CFI_entries()) + self._emitline() + + if self._dwarfinfo.has_CFI(): + self._dump_frames_info( + self._dwarfinfo.debug_frame_sec, + self._dwarfinfo.CFI_entries()) + def _dump_debug_aranges(self): """ Dump the aranges table """ @@ -1034,15 +1070,16 @@ class ReadElf(object): self._format_hex(0, fullhex=True, lead0x=False), self._format_hex(0, fullhex=True, lead0x=False))) - def _dump_debug_frames_interp(self): - """ Dump the interpreted (decoded) frame information from .debug_frame - """ - if not self._dwarfinfo.has_CFI(): - return + def _dump_frames_interp_info(self, section, cfi_entries): + """ Dump interpreted (decoded) frame information in a section. - self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_frame_sec.name) + `section` is the Section instance that contains the call frame info + while `cfi_entries` must be an iterable that yields the sequence of + CIE or FDE instances. 
+ """ + self._emitline('Contents of the %s section:' % section.name) - for entry in self._dwarfinfo.CFI_entries(): + for entry in cfi_entries: if isinstance(entry, CIE): self._emitline('\n%08x %s %s CIE "%s" cf=%d df=%d ra=%d' % ( entry.offset, @@ -1053,7 +1090,8 @@ class ReadElf(object): entry['data_alignment_factor'], entry['return_address_register'])) ra_regnum = entry['return_address_register'] - else: # FDE + + elif isinstance(entry, FDE): self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % ( entry.offset, self._format_hex(entry['length'], fullhex=True, lead0x=False), @@ -1064,12 +1102,18 @@ class ReadElf(object): fullhex=True, lead0x=False))) ra_regnum = entry.cie['return_address_register'] + else: # ZERO terminator + assert isinstance(entry, ZERO) + self._emitline('\n%08x ZERO terminator' % entry.offset) + continue + + # Print the heading row for the decoded table self._emit(' LOC') self._emit(' ' if entry.structs.address_size == 4 else ' ') self._emit(' CFA ') - # Decode the table nad look at the registers it describes. + # Decode the table and look at the registers it describes. # We build reg_order here to match readelf's order. In particular, # registers are sorted by their number, and the register matching # ra_regnum is always listed last with a special heading. @@ -1084,8 +1128,8 @@ class ReadElf(object): self._emit('%-6s' % describe_reg_name(regnum)) self._emitline('ra ') - # Now include ra_regnum in reg_order to print its values similarly - # to the other registers. + # Now include ra_regnum in reg_order to print its values + # similarly to the other registers. reg_order.append(ra_regnum) else: self._emitline() @@ -1104,6 +1148,21 @@ class ReadElf(object): self._emitline() self._emitline() + def _dump_debug_frames_interp(self): + """ Dump the interpreted (decoded) frame information from .debug_frame + and .eh_framae sections. 
+ """ + if self._dwarfinfo.has_EH_CFI(): + self._dump_frames_interp_info( + self._dwarfinfo.eh_frame_sec, + self._dwarfinfo.EH_CFI_entries()) + self._emitline() + + if self._dwarfinfo.has_CFI(): + self._dump_frames_interp_info( + self._dwarfinfo.debug_frame_sec, + self._dwarfinfo.CFI_entries()) + def _emit(self, s=''): """ Emit an object to output """ diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 7ccbb4d..65ef656 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -107,23 +107,10 @@ def compare_output(s1, s2): """ def prepare_lines(s): return [line for line in s.lower().splitlines() if line.strip() != ''] - def filter_readelf_lines(lines): - filter_out = False - for line in lines: - if 'of the .eh_frame section' in line: - filter_out = True - elif 'of the .debug_frame section' in line or \ - 'of the .zdebug_frame section' in line: - filter_out = False - if not filter_out: - if not line.startswith('unknown: length'): - yield line lines1 = prepare_lines(s1) lines2 = prepare_lines(s2) - lines1 = list(filter_readelf_lines(lines1)) - flag_after_symtable = False if len(lines1) != len(lines2): diff --git a/test/test_callframe.py b/test/test_callframe.py index 617f2c8..5e5c0d5 100644 --- a/test/test_callframe.py +++ b/test/test_callframe.py @@ -63,7 +63,7 @@ class TestCallFrame(unittest.TestCase): s.write(data) structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) - cfi = CallFrameInfo(s, len(data), structs) + cfi = CallFrameInfo(s, len(data), 0, structs) entries = cfi.get_entries() self.assertEqual(len(entries), 2) @@ -137,7 +137,7 @@ class TestCallFrame(unittest.TestCase): s = BytesIO(data) structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) - cfi = CallFrameInfo(s, len(data), structs) + cfi = CallFrameInfo(s, len(data), 0, structs) entries = cfi.get_entries() set_global_machine_arch('x86') -- 2.30.2