From ef943b79e3179070c36ea5803bb7ff2988129428 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 23 Nov 2011 16:01:52 +0200 Subject: [PATCH] still not working, but getting there. need to revise DWARF processing for per-section streams --- elftools/dwarf/abbrevtable.py | 2 - elftools/dwarf/dwarfinfo.py | 30 ++------ elftools/elf/elffile.py | 7 +- elftools/elf/relocation.py | 141 +++++++++++++++++++++------------- elftools/elf/structs.py | 6 +- z.py | 2 +- 6 files changed, 106 insertions(+), 82 deletions(-) diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 328f49a..ad25aeb 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -22,8 +22,6 @@ class AbbrevTable(object): stream, offset: The stream and offset into the stream where this abbreviation table lives. - Note that this is the absolute offset into the stream, not - relative to the debug_abbrev section. """ self.structs = structs self.stream = stream diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index f32c6aa..3a66871 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -14,7 +14,6 @@ from ..common.utils import struct_parse, dwarf_assert from .structs import DWARFStructs from .compileunit import CompileUnit from .abbrevtable import AbbrevTable -from .dwarfrelocationmanager import DWARFRelocationManager # Describes a debug section @@ -24,7 +23,7 @@ from .dwarfrelocationmanager import DWARFRelocationManager # global_offset: the global offset of the section in its container file # size: the size of the section's data, in bytes # -DebugSectionDescriptor = namedtuple('DebugSectionLocator', +DebugSectionDescriptor = namedtuple('DebugSectionDescriptor', 'stream name global_offset size') @@ -45,7 +44,7 @@ class DWARFInfo(object): ELFFile reference debug_*_sec: - DebugSectionDescriptor for this section + DebugSectionDescriptor for a section """ self.elffile = elffile self.debug_info_sec = debug_info_sec @@ -55,11 +54,6 @@ class DWARFInfo(object): self.little_endian = self.elffile.little_endian - self.relocation_manager = {} - self.relocation_manager['.debug_info'] = DWARFRelocationManager( - elffile=self.elffile, - section_name='.debug_info') - # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them # to default values. @@ -108,25 +102,15 @@ class DWARFInfo(object): offset will return the same object). """ dwarf_assert( - offset < self.debug_abbrev_loc.size, + offset < self.debug_abbrev_sec.size, "Offset '0x%x' to abbrev table out of section bounds" % offset) if offset not in self._abbrevtable_cache: self._abbrevtable_cache[offset] = AbbrevTable( structs=self.structs, stream=self.stream, - offset=offset + self.debug_abbrev_loc.offset) + offset=offset) return self._abbrevtable_cache[offset] - def info_offset2absolute(self, offset): - """ Given an offset into the debug_info section, translate it to an - absolute offset into the stream. Raise an exception if the offset - exceeds the section bounds. - """ - dwarf_assert( - offset < self.debug_info_loc.size, - "Offset '0x%x' to debug_info out of section bounds" % offset) - return offset + self.debug_info_loc.offset - def get_string_from_table(self, offset): """ Obtain a string from the string table section, given an offset relative to the section. @@ -134,15 +118,15 @@ class DWARFInfo(object): return struct_parse( CString(''), self.stream, - stream_pos=self.debug_str_loc.offset + offset) + stream_pos=offset) #------ PRIVATE ------# def _parse_CUs(self): """ Parse CU entries from debug_info. """ - offset = self.debug_info_loc.offset - section_boundary = self.debug_info_loc.offset + self.debug_info_loc.size + offset = 0 + section_boundary = self.debug_info_sec.size CUlist = [] while offset < section_boundary: # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3 diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 1f81c31..a9ca631 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -13,9 +13,9 @@ from ..construct import ConstructError from .structs import ELFStructs from .sections import ( Section, StringTableSection, SymbolTableSection, NullSection) +from .relocation import RelocationSection, RelocationHandler from .segments import Segment, InterpSegment from .enums import ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64 -from .relocation import RelocationHandler from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor @@ -268,7 +268,10 @@ class ELFFile(object): """ self.stream.seek(section['sh_offset']) # The section data is read into a new stream, for processing - section_stream = StringIO(self.stream.read(section['sh_size'])) + section_stream = StringIO() + # Using .write instead of initializing StringIO with the string because + # such a StringIO from cStringIO is read-only. + section_stream.write(self.stream.read(section['sh_size'])) if relocate_dwarf_sections: reloc_handler = RelocationHandler(self) diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index abb35bd..b00c72d 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -9,6 +9,9 @@ from collections import namedtuple from ..common.exceptions import ELFRelocationError +from ..common.utils import elf_assert, struct_parse +from .sections import Section +from .enums import ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64 class Relocation(object): @@ -102,7 +105,7 @@ class RelocationHandler(object): # Find the relocation section aimed at this one. Currently assume # that either .rel or .rela section exists for this section, but # not both. - for relsection in self.iter_sections(): + for relsection in self.elffile.iter_sections(): if ( isinstance(relsection, RelocationSection) and relsection.name in reloc_section_names): return relsection @@ -119,70 +122,104 @@ class RelocationHandler(object): self._do_apply_relocation(stream, reloc, symtab) def _do_apply_relocation(self, stream, reloc, symtab): - # ZZZ: steps - # 1. Read the value from the stream (with correct size and endianness) - # 2. Apply the relocation to the value - # 3. Write the relocated value back into the stream - # - # To make it generic, have a map of "relocation recipes" per - # relocation. - # - - - # Some basic sanity checking - if self.architecture_is_x86() and reloc.is_RELA(): - raise ELFRelocationError( - 'Unexpected RELA relocation for x86: %s' % reloc) - elif self.architecture_is_x64() and not reloc.is_RELA(): - raise ELFRelocationError( - 'Unexpected REL relocation for x64: %s' % reloc) - + # Preparations for performing the relocation: obtain the value of + # the symbol mentioned in the relocation, as well as the relocation + # recipe which tells us how to actually perform it. + # All peppered with some sanity checking. if reloc['r_info_sym'] >= symtab.num_symbols(): raise ELFRelocationError( 'Invalid symbol reference in relocation: index %s' % ( reloc['r_info_sym'])) - sym_value = symtab.get_symbol(reloc['r_info_sym'])['st_value'] - reloc_type = reloc['r_info_type'] - if self.architecture_is_x86(): - if reloc_type == ENUM_RELOC_TYPE_i386['R_386_NONE']: - # No relocation - return value - elif reloc_type == ENUM_RELOC_TYPE_i386['R_386_32']: - return sym_value + value - elif reloc_type == ENUM_RELOC_TYPE_i386['R_386_PC32']: - return sym_value + value - offset - else: - raise ELFRelocationError('Unsupported relocation type %s' % ( - reloc_type)) - elif self.architecture_is_x64(): - if reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: - # No relocation - return value - elif reloc_type in ( - ENUM_RELOC_TYPE_x64['R_X86_64_64'], - ENUM_RELOC_TYPE_x64['R_X86_64_32'], - ENUM_RELOC_TYPE_x64['R_X86_64_32S']): - return sym_value + reloc['r_addend'] - else: - raise ELFRelocationError('Unsupported relocation type %s' % ( - reloc_type)) - else: + reloc_type = reloc['r_info_type'] + recipe = None + + if self.elffile.architecture_is_x86(): + if reloc.is_RELA(): + raise ELFRelocationError( + 'Unexpected RELA relocation for x86: %s' % reloc) + recipe = self._RELOCATION_RECIPES_X86.get(reloc_type, None) + elif self.elffile.architecture_is_x64(): + if not reloc.is_RELA(): + raise ELFRelocationError( + 'Unexpected REL relocation for x64: %s' % reloc) + recipe = self._RELOCATION_RECIPES_X64.get(reloc_type, None) + + if recipe is None: raise ELFRelocationError( - 'Relocations not supported for architecture %s' % ( - self['e_machine'])) + 'Unsupported relocation type: %s' % reloc_type) + + # So now we have everything we need to actually perform the relocation. + # Let's get to it: - raise ELFRelocationError('unreachable relocation code') + # 0. Find out which struct we're going to be using to read this value + # from the stream and write it back. + if recipe.bytesize == 4: + value_struct = self.elffile.structs.Elf_word('') + elif recipe.bytesize == 8: + value_struct = self.elffile.structs.Elf_word64('') + else: + raise ELFRelocationError('Invalid bytesize %s for relocation' % + recipe_bytesize) - # Relocations are represented by "recipes". Each recipe specifies + # 1. Read the value from the stream (with correct size and endianness) + original_value = struct_parse( + value_struct, + stream, + stream_pos=reloc['r_offset']) + # 2. Apply the relocation to the value, acting according to the recipe + relocated_value = recipe.calc_func( + value=original_value, + sym_value=sym_value, + offset=reloc['r_offset'], + addend=reloc['r_addend'] if recipe.has_addend else 0) + # 3. Write the relocated value back into the stream + stream.seek(reloc['r_offset']) + value_struct.build_stream(relocated_value, stream) + + # Relocations are represented by "recipes". Each recipe specifies: + # bytesize: The number of bytes to read (and write back) to the section. + # This is the unit of data on which relocation is performed. + # has_addend: Does this relocation have an extra addend? + # calc_func: A function that performs the relocation on an extracted + # value, and returns the updated value. + # _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE', 'bytesize has_addend calc_func') - def _reloc_calc_identity(value, offset, addend=0): + def _reloc_calc_identity(value, sym_value, offset, addend=0): return value + + def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0): + return sym_value + value + + def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): + return sym_value + value - offset + + def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0): + return sym_value + addend - _RELOCATION_RECIPES = { - 'R_386_NONE': + _RELOCATION_RECIPES_X86 = { + ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), + ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=False, + calc_func=_reloc_calc_sym_plus_value), + ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=False, + calc_func=_reloc_calc_sym_plus_value_pcrel), } + + _RELOCATION_RECIPES_X64 = { + ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE( + bytesize=8, has_addend=True, calc_func=_reloc_calc_identity), + ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE( + bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + } + diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index d2d6e69..2c55d5b 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -20,10 +20,10 @@ from .enums import * class ELFStructs(object): """ Accessible attributes: - Elf_{byte|half|word|addr|offset|sword|xword|xsword}: + Elf_{byte|half|word|word64|addr|offset|sword|xword|xsword}: Data chunks, as specified by the ELF standard, adjusted for correct endianness and word-size. - + Elf_Ehdr: ELF file header @@ -50,6 +50,7 @@ class ELFStructs(object): self.Elf_byte = ULInt8 self.Elf_half = ULInt16 self.Elf_word = ULInt32 + self.Elf_word64 = ULInt64 self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64 self.Elf_offset = self.Elf_addr self.Elf_sword = SLInt32 @@ -59,6 +60,7 @@ class ELFStructs(object): self.Elf_byte = UBInt8 self.Elf_half = UBInt16 self.Elf_word = UBInt32 + self.Elf_word64 = UBInt64 self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64 self.Elf_offset = self.Elf_addr self.Elf_sword = SBInt32 diff --git a/z.py b/z.py index 8bf4743..b0cdfb2 100644 --- a/z.py +++ b/z.py @@ -7,7 +7,7 @@ from elftools.elf.structs import ELFStructs from elftools.elf.elffile import ELFFile from elftools.elf.sections import * -from elftools.dwarf.dwarfrelocationmanager import DWARFRelocationManager +from elftools.elf.relocation import * # read a little-endian, 64-bit file es = ELFStructs(True, 64) -- 2.30.2