From 067b3fd6c5046c0e4c95053fc94d68d8a1594db1 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 18 Nov 2011 12:02:57 +0200 Subject: [PATCH] the relocation manager for dwarf works! sdf --- elftools/common/exceptions.py | 3 + elftools/dwarf/die.py | 25 +++++++- elftools/dwarf/dwarfinfo.py | 17 ++++-- elftools/dwarf/dwarfrelocationmanager.py | 26 +++++---- elftools/elf/descriptions.py | 6 +- elftools/elf/elffile.py | 74 +++++++++++++++++++++++- scripts/readelf.py | 2 +- z.py | 4 +- 8 files changed, 130 insertions(+), 27 deletions(-) diff --git a/elftools/common/exceptions.py b/elftools/common/exceptions.py index c2da24c..26f1ba0 100644 --- a/elftools/common/exceptions.py +++ b/elftools/common/exceptions.py @@ -9,6 +9,9 @@ class ELFError(Exception): pass +class ELFRelocationError(ELFError): + pass + class ELFParseError(ELFError): pass diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 0836aa0..f438934 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -29,7 +29,8 @@ from ..common.utils import struct_parse, preserve_stream_pos # (e.g. for a DW_FORM_strp it's the raw string offset into the table) # # offset: -# Offset of this attribute's value in the stream +# Offset of this attribute's value in the stream (absolute offset, relative +# to the beginning of the whole stream) # AttributeValue = namedtuple( 'AttributeValue', 'name form value raw_value offset') @@ -159,12 +160,34 @@ class DIE(object): self.tag = abbrev_decl['tag'] self.has_children = abbrev_decl.has_children() + # The offset of the .debug_info section in the stream. Used to compute + # relative offset of attribute values to the beginning of the section. + section_offset = self.dwarfinfo.debug_info_loc.offset + + # Some attribute values need relocations. These are computed with the + # help of a DWARFRelocationManager for .debug_info, which is held by + # DWARFInfo for this purpose. 
+ relocation_manager = self.dwarfinfo.relocation_manager['.debug_info'] + # Guided by the attributes listed in the abbreviation declaration, parse # values from the stream. # for name, form in abbrev_decl.iter_attr_specs(): attr_offset = self.stream.tell() raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) + + # raw_value may need to be relocated, if there's a relocation + # registered for this offset in the relocation manager. + # Relocations are listed by offset relative to the beginning of + # the section. + offset_from_section = attr_offset - section_offset + if relocation_manager.has_relocation(offset_from_section): + # Applying the relocation may change the stream, so preserve it + with preserve_stream_pos(self.stream): + raw_value = relocation_manager.apply_relocation( + offset=offset_from_section, + value=raw_value) + value = self._translate_attr_value(form, raw_value) self.attributes[name] = AttributeValue( name=name, diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 051ea9f..1e2208b 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -14,6 +14,7 @@ from ..common.utils import struct_parse, dwarf_assert from .structs import DWARFStructs from .compileunit import CompileUnit from .abbrevtable import AbbrevTable +from .dwarfrelocationmanager import DWARFRelocationManager # Describes a debug section in a stream: offset and size @@ -27,7 +28,7 @@ class DWARFInfo(object): """ def __init__(self, stream, - little_endian, + elffile, debug_info_loc, debug_abbrev_loc, debug_str_loc, @@ -35,9 +36,9 @@ class DWARFInfo(object): """ stream: A stream (file-like object) that contains debug sections - little_endian: - Section contents are in little-endian data format - + elffile: + ELFFile reference + debug_*_loc: DebugSectionLocator for this section, specifying where it can be found in the stream @@ -48,7 +49,13 @@ class DWARFInfo(object): self.debug_str_loc = debug_str_loc self.debug_line_loc = 
debug_line_loc - self.little_endian = little_endian + self.elffile = elffile + self.little_endian = self.elffile.little_endian + + self.relocation_manager = {} + self.relocation_manager['.debug_info'] = DWARFRelocationManager( + elffile=self.elffile, + section_name='.debug_info') # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them diff --git a/elftools/dwarf/dwarfrelocationmanager.py b/elftools/dwarf/dwarfrelocationmanager.py index d8f7831..048e1c6 100644 --- a/elftools/dwarf/dwarfrelocationmanager.py +++ b/elftools/dwarf/dwarfrelocationmanager.py @@ -6,7 +6,6 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- - from ..elf.sections import RelocationSection @@ -20,28 +19,30 @@ class DWARFRelocationManager(object): self.section_name = section_name self._section = self.elffile.get_section_by_name(section_name) - # _relocs maps an offset in the section to a Relocation object + # _relocs maps an offset in the section to an index in the relocation + # table. # _reloc_section is the relocation section object # ... both are loaded by _load_relocations self._relocs = {} self._reloc_section = None self._load_relocations() - # _symtable: symbol table section attached to the relocation section - self._symtable = self.elffile.get_section( - self._reloc_section['sh_link']) - def has_relocation(self, offset): """ Does the given offset have a relocation registered for it? The offset is relative to its section. """ return offset in self._relocs - def apply_relocation(self, offset): - """ Apply the relocation registered for the given offset. Return the - relocated value. + def apply_relocation(self, offset, value): + """ Apply the relocation registered for the given offset. value is + the original value at that offset. Return the relocated value. 
""" - reloc = self._relocs[offset] + reloc_index = self._relocs[offset] + return self.elffile.apply_relocation( + reloc_section=self._reloc_section, + reloc_index=reloc_index, + offset=offset, + value=value) def _load_relocations(self): # Currently assume that only a single relocation section will exist @@ -53,7 +54,8 @@ class DWARFRelocationManager(object): if ( isinstance(section, RelocationSection) and section.name in reloc_section_names): self._reloc_section = section - for reloc in self._reloc_section.iter_relocations(): - self._relocs[reloc['r_offset']] = reloc + for i, reloc in enumerate( + self._reloc_section.iter_relocations()): + self._relocs[reloc['r_offset']] = i break diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index f404898..b7e87ab 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -68,10 +68,10 @@ def describe_symbol_visibility(x): def describe_symbol_shndx(x): return _DESCR_ST_SHNDX.get(x, '%3s' % x) -def describe_reloc_type(x, e_machine): - if e_machine in ('EM_386', 'EM_486'): +def describe_reloc_type(x, elffile): + if elffile.architecture_is_x86(): return _DESCR_RELOC_TYPE_i386.get(x, _unknown) - elif e_machine in ('EM_X86_64', 'EM_L10M'): + elif elffile.architecture_is_x64(): return _DESCR_RELOC_TYPE_x64.get(x, _unknown) else: return 'unrecognized: %-7x' % (x & 0xFFFFFFFF) diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index ebd04dd..0a12d4c 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..common.exceptions import ELFError +from ..common.exceptions import ELFError, ELFRelocationError from ..common.utils import struct_parse, elf_assert from ..construct import ConstructError from .structs import ELFStructs @@ -14,6 +14,7 @@ from .sections import ( Section, StringTableSection, 
SymbolTableSection, NullSection, RelocationSection) from .segments import Segment, InterpSegment +from .enums import ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64 from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionLocator @@ -124,12 +125,34 @@ class ELFFile(object): return DWARFInfo( stream=self.stream, - little_endian=self.little_endian, + elffile=self, debug_info_loc=debug_sections['.debug_info'], debug_abbrev_loc=debug_sections['.debug_abbrev'], debug_str_loc=debug_sections['.debug_str'], debug_line_loc=debug_sections['.debug_line']) + def architecture_is_x86(self): + return self['e_machine'] in ('EM_386', 'EM_486') + + def architecture_is_x64(self): + return self['e_machine'] == 'EM_X86_64' + + def apply_relocation(self, reloc_section, reloc_index, offset, value): + """ Apply a relocation to the offset. The original value at offset is + also provided. Return a relocated value that should be written + back into the offset. + + The relocation to apply is specified by an index and a relocation + section where this index points. + + Throw ELFRelocationError if there's a problem with the relocation. + """ + # The symbol table associated with this relocation section + symtab = self.get_section(reloc_section['sh_link']) + # Relocation object + reloc = reloc_section.get_relocation(reloc_index) + return self._do_apply_relocation(reloc, symtab, offset, value) + #-------------------------------- PRIVATE --------------------------------# def __getitem__(self, name): @@ -242,10 +265,55 @@ class ELFFile(object): header=self._get_section_header(stringtable_section_num), name='', stream=self.stream) - + def _parse_elf_header(self): """ Parses the ELF file header and assigns the result to attributes of this object. 
""" return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0) + def _do_apply_relocation(self, reloc, symtab, offset, value): + # Only basic sanity checking here + if reloc['r_info_sym'] >= symtab.num_symbols(): + raise ELFRelocationError( + 'Invalid symbol reference in relocation: index %s' % ( + reloc['r_info_sym'])) + sym_value = symtab.get_symbol(reloc['r_info_sym'])['st_value'] + reloc_type = reloc['r_info_type'] + + if self.architecture_is_x86(): + if reloc.is_RELA(): + raise ELFRelocationError( + 'Unexpected RELA relocation for x86: %s' % reloc) + if reloc_type == ENUM_RELOC_TYPE_i386['R_386_NONE']: + # No relocation + return value + elif reloc_type == ENUM_RELOC_TYPE_i386['R_386_32']: + return sym_value + value + elif reloc_type == ENUM_RELOC_TYPE_i386['R_386_PC32']: + return sym_value + value - offset + else: + raise ELFRelocationError('Unsupported relocation type %s' % ( + reloc_type)) + elif self.architecture_is_x64(): + if not reloc.is_RELA(): + raise ELFRelocationError( + 'Unexpected REL relocation for x64: %s' % reloc) + if reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: + # No relocation + return value + elif reloc_type in ( + ENUM_RELOC_TYPE_x64['R_X86_64_64'], + ENUM_RELOC_TYPE_x64['R_X86_64_32'], + ENUM_RELOC_TYPE_x64['R_X86_64_32S']): + return sym_value + reloc['r_addend'] + else: + raise ELFRelocationError('Unsupported relocation type %s' % ( + reloc_type)) + else: + raise ELFRelocationError( + 'Relocations not supported for architecture %s' % ( + self['e_machine'])) + + raise ELFRelocationError('unreachable relocation code') + diff --git a/scripts/readelf.py b/scripts/readelf.py index 986b7ae..efbcf21 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -304,7 +304,7 @@ class ReadElf(object): self._format_hex(rel['r_info'], fieldsize=hexwidth, lead0x=False), describe_reloc_type( - rel['r_info_type'], self.elffile['e_machine']))) + rel['r_info_type'], self.elffile))) if rel['r_info_sym'] == 0: self._emitline() diff --git 
a/z.py b/z.py index 7c09211..8bf4743 100644 --- a/z.py +++ b/z.py @@ -29,10 +29,10 @@ print cu.structs.Dwarf_dw_form['DW_FORM_strp'].parse('\x01\x00\x00\x00\x01\x00\x print 'CU header', cu.header topdie = cu.get_top_DIE() -#print topdie +print topdie dinfo_sec = efile.get_section_by_name('.debug_info') relman = DWARFRelocationManager(efile, dinfo_sec.name) print relman._reloc_section.name, relman._reloc_section['sh_offset'] -pprint.pprint(relman._relocs) +#pprint.pprint(relman._relocs) -- 2.30.2