From 3b9ad8276fc99eadd1b2495a078d35a388bbd1d8 Mon Sep 17 00:00:00 2001 From: eliben Date: Thu, 22 Sep 2011 11:46:26 +0300 Subject: [PATCH] more preparations for full DIE parsing: - DWARFStructs got a new target_addr field that reflects the size of address fields in a CU - DWARFInfo now gives access to the symbol table - fixed stream parsing bugs that happened because the stream was not being preserved during parsing while issuing calls to other APIs that also move the stream --- elftools/common/utils.py | 15 +++++++++++ elftools/dwarf/die.py | 50 +++++++++++++++++++++++++++++-------- elftools/dwarf/dwarfinfo.py | 37 +++++++++++++++++++++++---- elftools/dwarf/structs.py | 31 ++++++++++++++++++----- z.py | 3 +++ 5 files changed, 114 insertions(+), 22 deletions(-) diff --git a/elftools/common/utils.py b/elftools/common/utils.py index a36b3ec..5358072 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -41,3 +41,18 @@ def _assert_with_exception(cond, msg, exception_type): if not cond: raise exception_type(msg) + +from contextlib import contextmanager + +@contextmanager +def preserve_stream_pos(stream): + """ Usage: + + # stream has some position FOO (return value of stream.tell()) + with preserve_stream_pos(stream): + # do stuff that manipulates the stream + # stream still has position FOO + """ + saved_pos = stream.tell() + yield + stream.seek(saved_pos) diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 4a034fb..fa1e5b8 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -9,12 +9,23 @@ from collections import namedtuple from ..common.ordereddict import OrderedDict -from ..common.utils import struct_parse +from ..common.utils import struct_parse, preserve_stream_pos -# Describes an attribute value in the DIE: form and actual value +# Describes an attribute value in the DIE: # -AttributeValue = namedtuple('AttributeValue', 'form value') +# form: +# The DW_FORM_* name of this attribute +# +# value: +# The value parsed from 
the section and translated according to the form +# (e.g. for a DW_FORM_strp it's the actual string taken from the string table) +# +# raw_value: +# Raw value as parsed from the section - used for debugging and presentation +# (e.g. for a DW_FORM_strp it's the raw string offset into the table) +# +AttributeValue = namedtuple('AttributeValue', 'form value raw_value') class DIE(object): @@ -30,34 +41,51 @@ class DIE(object): The size this DIE occupies in the section attributes: - An ordered dictionary mapping attribute names to values + An ordered dictionary mapping attribute names to values. It's + ordered to enable both efficient name->value mapping and + preserve the order of attributes in the section """ def __init__(self, cu, stream, offset): """ cu: CompileUnit object this DIE belongs to. Used to obtain context information (structs, abbrev table, etc.) - + stream, offset: The stream and offset into it where this DIE's data is located """ self.cu = cu + self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context self.stream = stream self.offset = offset + self.attributes = OrderedDict() self._parse_DIE() def _parse_DIE(self): """ Parses the DIE info from the section, based on the abbreviation table of the CU """ - saved_offset = self.offset + print self.offset, self.cu.structs.dwarf_format structs = self.cu.structs - # The DIE begins with the abbreviation code. Read it and use it to - # obtain the abbrev declaration for this DIE + # A DIE begins with the abbreviation code. Read it and use it to + # obtain the abbrev declaration for this DIE. + # Note: here and elsewhere, preserve_stream_pos is used on operations + # that manipulate the stream by reading data from it.
# - abbrev_code = struct_parse(structs.Dwarf_uleb128(''), self.stream) - abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code) + abbrev_code = struct_parse( + structs.Dwarf_uleb128(''), self.stream, self.offset) + with preserve_stream_pos(self.stream): + abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code) + + print '**', abbrev_code, abbrev, abbrev.decl - print abbrev_code, abbrev, abbrev.decl + # Guided by the attributes listed in the abbreviation declaration, parse + # values from the stream. + # + for name, form in abbrev.iter_attr_specs(): + print '** parsing at stream + ', self.stream.tell() + raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) + print '**', name, form, raw_value + #~ print structs.Dwarf_dw_form[form] diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 343b576..4301643 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -8,6 +8,7 @@ #------------------------------------------------------------------------------- from collections import namedtuple +from ..construct import CString from ..common.exceptions import DWARFError from ..common.utils import struct_parse, dwarf_assert from .structs import DWARFStructs @@ -48,10 +49,14 @@ class DWARFInfo(object): self.debug_line_loc = debug_line_loc self.little_endian = little_endian - self.dwarf_format = 32 + + # This is the DWARFStructs the context uses, so it doesn't depend on + # DWARF format and address_size (these are determined per CU) - so we + # set them to default values. 
self.structs = DWARFStructs( little_endian=self.little_endian, - dwarf_format=self.dwarf_format) + dwarf_format=32, + address_size=4) # Populate the list with CUs found in debug_info self._CU = self._parse_CUs() @@ -107,6 +112,15 @@ class DWARFInfo(object): "Offset '0x%x' to debug_info out of section bounds" % offset) return offset + self.debug_info_loc.offset + def get_string_from_table(self, offset): + """ Obtain a string from the string table section, given an offset + relative to the section. + """ + return struct_parse( + CString(''), + self.stream, + stream_pos=self.debug_str_loc.offset + offset) + def _parse_CUs(self): """ Parse CU entries from debug_info. """ @@ -124,14 +138,27 @@ class DWARFInfo(object): # initial_length = struct_parse( self.structs.Dwarf_uint32(''), self.stream, offset) - if initial_length == 0xFFFFFFFF: - self.dwarf_format = 64 + dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 + + # At this point we still haven't read the whole header, so we don't + # know the address_size. Therefore, we're going to create structs + # with a default address_size=4. If, after parsing the header, we + # find out address_size is actually 8, we just create a new structs + # object for this CU. 
+ # cu_structs = DWARFStructs( little_endian=self.little_endian, - dwarf_format=self.dwarf_format) + dwarf_format=dwarf_format, + address_size=4) cu_header = struct_parse( cu_structs.Dwarf_CU_header, self.stream, offset) + if cu_header['address_size'] == 8: + cu_structs = DWARFStructs( + little_endian=self.little_endian, + dwarf_format=dwarf_format, + address_size=8) + cu_die_offset = self.stream.tell() dwarf_assert( self._is_supported_version(cu_header['version']), diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 536132b..0f3094c 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -8,8 +8,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- from ..construct import ( - UBInt8, UBInt16, UBInt32, UBInt64, - ULInt8, ULInt16, ULInt32, ULInt64, + UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum, PrefixedArray, CString, ) @@ -19,7 +18,8 @@ from .enums import * class DWARFStructs(object): """ Exposes Construct structs suitable for parsing information from DWARF - sections. Configurable with endianity and format (32 or 64-bit) + sections. Each compile unit in DWARF info can have its own structs + object. Accessible attributes (mostly described by in chapter 7 of the DWARF spec v3): @@ -30,6 +30,9 @@ class DWARFStructs(object): Dwarf_offset: 32-bit or 64-bit word, depending on dwarf_format + Dwarf_target_addr: + 32-bit or 64-bit word, depending on address size + Dwarf_initial_length: "Initial length field" encoding section 7.4 @@ -51,10 +54,22 @@ class DWARFStructs(object): See also the documentation of public methods. 
""" - def __init__(self, little_endian=True, dwarf_format=32): + def __init__(self, little_endian, dwarf_format, address_size): + """ little_endian: + True if the file is little endian, False if big + + dwarf_format: + DWARF Format: 32 or 64-bit (see spec section 7.4) + + address_size: + Target machine address size, in bytes (4 or 8). (See spec + section 7.5.1) + """ assert dwarf_format == 32 or dwarf_format == 64 + assert address_size == 8 or address_size == 4 self.little_endian = little_endian - self.dwarf_format = dwarf_format + self.dwarf_format = dwarf_format + self.address_size = address_size self._create_structs() def initial_lenght_field_size(self): @@ -69,12 +84,16 @@ class DWARFStructs(object): self.Dwarf_uint32 = ULInt32 self.Dwarf_uint64 = ULInt64 self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64 + self.Dwarf_target_addr = ( + ULInt32 if self.address_size == 4 else ULInt64) else: self.Dwarf_uint8 = UBInt8 self.Dwarf_uint16 = UBInt16 self.Dwarf_uint32 = UBInt32 self.Dwarf_uint64 = UBInt64 self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64 + self.Dwarf_target_addr = ( + UBInt32 if self.address_size == 4 else UBInt64) self._create_initial_length() self._create_leb128() @@ -120,7 +139,7 @@ class DWARFStructs(object): def _create_dw_form(self): self.Dwarf_dw_form = dict( - DW_FORM_addr=self.Dwarf_offset(''), + DW_FORM_addr=self.Dwarf_target_addr(''), DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), diff --git a/z.py b/z.py index 013a7c7..4f2d848 100644 --- a/z.py +++ b/z.py @@ -21,7 +21,10 @@ print efile.has_dwarf_info() dwarfinfo = efile.get_dwarf_info() +print dwarfinfo.get_string_from_table(126) + cu = dwarfinfo.get_CU(1) +print 'CU header', cu.header print cu.get_top_DIE() #~ print dwarfinfo.structs.Dwarf_abbrev_entry.parse('\x13\x01\x01\x03\x50\x04\x00\x00') -- 2.30.2