From: eliben Date: Wed, 21 Sep 2011 09:09:53 +0000 (+0300) Subject: * completed implementation of abbrevtable X-Git-Tag: v0.10~102 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a98028e7d417087a79b9d3cb25cc2ff19655c112;p=pyelftools.git * completed implementation of abbrevtable * started implementing DIE * added code to CompileUnit to tie them all together DIEs start showing signs of life --- diff --git a/elftools/common/ordereddict.py b/elftools/common/ordereddict.py new file mode 100644 index 0000000..aabeafc --- /dev/null +++ b/elftools/common/ordereddict.py @@ -0,0 +1,282 @@ +# http://code.activestate.com/recipes/576693/ (r9) +# Created by Raymond Hettinger on Wed, 18 Mar 2009 (MIT) +# +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 +# and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# + +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) + + + + +#------------------------------------------------------------------------------- +if __name__ == "__main__": + od = OrderedDict() + d = dict() + + for key in ['joe', 'more', 'tem', 'opsdf', 'dsf']: + od[key] = d[key] = key + '1' + + for k in d: + print k, d[k] + + print '-------- ordered ----------' + + for k in od: + print k, od[k] diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 14f88f5..328f49a 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -58,17 +58,28 @@ class AbbrevTable(object): class AbbrevDecl(object): """ Wraps a parsed abbreviation declaration, exposing its fields with dict-like access, and adding some convenience methods. + + The abbreviation declaration represents an "entry" that points to it. """ def __init__(self, code, decl): self.code = code self.decl = decl def has_children(self): + """ Does the entry have children? + """ return self['children_flag'] == 'DW_CHILDREN_yes' - + + def iter_attr_specs(self): + """ Iterate over the attribute specifications for the entry. Yield + (name, form) pairs. + """ + for attr_spec in self['attr_spec']: + # Ignore the terminating 'null' spec + if attr_spec.name == 'DW_AT_null': + break + yield attr_spec.name, attr_spec.form + def __getitem__(self, entry): return self.decl[entry] - - - diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index 92198e4..8852fac 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -6,10 +6,11 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from .die import DIE class CompileUnit(object): - def __init__(self, header, dwarfinfo, structs): + def __init__(self, header, dwarfinfo, structs, cu_die_offset): """ header: CU header for this compile unit @@ -18,16 +19,39 @@ class CompileUnit(object): structs: A DWARFStructs instance suitable for this compile unit + + cu_die_offset: + Offset in the stream of the top DIE of this CU """ self.dwarfinfo = dwarfinfo self.header = header self.structs = structs - + self.cu_die_offset = cu_die_offset + + # The abbreviation table for this CU. Filled lazily when DIEs are + # requested. + self._abbrev_table = None + + def get_abbrev_table(self): + """ Get the abbreviation table (AbbrevTable object) for this CU + """ + if self._abbrev_table is None: + self._abbrev_table = self.dwarfinfo.get_abbrev_table( + self['debug_abbrev_offset']) + return self._abbrev_table + + def get_top_DIE(self): + """ Get the top DIE (which is either a DW_TAG_compile_unit or + DW_TAG_partial_unit) of this CU + """ + return DIE( + cu=self, + stream=self.dwarfinfo.stream, + offset=self.cu_die_offset) + def __getitem__(self, name): """ Implement dict-like access to header entries """ return self.header[name] - - - + \ No newline at end of file diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py new file mode 100644 index 0000000..4a034fb --- /dev/null +++ b/elftools/dwarf/die.py @@ -0,0 +1,63 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/die.py +# +# DWARF Debugging Information Entry +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from collections import namedtuple + +from ..common.ordereddict import OrderedDict +from ..common.utils import struct_parse + + +# Describes an attribute value in the DIE: form and actual value +# +AttributeValue = namedtuple('AttributeValue', 'form value') + + +class DIE(object): + """ A DWARF debugging information entry. On creation, parses itself from + the stream. Each DIE is held by a CU. + + Accessible attributes: + + tag: + The DIE tag + + length: + The size this DIE occupies in the section + + attributes: + An ordered dictionary mapping attribute names to values + """ + def __init__(self, cu, stream, offset): + """ cu: + CompileUnit object this DIE belongs to. Used to obtain context + information (structs, abbrev table, etc.) + + stream, offset: + The stream and offset into it where this DIE's data is located + """ + self.cu = cu + self.stream = stream + self.offset = offset + self._parse_DIE() + + def _parse_DIE(self): + """ Parses the DIE info from the section, based on the abbreviation + table of the CU + """ + saved_offset = self.offset + structs = self.cu.structs + + # The DIE begins with the abbreviation code. Read it and use it to + # obtain the abbrev declaration for this DIE + # + abbrev_code = struct_parse(structs.Dwarf_uleb128(''), self.stream) + abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code) + + print abbrev_code, abbrev, abbrev.decl + + diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 6e96f14..343b576 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -59,6 +59,22 @@ class DWARFInfo(object): # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} + def num_CUs(self): + """ Number of compile units in the debug info + """ + return len(self._CU) + + def get_CU(self, n): + """ Get the compile unit (CompileUnit object) at index #n + """ + return self._CU[n] + + def iter_CUs(self): + """ Yield all the compile units (CompileUnit objects) in the debug info + """ + for i in range(self.num_CUs()): + yield self.get_CU(i) + def get_abbrev_table(self, offset): """ Get an AbbrevTable from the given offset in the debug_abbrev section. @@ -81,6 +97,16 @@ class DWARFInfo(object): offset=offset + self.debug_abbrev_loc.offset) return self._abbrevtable_cache[offset] + def info_offset2absolute(self, offset): + """ Given an offset into the debug_info section, translate it to an + absolute offset into the stream. Raise an exception if the offset + exceeds the section bounds. + """ + dwarf_assert( + offset < self.debug_info_loc.size, + "Offset '0x%x' to debug_info out of section bounds" % offset) + return offset + self.debug_info_loc.offset + def _parse_CUs(self): """ Parse CU entries from debug_info. """ @@ -106,13 +132,15 @@ class DWARFInfo(object): cu_header = struct_parse( cu_structs.Dwarf_CU_header, self.stream, offset) + cu_die_offset = self.stream.tell() dwarf_assert( self._is_supported_version(cu_header['version']), "Expected supported DWARF version. Got '%s'" % cu_header['version']) CUlist.append(CompileUnit( header=cu_header, dwarfinfo=self, - structs=cu_structs)) + structs=cu_structs, + cu_die_offset=cu_die_offset)) # Compute the offset of the next CU in the section. The unit_length # field of the CU header contains its size not including the length # field itself. diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index 8655e48..479733e 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -216,10 +216,6 @@ ENUM_DW_FORM = dict( DW_FORM_ref8 = 0x14, DW_FORM_ref_udata = 0x15, DW_FORM_indirect = 0x16, - DW_FORM_sec_offset = 0x17, - DW_FORM_exprloc = 0x18, - DW_FORM_flag_present = 0x19, - DW_FORM_ref_sig8 = 0x20, _default_ = Pass, ) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index e77f98e..536132b 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -11,6 +11,7 @@ from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum, + PrefixedArray, CString, ) from .enums import * @@ -42,6 +43,11 @@ class DWARFStructs(object): Dwarf_abbrev_declaration: Abbreviation table declaration - doesn't include the initial code, only the contents. + + Dwarf_dw_form: + A dictionary mapping 'DW_FORM_*' keys into construct Structs + that parse such forms. These Structs have already been given + dummy names. See also the documentation of public methods. """ @@ -68,12 +74,13 @@ class DWARFStructs(object): self.Dwarf_uint16 = UBInt16 self.Dwarf_uint32 = UBInt32 self.Dwarf_uint64 = UBInt64 - self.Dwarf_offest = UBInt32 if self.dwarf_format == 32 else UBInt64 + self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64 self._create_initial_length() self._create_leb128() self._create_cu_header() self._create_abbrev_declaration() + self._create_dw_form() def _create_initial_length(self): def _InitialLength(name): @@ -107,10 +114,48 @@ class DWARFStructs(object): RepeatUntil( lambda obj, ctx: obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null', - Struct('spec', + Struct('attr_spec', Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT), Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)))) + def _create_dw_form(self): + self.Dwarf_dw_form = dict( + DW_FORM_addr=self.Dwarf_offset(''), + + DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), + DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), + DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32), + DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128), + + # All DW_FORM_data forms are assumed to be unsigned + DW_FORM_data1=self.Dwarf_uint8(''), + DW_FORM_data2=self.Dwarf_uint16(''), + DW_FORM_data4=self.Dwarf_uint32(''), + DW_FORM_data8=self.Dwarf_uint64(''), + DW_FORM_sdata=self.Dwarf_sleb128(''), + DW_FORM_udata=self.Dwarf_uleb128(''), + + DW_FORM_string=CString(''), + DW_FORM_strp=self.Dwarf_offset(''), + DW_FORM_flag=self.Dwarf_uint8(''), + + DW_FORM_ref1=self.Dwarf_uint8(''), + DW_FORM_ref2=self.Dwarf_uint16(''), + DW_FORM_ref4=self.Dwarf_uint32(''), + DW_FORM_ref8=self.Dwarf_uint64(''), + DW_FORM_ref_udata=self.Dwarf_uleb128(''), + DW_FORM_ref_addr=self.Dwarf_offset(''), + + DW_FORM_indirect=self.Dwarf_uleb128(''), + ) + + def _make_block_struct(self, length_field): + """ Create a struct for DW_FORM_block + """ + return PrefixedArray( + subcon=self.Dwarf_uint8('elem'), + length_field=length_field('')) + class _InitialLengthAdapter(Adapter): """ A standard Construct adapter that expects a sub-construct @@ -171,3 +216,4 @@ def _SLEB128(name): """ return Rename(name, _SLEB128Adapter(_LEB128_reader())) + diff --git a/z.py b/z.py index 6481458..013a7c7 100644 --- a/z.py +++ b/z.py @@ -21,14 +21,21 @@ print efile.has_dwarf_info() dwarfinfo = efile.get_dwarf_info() +cu = dwarfinfo.get_CU(1) +print cu.get_top_DIE() + #~ print dwarfinfo.structs.Dwarf_abbrev_entry.parse('\x13\x01\x01\x03\x50\x04\x00\x00') -print id(dwarfinfo.get_abbrev_table(0)) -print id(dwarfinfo.get_abbrev_table(0)) -pprint.pprint(dwarfinfo.get_abbrev_table(0)._abbrev_map) +#~ abbrevtable = dwarfinfo.get_abbrev_table(95) +#~ print id(abbrevtable) +#~ pprint.pprint(abbrevtable._abbrev_map) + +#~ ab1 = abbrevtable.get_abbrev(2) +#~ print ab1.has_children() +#~ for name, form in ab1.iter_attr_specs(): + #~ print name, form -print dwarfinfo.get_abbrev_table(0).get_abbrev(1).decl -print dwarfinfo.get_abbrev_table(0).get_abbrev(1).has_children() +#~ print dwarfinfo.get_abbrev_table(0).get_abbrev(1).has_children() #~ for cu in dwarfinfo._CU: #~ print cu, cu.header diff --git a/zd.py b/zd.py index 54af0f7..e5a4503 100644 --- a/zd.py +++ b/zd.py @@ -6,11 +6,16 @@ from elftools.dwarf.structs import DWARFStructs from elftools.dwarf.dwarfinfo import DWARFInfo -ds = DWARFStructs(little_endian=True, - dwarfclass=32) +ds = DWARFStructs(little_endian=True, dwarf_format=32) -print ds.Dwarf_xword('x').parse('\x04\x01\x00\x00') +print ds.Dwarf_offset('x').parse('\x04\x01\x00\x00') print ds.Dwarf_initial_length('joe').parse('\xff\xff\xff\xff\x32\x00\x00\x00\x00\x00\x00\x00') print ds.Dwarf_sleb128('kwa').parse('\x81\x7f') + +s = ds.Dwarf_dw_form['DW_FORM_block'] +#~ s = ds.Dwarf_dw_form['DW_FORM_addr'] + +print s.parse('\x04\x12\x13\x13\x16') +#~ print s.parse('\x04\x00\x12\x13')