From eefac9a7102528afeb1e95475e921bd50e08471c Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 27 Jan 2012 07:08:05 +0200 Subject: [PATCH] initial efforts porting pyelftools to python 3. some basic ELF functionality working --- elftools/common/construct_utils.py | 2 +- elftools/common/ordereddict.py | 18 -- elftools/common/py3compat.py | 40 ++++ elftools/construct/lib/utils.py | 22 --- elftools/construct/text.py | 286 ----------------------------- elftools/dwarf/callframe.py | 3 +- elftools/dwarf/die.py | 2 +- elftools/dwarf/dwarf_expr.py | 5 +- elftools/elf/elffile.py | 15 +- z.py | 13 +- 10 files changed, 61 insertions(+), 345 deletions(-) create mode 100644 elftools/common/py3compat.py delete mode 100644 elftools/construct/lib/utils.py delete mode 100644 elftools/construct/text.py diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index d3e311a..53caa97 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -35,7 +35,7 @@ class RepeatUntilExcluding(Subconstruct): if self.predicate(subobj, context): break obj.append(subobj) - except ConstructError, ex: + except ConstructError as ex: raise ArrayError("missing terminator", ex) return obj def _build(self, obj, stream, context): diff --git a/elftools/common/ordereddict.py b/elftools/common/ordereddict.py index aabeafc..5e0f142 100644 --- a/elftools/common/ordereddict.py +++ b/elftools/common/ordereddict.py @@ -262,21 +262,3 @@ class OrderedDict(dict): "od.viewitems() -> a set-like object providing a view on od's items" return ItemsView(self) - - - -#------------------------------------------------------------------------------- -if __name__ == "__main__": - od = OrderedDict() - d = dict() - - for key in ['joe', 'more', 'tem', 'opsdf', 'dsf']: - od[key] = d[key] = key + '1' - - for k in d: - print k, d[k] - - print '-------- ordered ----------' - - for k in od: - print k, od[k] diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py new file mode 100644 index 0000000..bd08bd2 --- /dev/null +++ b/elftools/common/py3compat.py @@ -0,0 +1,40 @@ +#------------------------------------------------------------------------------- +# elftools: common/py3compat.py +# +# Python 3 compatibility code +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import sys +PY3 = sys.version_info[0] == 3 + + +if PY3: + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + + import collections + OrderedDict = collections.OrderedDict + + _iterkeys = "keys" + _iteritems = "items" +else: + import cStringIO + StringIO = BytesIO = cStringIO.StringIO + + from .ordereddict import OrderedDict + + _iterkeys = "iterkeys" + _iteritems = "iteritems" + + +def iterkeys(d): + """Return an iterator over the keys of a dictionary.""" + return getattr(d, _iterkeys)() + +def iteritems(d): + """Return an iterator over the items of a dictionary.""" + return getattr(d, _iteritems)() + diff --git a/elftools/construct/lib/utils.py b/elftools/construct/lib/utils.py deleted file mode 100644 index 86d8b03..0000000 --- a/elftools/construct/lib/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - - -try: - from struct import Struct as Packer -except ImportError: - from struct import pack, unpack, calcsize - class Packer(object): - __slots__ = ["format", "size"] - def __init__(self, format): - self.format = format - self.size = calcsize(format) - def pack(self, *args): - return pack(self.format, *args) - def unpack(self, data): - return unpack(self.format, data) - - - diff --git a/elftools/construct/text.py b/elftools/construct/text.py deleted file mode 100644 index e461dff..0000000 --- a/elftools/construct/text.py +++ /dev/null @@ -1,286 +0,0 @@ -from core import * -from adapters import * -from macros import * - - -#=============================================================================== -# exceptions -#=============================================================================== -class QuotedStringError(ConstructError): - __slots__ = [] - - -#=============================================================================== -# constructs -#=============================================================================== -class QuotedString(Construct): - r""" - A quoted string (begins with an opening-quote, terminated by a - closing-quote, which may be escaped by an escape character) - - Parameters: - * name - the name of the field - * start_quote - the opening quote character. default is '"' - * end_quote - the closing quote character. default is '"' - * esc_char - the escape character, or None to disable escaping. defualt - is "\" (backslash) - * encoding - the character encoding (e.g., "utf8"), or None to return - raw bytes. defualt is None. - * allow_eof - whether to allow EOF before the closing quote is matched. - if False, an exception will be raised when EOF is reached by the closing - quote is missing. default is False. - - Example: - QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None) - """ - __slots__ = [ - "start_quote", "end_quote", "char", "esc_char", "encoding", - "allow_eof" - ] - def __init__(self, name, start_quote = '"', end_quote = None, - esc_char = '\\', encoding = None, allow_eof = False): - Construct.__init__(self, name) - if end_quote is None: - end_quote = start_quote - self.start_quote = Literal(start_quote) - self.char = Char("char") - self.end_quote = end_quote - self.esc_char = esc_char - self.encoding = encoding - self.allow_eof = allow_eof - - def _parse(self, stream, context): - self.start_quote._parse(stream, context) - text = [] - escaped = False - try: - while True: - ch = self.char._parse(stream, context) - if ch == self.esc_char: - if escaped: - text.append(ch) - escaped = False - else: - escaped = True - elif ch == self.end_quote and not escaped: - break - else: - text.append(ch) - escaped = False - except FieldError: - if not self.allow_eof: - raise - text = "".join(text) - if self.encoding is not None: - text = text.decode(self.encoding) - return text - - def _build(self, obj, stream, context): - self.start_quote._build(None, stream, context) - if self.encoding: - obj = obj.encode(self.encoding) - for ch in obj: - if ch == self.esc_char: - self.char._build(self.esc_char, stream, context) - elif ch == self.end_quote: - if self.esc_char is None: - raise QuotedStringError("found ending quote in data, " - "but no escape char defined", ch) - else: - self.char._build(self.esc_char, stream, context) - self.char._build(ch, stream, context) - self.char._build(self.end_quote, stream, context) - - def _sizeof(self, context): - raise SizeofError("can't calculate size") - - -#=============================================================================== -# macros -#=============================================================================== -class WhitespaceAdapter(Adapter): - """ - Adapter for whitespace sequences; do not use directly. - See Whitespace. - - Parameters: - * subcon - the subcon to adapt - * build_char - the character used for encoding (building) - """ - __slots__ = ["build_char"] - def __init__(self, subcon, build_char): - Adapter.__init__(self, subcon) - self.build_char = build_char - def _encode(self, obj, context): - return self.build_char - def _decode(self, obj, context): - return None - -def Whitespace(charset = " \t", optional = True): - """whitespace (space that is ignored between tokens). when building, the - first character of the charset is used. - * charset - the set of characters that are considered whitespace. default - is space and tab. - * optional - whether or not whitespace is optional. default is True. - """ - con = CharOf(None, charset) - if optional: - con = OptionalGreedyRange(con) - else: - con = GreedyRange(con) - return WhitespaceAdapter(con, build_char = charset[0]) - -def Literal(text): - """matches a literal string in the text - * text - the text (string) to match - """ - return ConstAdapter(Field(None, len(text)), text) - -def Char(name): - """a one-byte character""" - return Field(name, 1) - -def CharOf(name, charset): - """matches only characters of a given charset - * name - the name of the field - * charset - the set of valid characters - """ - return OneOf(Char(name), charset) - -def CharNoneOf(name, charset): - """matches only characters that do not belong to a given charset - * name - the name of the field - * charset - the set of invalid characters - """ - return NoneOf(Char(name), charset) - -def Alpha(name): - """a letter character (A-Z, a-z)""" - return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) - -def Digit(name): - """a digit character (0-9)""" - return CharOf(name, set('0123456789')) - -def AlphaDigit(name): - """an alphanumeric character (A-Z, a-z, 0-9)""" - return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")) - -def BinDigit(name): - """a binary digit (0-1)""" - return CharOf(name, set('01')) - -def HexDigit(name): - """a hexadecimal digit (0-9, A-F, a-f)""" - return CharOf(name, set('0123456789abcdefABCDEF')) - -def Word(name): - """a sequence of letters""" - return StringAdapter(GreedyRange(Alpha(name))) - -class TextualIntAdapter(Adapter): - """ - Adapter for textual integers - - Parameters: - * subcon - the subcon to adapt - * radix - the base of the integer (decimal, hexadecimal, binary, ...) - * digits - the sequence of digits of that radix - """ - __slots__ = ["radix", "digits"] - def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"): - Adapter.__init__(self, subcon) - if radix > len(digits): - raise ValueError("not enough digits for radix %d" % (radix,)) - self.radix = radix - self.digits = digits - def _encode(self, obj, context): - chars = [] - if obj < 0: - chars.append("-") - n = -obj - else: - n = obj - r = self.radix - digs = self.digits - while n > 0: - n, d = divmod(n, r) - chars.append(digs[d]) - # obj2 = "".join(reversed(chars)) - # filler = digs[0] * (self._sizeof(context) - len(obj2)) - # return filler + obj2 - return "".join(reversed(chars)) - def _decode(self, obj, context): - return int("".join(obj), self.radix) - -def DecNumber(name): - """decimal number""" - return TextualIntAdapter(GreedyRange(Digit(name))) - -def BinNumber(name): - """binary number""" - return TextualIntAdapter(GreedyRange(Digit(name)), 2) - -def HexNumber(name): - """hexadecimal number""" - return TextualIntAdapter(GreedyRange(Digit(name)), 16) - -def StringUpto(name, charset): - """a string that stretches up to a terminator, or EOF. unlike CString, - StringUpto will no consume the terminator char. - * name - the name of the field - * charset - the set of terminator characters""" - return StringAdapter(OptionalGreedyRange(CharNoneOf(name, charset))) - -def Line(name): - r"""a textual line (up to "\n")""" - return StringUpto(name, "\n") - -class IdentifierAdapter(Adapter): - """ - Adapter for programmatic identifiers - - Parameters: - * subcon - the subcon to adapt - """ - def _encode(self, obj, context): - return obj[0], obj[1:] - def _decode(self, obj, context): - return obj[0] + "".join(obj[1]) - -def Identifier(name, - headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), - tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_") - ): - """a programmatic identifier (symbol). must start with a char of headset, - followed by a sequence of tailset characters - * name - the name of the field - * headset - charset for the first character. default is A-Z, a-z, and _ - * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _ - """ - return IdentifierAdapter( - Sequence(name, - CharOf("head", headset), - OptionalGreedyRange(CharOf("tail", tailset)), - ) - ) - - - - - - - - - - - - - - - - - - - - diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index f9a4500..de67a43 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -9,6 +9,7 @@ import copy from collections import namedtuple from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos) +from ..common.py3compat import iterkeys from .structs import DWARFStructs from .constants import * @@ -434,7 +435,7 @@ _PRIMARY_ARG_MASK = 0b00111111 # for DW_CFA_* instructions, and mapping their values to names. Since all # names were imported from constants with `import *`, we look in globals() _OPCODE_NAME_MAP = {} -for name in list(globals().iterkeys()): +for name in list(iterkeys(globals())): if name.startswith('DW_CFA'): _OPCODE_NAME_MAP[globals()[name]] = name diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index fe4537b..f0b5eb8 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- from collections import namedtuple -from ..common.ordereddict import OrderedDict +from ..common.py3compat import OrderedDict from ..common.utils import struct_parse, preserve_stream_pos diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index eccd486..267c1c7 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -6,8 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from cStringIO import StringIO - +from ..common.py3compat import StringIO, iteritems from ..common.utils import struct_parse, bytelist2string @@ -86,7 +85,7 @@ _generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_reg', 0, 31, 0x50) _generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70) # opcode -> name mapping -DW_OP_opcode2name = dict((v, k) for k, v in DW_OP_name2opcode.iteritems()) +DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode)) class GenericExprVisitor(object): diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index dcec555..23349b4 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from cStringIO import StringIO +from ..common.py3compat import StringIO from ..common.exceptions import ELFError from ..common.utils import struct_parse, elf_assert from ..construct import ConstructError @@ -26,7 +26,8 @@ class ELFFile(object): Accessible attributes: stream: - The stream holding the data of the file + The stream holding the data of the file - must be a binary + stream (bytes, not string). elfclass: 32 or 64 - specifies the word size of the target machine @@ -173,20 +174,20 @@ class ELFFile(object): # self.stream.seek(0) magic = self.stream.read(4) - elf_assert(magic == '\x7fELF', 'Magic number does not match') + elf_assert(magic == b'\x7fELF', 'Magic number does not match') ei_class = self.stream.read(1) - if ei_class == '\x01': + if ei_class == b'\x01': self.elfclass = 32 - elif ei_class == '\x02': + elif ei_class == b'\x02': self.elfclass = 64 else: raise ELFError('Invalid EI_CLASS %s' % repr(ei_class)) ei_data = self.stream.read(1) - if ei_data == '\x01': + if ei_data == b'\x01': self.little_endian = True - elif ei_data == '\x02': + elif ei_data == b'\x02': self.little_endian = False else: raise ELFError('Invalid EI_DATA %s' % repr(ei_data)) diff --git a/z.py b/z.py index 02d4f3f..d30feb3 100644 --- a/z.py +++ b/z.py @@ -1,6 +1,7 @@ # Just a script for playing around with pyelftools during testing # please ignore it! # +from __future__ import print_function import sys, pprint from elftools.elf.structs import ELFStructs @@ -13,18 +14,18 @@ from elftools.elf.relocation import * stream = open('test/testfiles/exe_simple64.elf', 'rb') efile = ELFFile(stream) -print 'elfclass', efile.elfclass -print '===> %s sections!' % efile.num_sections() -print efile.header +print('elfclass', efile.elfclass) +print('===> %s sections!' % efile.num_sections()) +print(efile.header) dinfo = efile.get_dwarf_info() from elftools.dwarf.locationlists import LocationLists from elftools.dwarf.descriptions import describe_DWARF_expr llists = LocationLists(dinfo.debug_loc_sec.stream, dinfo.structs) for loclist in llists.iter_location_lists(): - print '----> loclist!' + print('----> loclist!') for li in loclist: - print li - print describe_DWARF_expr(li.loc_expr, dinfo.structs) + print(li) + print(describe_DWARF_expr(li.loc_expr, dinfo.structs)) -- 2.30.2