From 79271e94a3254e38edc1cd5df33a0e7132cb8afc Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 27 Jan 2012 10:25:47 +0200 Subject: [PATCH] More changes to port to Python 3 All tox tests pass now!! --- elftools/common/py3compat.py | 20 +++++++ elftools/common/utils.py | 11 ++-- elftools/dwarf/descriptions.py | 9 +++- elftools/dwarf/dwarf_expr.py | 4 +- elftools/dwarf/lineprogram.py | 4 +- elftools/dwarf/structs.py | 2 +- elftools/elf/descriptions.py | 5 +- elftools/elf/elffile.py | 28 +++++----- elftools/elf/relocation.py | 4 +- elftools/elf/sections.py | 8 ++- examples/dwarf_die_tree.py | 3 +- examples/dwarf_location_lists.py | 4 +- examples/dwarf_range_lists.py | 3 +- examples/elf_low_high_api.py | 11 ++-- examples/elf_relocations.py | 8 +-- examples/elf_show_debug_sections.py | 6 ++- examples/examine_dwarf_info.py | 3 +- scripts/readelf.py | 67 +++++++++++------------ test/run_all_unittests.py | 7 ++- test/test_callframe.py | 84 ++++++++++++++--------------- test/test_dwarf_expr.py | 1 - test/test_dwarf_lineprogram.py | 62 ++++++++++----------- test/test_dwarf_structs.py | 26 ++++----- test/test_utils.py | 36 ++++++------- test/utils.py | 3 +- tox.ini | 2 +- 26 files changed, 231 insertions(+), 190 deletions(-) diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py index bd08bd2..9b4529f 100644 --- a/elftools/common/py3compat.py +++ b/elftools/common/py3compat.py @@ -20,6 +20,14 @@ if PY3: _iterkeys = "keys" _iteritems = "items" + _itervalues = "values" + + def bytes2str(b): return b.decode('latin-1') + def str2bytes(s): return s.encode('latin-1') + def int2byte(i):return bytes((i,)) + def byte2int(b): return b + + ifilter = filter else: import cStringIO StringIO = BytesIO = cStringIO.StringIO @@ -28,12 +36,24 @@ else: _iterkeys = "iterkeys" _iteritems = "iteritems" + _itervalues = "itervalues" + + def bytes2str(b): return b + def str2bytes(s): return s + int2byte = chr + byte2int = ord + + from itertools import ifilter def iterkeys(d): """Return an iterator over the keys of a dictionary.""" return getattr(d, _iterkeys)() +def itervalues(d): + """Return an iterator over the values of a dictionary.""" + return getattr(d, _itervalues)() + def iteritems(d): """Return an iterator over the items of a dictionary.""" return getattr(d, _iteritems)() diff --git a/elftools/common/utils.py b/elftools/common/utils.py index e0a4cfe..2daed04 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -8,14 +8,15 @@ #------------------------------------------------------------------------------- from contextlib import contextmanager from .exceptions import ELFParseError, ELFError, DWARFError +from .py3compat import int2byte from ..construct import ConstructError def bytelist2string(bytelist): - """ Convert a list of byte values (e.g. [0x10 0x20 0x00]) to a string - (e.g. '\x10\x20\x00'). + """ Convert a list of byte values (e.g. [0x10 0x20 0x00]) to a bytes object + (e.g. b'\x10\x20\x00'). """ - return ''.join(chr(b) for b in bytelist) + return b''.join(int2byte(b) for b in bytelist) def struct_parse(struct, stream, stream_pos=None): @@ -48,7 +49,7 @@ def parse_cstring_from_stream(stream, stream_pos=None): found = False while True: chunk = stream.read(CHUNKSIZE) - end_index = chunk.find('\x00') + end_index = chunk.find(b'\x00') if end_index >= 0: chunks.append(chunk[:end_index]) found = True @@ -57,7 +58,7 @@ def parse_cstring_from_stream(stream, stream_pos=None): chunks.append(chunk) if len(chunk) < CHUNKSIZE: break - return ''.join(chunks) if found else None + return b''.join(chunks) if found else None def elf_assert(cond, msg=''): diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index c467c6a..721c97a 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -12,6 +12,7 @@ from .constants import * from .dwarf_expr import GenericExprVisitor from .die import DIE from ..common.utils import preserve_stream_pos, dwarf_assert +from ..common.py3compat import bytes2str from .callframe import instruction_name, CIE, FDE @@ -184,7 +185,11 @@ def _describe_attr_split_64bit(attr, die, section_offset): return '0x%x 0x%x' % (low_word, high_word) def _describe_attr_strp(attr, die, section_offset): - return '(indirect string, offset: 0x%x): %s' % (attr.raw_value, attr.value) + return '(indirect string, offset: 0x%x): %s' % ( + attr.raw_value, bytes2str(attr.value)) + +def _describe_attr_string(attr, die, section_offset): + return bytes2str(attr.value) def _describe_attr_debool(attr, die, section_offset): """ To be consistent with readelf, generate 1 for True flags, 0 for False @@ -216,7 +221,7 @@ _ATTR_DESCRIPTION_MAP = defaultdict( DW_FORM_data2=_describe_attr_value_passthrough, DW_FORM_sdata=_describe_attr_value_passthrough, DW_FORM_udata=_describe_attr_value_passthrough, - DW_FORM_string=_describe_attr_value_passthrough, + DW_FORM_string=_describe_attr_string, DW_FORM_strp=_describe_attr_strp, DW_FORM_block1=_describe_attr_block, DW_FORM_block2=_describe_attr_block, diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index 267c1c7..270a781 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..common.py3compat import StringIO, iteritems +from ..common.py3compat import BytesIO, iteritems from ..common.utils import struct_parse, bytelist2string @@ -116,7 +116,7 @@ class GenericExprVisitor(object): """ Process (visit) a DWARF expression. expr should be a list of (integer) byte values. """ - self.stream = StringIO(bytelist2string(expr)) + self.stream = BytesIO(bytelist2string(expr)) while True: # Get the next opcode from the stream. If nothing is left in the diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index 4d9c7b0..ee5193e 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -152,7 +152,7 @@ class LineProgram(object): if opcode >= self.header['opcode_base']: # Special opcode (follow the recipe in 6.2.5.1) adjusted_opcode = opcode - self['opcode_base'] - address_addend = ((adjusted_opcode / self['line_range']) * + address_addend = ((adjusted_opcode // self['line_range']) * self['minimum_instruction_length']) state.address += address_addend line_addend = (self['line_base'] + @@ -221,7 +221,7 @@ class LineProgram(object): add_entry_old_state(opcode, [operand]) elif opcode == DW_LNS_const_add_pc: adjusted_opcode = 255 - self['opcode_base'] - address_addend = ((adjusted_opcode / self['line_range']) * + address_addend = ((adjusted_opcode // self['line_range']) * self['minimum_instruction_length']) state.address += address_addend add_entry_old_state(opcode, [address_addend]) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 10154ae..cfb2515 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -218,7 +218,7 @@ class DWARFStructs(object): Array(lambda ctx: ctx['opcode_base'] - 1, self.Dwarf_uint8('standard_opcode_lengths')), RepeatUntilExcluding( - lambda obj, ctx: obj == '', + lambda obj, ctx: obj == b'', CString('include_directory')), RepeatUntilExcluding( lambda obj, ctx: len(obj.name) == 0, diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index fa630de..2cde281 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -8,6 +8,7 @@ #------------------------------------------------------------------------------- from .enums import ENUM_E_VERSION, ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64 from .constants import P_FLAGS, SH_FLAGS +from ..common.py3compat import iteritems def describe_ei_class(x): @@ -231,9 +232,9 @@ _DESCR_ST_SHNDX = dict( ) _DESCR_RELOC_TYPE_i386 = dict( - (v, k) for k, v in ENUM_RELOC_TYPE_i386.iteritems()) + (v, k) for k, v in iteritems(ENUM_RELOC_TYPE_i386)) _DESCR_RELOC_TYPE_x64 = dict( - (v, k) for k, v in ENUM_RELOC_TYPE_x64.iteritems()) + (v, k) for k, v in iteritems(ENUM_RELOC_TYPE_x64)) diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 23349b4..bda6cae 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..common.py3compat import StringIO +from ..common.py3compat import BytesIO from ..common.exceptions import ELFError from ..common.utils import struct_parse, elf_assert from ..construct import ConstructError @@ -109,7 +109,7 @@ class ELFFile(object): We assume that if it has the debug_info section, it has all theother required sections as well. """ - return bool(self.get_section_by_name('.debug_info')) + return bool(self.get_section_by_name(b'.debug_info')) def get_dwarf_info(self, relocate_dwarf_sections=True): """ Return a DWARFInfo object representing the debugging information in @@ -123,9 +123,9 @@ class ELFFile(object): # Sections that aren't found will be passed as None to DWARFInfo. # debug_sections = {} - for secname in ('.debug_info', '.debug_abbrev', '.debug_str', - '.debug_line', '.debug_frame', '.debug_loc', - '.debug_ranges'): + for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str', + b'.debug_line', b'.debug_frame', b'.debug_loc', + b'.debug_ranges'): section = self.get_section_by_name(secname) if section is None: debug_sections[secname] = None @@ -139,13 +139,13 @@ class ELFFile(object): little_endian=self.little_endian, default_address_size=self.elfclass / 8, machine_arch=self.get_machine_arch()), - debug_info_sec=debug_sections['.debug_info'], - debug_abbrev_sec=debug_sections['.debug_abbrev'], - debug_frame_sec=debug_sections['.debug_frame'], - debug_str_sec=debug_sections['.debug_str'], - debug_loc_sec=debug_sections['.debug_loc'], - debug_ranges_sec=debug_sections['.debug_ranges'], - debug_line_sec=debug_sections['.debug_line']) + debug_info_sec=debug_sections[b'.debug_info'], + debug_abbrev_sec=debug_sections[b'.debug_abbrev'], + debug_frame_sec=debug_sections[b'.debug_frame'], + debug_str_sec=debug_sections[b'.debug_str'], + debug_loc_sec=debug_sections[b'.debug_loc'], + debug_ranges_sec=debug_sections[b'.debug_ranges'], + debug_line_sec=debug_sections[b'.debug_line']) def get_machine_arch(self): """ Return the machine architecture, as detected from the ELF header. @@ -283,9 +283,7 @@ class ELFFile(object): """ self.stream.seek(section['sh_offset']) # The section data is read into a new stream, for processing - section_stream = StringIO() - # Using .write instead of initializing StringIO with the string because - # such a StringIO from cStringIO is read-only. + section_stream = BytesIO() section_stream.write(self.stream.read(section['sh_size'])) if relocate_dwarf_sections: diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index acb2da2..5ff853b 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -102,8 +102,8 @@ class RelocationHandler(object): found. """ reloc_section_names = ( - '.rel' + section.name, - '.rela' + section.name) + b'.rel' + section.name, + b'.rela' + section.name) # Find the relocation section aimed at this one. Currently assume # that either .rel or .rela section exists for this section, but # not both. diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 11aa170..518c857 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -7,7 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- from ..construct import CString -from ..common.utils import struct_parse, elf_assert +from ..common.utils import struct_parse, elf_assert, parse_cstring_from_stream class Section(object): @@ -63,10 +63,8 @@ class StringTableSection(Section): """ Get the string stored at the given offset in this string table. """ table_offset = self['sh_offset'] - return struct_parse( - CString(''), - self.stream, - stream_pos=table_offset + offset) + s = parse_cstring_from_stream(self.stream, table_offset + offset) + return s class SymbolTableSection(Section): diff --git a/examples/dwarf_die_tree.py b/examples/dwarf_die_tree.py index 9650f30..7eb07ff 100644 --- a/examples/dwarf_die_tree.py +++ b/examples/dwarf_die_tree.py @@ -17,6 +17,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import bytes2str from elftools.elf.elffile import ELFFile @@ -54,7 +55,7 @@ def process_file(filename): # is done transparently by the library, and such a value will be # simply given as a string. name_attr = top_DIE.attributes['DW_AT_name'] - print(' name=%s' % name_attr.value) + print(' name=%s' % bytes2str(name_attr.value)) # Display DIEs recursively starting with top_DIE die_info_rec(top_DIE) diff --git a/examples/dwarf_location_lists.py b/examples/dwarf_location_lists.py index 3ac989c..6289618 100644 --- a/examples/dwarf_location_lists.py +++ b/examples/dwarf_location_lists.py @@ -17,6 +17,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import itervalues from elftools.elf.elffile import ELFFile from elftools.dwarf.descriptions import ( describe_DWARF_expr, set_global_machine_arch) @@ -58,7 +59,7 @@ def process_file(filename): # Go over all attributes of the DIE. Each attribute is an # AttributeValue object (from elftools.dwarf.die), which we # can examine. - for attr in DIE.attributes.itervalues(): + for attr in itervalues(DIE.attributes): if attribute_has_location_list(attr): # This is a location list. Its value is an offset into # the .debug_loc section, so we can use the location @@ -71,6 +72,7 @@ def process_file(filename): attr.name, show_loclist(loclist, dwarfinfo, indent=' '))) + def show_loclist(loclist, dwarfinfo, indent): """ Display a location list nicely, decoding the DWARF expressions contained within. diff --git a/examples/dwarf_range_lists.py b/examples/dwarf_range_lists.py index 48a0cbb..c310e58 100644 --- a/examples/dwarf_range_lists.py +++ b/examples/dwarf_range_lists.py @@ -17,6 +17,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import itervalues from elftools.elf.elffile import ELFFile from elftools.dwarf.descriptions import ( describe_DWARF_expr, set_global_machine_arch) @@ -54,7 +55,7 @@ def process_file(filename): # Go over all attributes of the DIE. Each attribute is an # AttributeValue object (from elftools.dwarf.die), which we # can examine. - for attr in DIE.attributes.itervalues(): + for attr in itervalues(DIE.attributes): if attribute_has_range_list(attr): # This is a range list. Its value is an offset into # the .debug_ranges section, so we can use the range diff --git a/examples/elf_low_high_api.py b/examples/elf_low_high_api.py index a5600c1..05d7564 100644 --- a/examples/elf_low_high_api.py +++ b/examples/elf_low_high_api.py @@ -18,6 +18,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import bytes2str from elftools.elf.elffile import ELFFile from elftools.elf.sections import SymbolTableSection @@ -63,8 +64,10 @@ def section_info_highlevel(stream): elffile = ELFFile(stream) # Just use the public methods of ELFFile to get what we need + # Note that section names, like everything read from the file, are bytes + # objects. print(' %s sections' % elffile.num_sections()) - section = elffile.get_section_by_name('.symtab') + section = elffile.get_section_by_name(b'.symtab') if not section: print(' No symbol table found. Perhaps this ELF has been stripped?') @@ -72,8 +75,10 @@ def section_info_highlevel(stream): # A section type is in its header, but the name was decoded and placed in # a public attribute. + # bytes2str is used to print the name of the section for consistency of + # output between Python 2 and 3. The section name is a bytes object. print(' Section name: %s, type: %s' %( - section.name, section['sh_type'])) + bytes2str(section.name), section['sh_type'])) # But there's more... If this section is a symbol table section (which is # the case in the sample ELF file that comes with the examples), we can @@ -82,7 +87,7 @@ def section_info_highlevel(stream): num_symbols = section.num_symbols() print(" It's a symbol section with %s symbols" % num_symbols) print(" The name of the last symbol in the section is: %s" % ( - section.get_symbol(num_symbols - 1).name)) + bytes2str(section.get_symbol(num_symbols - 1).name))) if __name__ == '__main__': diff --git a/examples/elf_relocations.py b/examples/elf_relocations.py index 1817595..fd65541 100644 --- a/examples/elf_relocations.py +++ b/examples/elf_relocations.py @@ -17,6 +17,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import bytes2str from elftools.elf.elffile import ELFFile from elftools.elf.relocation import RelocationSection @@ -28,14 +29,15 @@ def process_file(filename): # Read the .rela.dyn section from the file, by explicitly asking # ELFFile for this section - reladyn_name = '.rela.dyn' + # Recall that section names are bytes objects + reladyn_name = b'.rela.dyn' reladyn = elffile.get_section_by_name(reladyn_name) if not isinstance(reladyn, RelocationSection): - print(' The file has no %s section' % reladyn_name) + print(' The file has no %s section' % bytes2str(reladyn_name)) print(' %s section with %s relocations' % ( - reladyn_name, reladyn.num_relocations())) + bytes2str(reladyn_name), reladyn.num_relocations())) for reloc in reladyn.iter_relocations(): print(' Relocation (%s)' % 'RELA' if reloc.is_RELA() else 'REL') diff --git a/examples/elf_show_debug_sections.py b/examples/elf_show_debug_sections.py index a7e0d22..96e1dd9 100644 --- a/examples/elf_show_debug_sections.py +++ b/examples/elf_show_debug_sections.py @@ -16,6 +16,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import bytes2str from elftools.elf.elffile import ELFFile @@ -25,8 +26,9 @@ def process_file(filename): elffile = ELFFile(f) for section in elffile.iter_sections(): - if section.name.startswith('.debug'): - print(' ' + section.name) + # Section names are bytes objects + if section.name.startswith(b'.debug'): + print(' ' + bytes2str(section.name)) if __name__ == '__main__': diff --git a/examples/examine_dwarf_info.py b/examples/examine_dwarf_info.py index 6be47b9..35ce35b 100644 --- a/examples/examine_dwarf_info.py +++ b/examples/examine_dwarf_info.py @@ -16,6 +16,7 @@ try: except ImportError: sys.path.extend(['.', '..']) +from elftools.common.py3compat import bytes2str from elftools.elf.elffile import ELFFile @@ -53,7 +54,7 @@ def process_file(filename): # is done transparently by the library, and such a value will be # simply given as a string. name_attr = top_DIE.attributes['DW_AT_name'] - print(' name=%s' % name_attr.value) + print(' name=%s' % bytes2str(name_attr.value)) if __name__ == '__main__': for filename in sys.argv[1:]: diff --git a/scripts/readelf.py b/scripts/readelf.py index 6ecac7b..04c75ba 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -9,7 +9,6 @@ #------------------------------------------------------------------------------- import os, sys from optparse import OptionParser -from itertools import ifilter import string # If elftools is not installed, maybe we're running from the root or scripts @@ -21,6 +20,8 @@ except ImportError: from elftools import __version__ from elftools.common.exceptions import ELFError +from elftools.common.py3compat import ( + ifilter, byte2int, bytes2str, itervalues, str2bytes) from elftools.elf.elffile import ELFFile from elftools.elf.segments import InterpSegment from elftools.elf.sections import SymbolTableSection @@ -65,7 +66,7 @@ class ReadElf(object): """ self._emitline('ELF Header:') self._emit(' Magic: ') - self._emitline(' '.join('%2.2x' % ord(b) + self._emitline(' '.join('%2.2x' % byte2int(b) for b in self.elffile.e_ident_raw)) header = self.elffile.header e_ident = header['e_ident'] @@ -173,7 +174,7 @@ class ReadElf(object): if isinstance(segment, InterpSegment): self._emitline(' [Requesting program interpreter: %s]' % - segment.get_interp_name()) + bytes2str(segment.get_interp_name())) # Sections to segments mapping # @@ -190,7 +191,7 @@ class ReadElf(object): for section in self.elffile.iter_sections(): if ( not section.is_null() and segment.section_in_segment(section)): - self._emit('%s ' % section.name) + self._emit('%s ' % bytes2str(section.name)) self._emitline('') @@ -217,7 +218,7 @@ class ReadElf(object): # for nsec, section in enumerate(self.elffile.iter_sections()): self._emit(' [%2u] %-17.17s %-15.15s ' % ( - nsec, section.name, describe_sh_type(section['sh_type']))) + nsec, bytes2str(section.name), describe_sh_type(section['sh_type']))) if self.elffile.elfclass == 32: self._emitline('%s %s %s %s %3s %2s %3s %2s' % ( @@ -259,11 +260,11 @@ class ReadElf(object): if section['sh_entsize'] == 0: self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % ( - section.name)) + bytes2str(section.name))) continue self._emitline("\nSymbol table '%s' contains %s entries:" % ( - section.name, section.num_symbols())) + bytes2str(section.name), section.num_symbols())) if self.elffile.elfclass == 32: self._emitline(' Num: Value Size Type Bind Vis Ndx Name') @@ -280,7 +281,7 @@ class ReadElf(object): describe_symbol_bind(symbol['st_info']['bind']), describe_symbol_visibility(symbol['st_other']['visibility']), describe_symbol_shndx(symbol['st_shndx']), - symbol.name)) + bytes2str(symbol.name))) def display_relocations(self): """ Display the relocations contained in the file @@ -292,7 +293,7 @@ class ReadElf(object): has_relocation_sections = True self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % ( - section.name, + bytes2str(section.name), self._format_hex(section['sh_offset']), section.num_relocations())) if section.is_RELA(): @@ -330,7 +331,7 @@ class ReadElf(object): symbol['st_value'], fullhex=True, lead0x=False), ' ' if self.elffile.elfclass == 32 else '', - symbol_name)) + bytes2str(symbol_name))) if section.is_RELA(): self._emit(' %s %x' % ( '+' if rel['r_addend'] >= 0 else '-', @@ -350,7 +351,7 @@ class ReadElf(object): section_spec)) return - self._emitline("\nHex dump of section '%s':" % section.name) + self._emitline("\nHex dump of section '%s':" % bytes2str(section.name)) self._note_relocs_for_section(section) addr = section['sh_addr'] data = section.data() @@ -364,18 +365,18 @@ class ReadElf(object): self._emit(' %s ' % self._format_hex(addr, fieldsize=8)) for i in range(16): if i < linebytes: - self._emit('%2.2x' % ord(data[dataptr + i])) + self._emit('%2.2x' % byte2int(data[dataptr + i])) else: self._emit(' ') if i % 4 == 3: self._emit(' ') for i in range(linebytes): - c = data[dataptr + i] - if c >= ' ' and ord(c) < 0x7f: - self._emit(c) + c = data[dataptr + i : dataptr + i + 1] + if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f: + self._emit(bytes2str(c)) else: - self._emit('.') + self._emit(bytes2str(b'.')) self._emitline() addr += linebytes @@ -393,27 +394,27 @@ class ReadElf(object): section_spec)) return - printables = set(string.printable) - self._emitline("\nString dump of section '%s':" % section.name) + self._emitline("\nString dump of section '%s':" % bytes2str(section.name)) found = False data = section.data() dataptr = 0 while dataptr < len(data): - while dataptr < len(data) and data[dataptr] not in printables: + while ( dataptr < len(data) and + not (32 <= byte2int(data[dataptr]) <= 127)): dataptr += 1 if dataptr >= len(data): break endptr = dataptr - while endptr < len(data) and data[endptr] != '\x00': + while endptr < len(data) and byte2int(data[endptr]) != 0: endptr += 1 found = True self._emitline(' [%6x] %s' % ( - dataptr, data[dataptr:endptr])) + dataptr, bytes2str(data[dataptr:endptr]))) dataptr = endptr @@ -479,7 +480,7 @@ class ReadElf(object): return None except ValueError: # Not a number. Must be a name then - return self.elffile.get_section_by_name(spec) + return self.elffile.get_section_by_name(str2bytes(spec)) def _note_relocs_for_section(self, section): """ If there are relocation sections pointing to the givne section, @@ -540,7 +541,7 @@ class ReadElf(object): die.abbrev_code, die.tag)) - for attr in die.attributes.itervalues(): + for attr in itervalues(die.attributes): name = attr.name # Unknown attribute values are passed-through as integers if isinstance(name, int): @@ -568,10 +569,10 @@ class ReadElf(object): cu_filename = '' if len(lineprogram['include_directory']) > 0: cu_filename = '%s/%s' % ( - lineprogram['include_directory'][0], - lineprogram['file_entry'][0].name) + bytes2str(lineprogram['include_directory'][0]), + bytes2str(lineprogram['file_entry'][0].name)) else: - cu_filename = lineprogram['file_entry'][0].name + cu_filename = bytes2str(lineprogram['file_entry'][0].name) self._emitline('CU: %s:' % cu_filename) self._emitline('File name Line number Starting address') @@ -588,20 +589,20 @@ class ReadElf(object): if file_entry.dir_index == 0: # current directory self._emitline('\n./%s:[++]' % ( - file_entry.name)) + bytes2str(file_entry.name))) else: self._emitline('\n%s/%s:' % ( - lineprogram['include_directory'][file_entry.dir_index - 1], - file_entry.name)) + bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]), + bytes2str(file_entry.name))) elif entry.command == DW_LNE_define_file: self._emitline('%s:' % ( - lineprogram['include_directory'][entry.args[0].dir_index])) + bytes2str(lineprogram['include_directory'][entry.args[0].dir_index]))) elif not state.end_sequence: # readelf doesn't print the state after end_sequence # instructions. I think it's a bug but to be compatible # I don't print them too. self._emitline('%-35s %11d %18s' % ( - lineprogram['file_entry'][state.file - 1].name, + bytes2str(lineprogram['file_entry'][state.file - 1].name), state.line, '0' if state.address == 0 else self._format_hex(state.address))) @@ -621,7 +622,7 @@ class ReadElf(object): self._emitline('\n%08x %08x %08x CIE' % ( entry.offset, entry['length'], entry['CIE_id'])) self._emitline(' Version: %d' % entry['version']) - self._emitline(' Augmentation: "%s"' % entry['augmentation']) + self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation'])) self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor']) self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor']) self._emitline(' Return address column: %d' % entry['return_address_register']) @@ -652,7 +653,7 @@ class ReadElf(object): entry.offset, entry['length'], entry['CIE_id'], - entry['augmentation'], + bytes2str(entry['augmentation']), entry['code_alignment_factor'], entry['data_alignment_factor'], entry['return_address_register'])) diff --git a/test/run_all_unittests.py b/test/run_all_unittests.py index e1a62f2..1130251 100755 --- a/test/run_all_unittests.py +++ b/test/run_all_unittests.py @@ -7,6 +7,8 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import print_function + try: import unittest2 as unittest except ImportError: @@ -18,6 +20,7 @@ if __name__ == '__main__': tests = unittest.TestLoader().discover('test', 'test*.py', 'test') unittest.TextTestRunner().run(tests) except ImportError as err: - print err - print '!! Please execute from the root directory of pyelftools' + print(err) + print('!! Please execute from the root directory of pyelftools') + diff --git a/test/test_callframe.py b/test/test_callframe.py index 3fe1b9a..33fb1e7 100644 --- a/test/test_callframe.py +++ b/test/test_callframe.py @@ -3,9 +3,9 @@ try: except ImportError: import unittest import sys -from cStringIO import StringIO sys.path.extend(['.', '..']) +from elftools.common.py3compat import BytesIO from elftools.dwarf.callframe import ( CallFrameInfo, CIE, FDE, instruction_name, CallFrameInstruction, RegisterRule) @@ -22,42 +22,42 @@ class TestCallFrame(unittest.TestCase): def test_spec_sample_d6(self): # D.6 sample in DWARFv3 - s = StringIO() - data = ('' + + s = BytesIO() + data = (b'' + # first comes the CIE - '\x20\x00\x00\x00' + # length - '\xff\xff\xff\xff' + # CIE_id - '\x03\x00\x04\x7c' + # version, augmentation, caf, daf - '\x08' + # return address - '\x0c\x07\x00' + - '\x08\x00' + - '\x07\x01' + - '\x07\x02' + - '\x07\x03' + - '\x08\x04' + - '\x08\x05' + - '\x08\x06' + - '\x08\x07' + - '\x09\x08\x01' + - '\x00' + + b'\x20\x00\x00\x00' + # length + b'\xff\xff\xff\xff' + # CIE_id + b'\x03\x00\x04\x7c' + # version, augmentation, caf, daf + b'\x08' + # return address + b'\x0c\x07\x00' + + b'\x08\x00' + + b'\x07\x01' + + b'\x07\x02' + + b'\x07\x03' + + b'\x08\x04' + + b'\x08\x05' + + b'\x08\x06' + + b'\x08\x07' + + b'\x09\x08\x01' + + b'\x00' + # then comes the FDE - '\x28\x00\x00\x00' + # length - '\x00\x00\x00\x00' + # CIE_pointer (to CIE at 0) - '\x44\x33\x22\x11' + # initial_location - '\x54\x00\x00\x00' + # address range - '\x41' + - '\x0e\x0c' + '\x41' + - '\x88\x01' + '\x41' + - '\x86\x02' + '\x41' + - '\x0d\x06' + '\x41' + - '\x84\x03' + '\x4b' + - '\xc4' + '\x41' + - '\xc6' + - '\x0d\x07' + '\x41' + - '\xc8' + '\x41' + - '\x0e\x00' + - '\x00\x00' + b'\x28\x00\x00\x00' + # length + b'\x00\x00\x00\x00' + # CIE_pointer (to CIE at 0) + b'\x44\x33\x22\x11' + # initial_location + b'\x54\x00\x00\x00' + # address range + b'\x41' + + b'\x0e\x0c' + b'\x41' + + b'\x88\x01' + b'\x41' + + b'\x86\x02' + b'\x41' + + b'\x0d\x06' + b'\x41' + + b'\x84\x03' + b'\x4b' + + b'\xc4' + b'\x41' + + b'\xc6' + + b'\x0d\x07' + b'\x41' + + b'\xc8' + b'\x41' + + b'\x0e\x00' + + b'\x00\x00' ) s.write(data) @@ -126,14 +126,14 @@ class TestCallFrame(unittest.TestCase): def test_describe_CFI_instructions(self): # The data here represents a single CIE - data = ('' + - '\x16\x00\x00\x00' + # length - '\xff\xff\xff\xff' + # CIE_id - '\x03\x00\x04\x7c' + # version, augmentation, caf, daf - '\x08' + # return address - '\x0c\x07\x02' + - '\x10\x02\x07\x03\x01\x02\x00\x00\x06\x06') - s = StringIO(data) + data = (b'' + + b'\x16\x00\x00\x00' + # length + b'\xff\xff\xff\xff' + # CIE_id + b'\x03\x00\x04\x7c' + # version, augmentation, caf, daf + b'\x08' + # return address + b'\x0c\x07\x02' + + b'\x10\x02\x07\x03\x01\x02\x00\x00\x06\x06') + s = BytesIO(data) structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) cfi = CallFrameInfo(s, len(data), structs) diff --git a/test/test_dwarf_expr.py b/test/test_dwarf_expr.py index d740eaf..0efface 100644 --- a/test/test_dwarf_expr.py +++ b/test/test_dwarf_expr.py @@ -3,7 +3,6 @@ try: except ImportError: import unittest import sys -from cStringIO import StringIO sys.path.extend(('..', '.')) from elftools.dwarf.descriptions import ExprDumper, set_global_machine_arch diff --git a/test/test_dwarf_lineprogram.py b/test/test_dwarf_lineprogram.py index d3e1100..75c88c8 100644 --- a/test/test_dwarf_lineprogram.py +++ b/test/test_dwarf_lineprogram.py @@ -3,9 +3,9 @@ try: except ImportError: import unittest import sys -from cStringIO import StringIO sys.path.extend(['.', '..']) +from elftools.common.py3compat import BytesIO, iteritems from elftools.dwarf.lineprogram import LineProgram, LineState, LineProgramEntry from elftools.dwarf.structs import DWARFStructs from elftools.dwarf.constants import * @@ -17,20 +17,20 @@ class TestLineProgram(unittest.TestCase): """ ds = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) header = ds.Dwarf_lineprog_header.parse( - '\x04\x10\x00\x00' + # initial lenght - '\x03\x00' + # version - '\x20\x00\x00\x00' + # header length - '\x01\x01\x01\x0F' + # flags - '\x0A' + # opcode_base - '\x00\x01\x04\x08\x0C\x01\x01\x01\x00' + # standard_opcode_lengths + b'\x04\x10\x00\x00' + # initial lenght + b'\x03\x00' + # version + b'\x20\x00\x00\x00' + # header length + b'\x01\x01\x01\x0F' + # flags + b'\x0A' + # opcode_base + b'\x00\x01\x04\x08\x0C\x01\x01\x01\x00' + # standard_opcode_lengths # 2 dir names followed by a NULL - '\x61\x62\x00\x70\x00\x00' + + b'\x61\x62\x00\x70\x00\x00' + # a file entry - '\x61\x72\x00\x0C\x0D\x0F' + + b'\x61\x72\x00\x0C\x0D\x0F' + # and another entry - '\x45\x50\x51\x00\x86\x12\x07\x08' + + b'\x45\x50\x51\x00\x86\x12\x07\x08' + # followed by NULL - '\x00') + b'\x00') lp = LineProgram(header, stream, ds, 0, len(stream.getvalue())) return lp @@ -39,20 +39,20 @@ class TestLineProgram(unittest.TestCase): """ Assert that the state attributes specified in kwargs have the given values (the rest are default). """ - for k, v in kwargs.iteritems(): + for k, v in iteritems(kwargs): self.assertEqual(getattr(state, k), v) def test_spec_sample_59(self): # Sample in figure 59 of DWARFv3 - s = StringIO() + s = BytesIO() s.write( - '\x02\xb9\x04' + - '\x0b' + - '\x38' + - '\x82' + - '\x73' + - '\x02\x02' + - '\x00\x01\x01') + b'\x02\xb9\x04' + + b'\x0b' + + b'\x38' + + b'\x82' + + b'\x73' + + b'\x02\x02' + + b'\x00\x01\x01') lp = self._make_program_in_stream(s) linetable = lp.get_entries() @@ -73,18 +73,18 @@ class TestLineProgram(unittest.TestCase): def test_spec_sample_60(self): # Sample in figure 60 of DWARFv3 - s = StringIO() + s = BytesIO() s.write( - '\x09\x39\x02' + - '\x0b' + - '\x09\x03\x00' + - '\x0b' + - '\x09\x08\x00' + - '\x0a' + - '\x09\x07\x00' + - '\x0a' + - '\x09\x02\x00' + - '\x00\x01\x01') + b'\x09\x39\x02' + + b'\x0b' + + b'\x09\x03\x00' + + b'\x0b' + + b'\x09\x08\x00' + + b'\x0a' + + b'\x09\x07\x00' + + b'\x0a' + + b'\x09\x02\x00' + + b'\x00\x01\x01') lp = self._make_program_in_stream(s) linetable = lp.get_entries() diff --git a/test/test_dwarf_structs.py b/test/test_dwarf_structs.py index d85ee0f..eaf972c 100644 --- a/test/test_dwarf_structs.py +++ b/test/test_dwarf_structs.py @@ -13,28 +13,28 @@ class TestDWARFStructs(unittest.TestCase): ds = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) c = ds.Dwarf_lineprog_header.parse( - '\x04\x10\x00\x00' + # initial lenght - '\x05\x02' + # version - '\x20\x00\x00\x00' + # header length - '\x05\x10\x40\x50' + # until and including line_range - '\x06' + # opcode_base - '\x00\x01\x04\x08\x0C' + # standard_opcode_lengths + b'\x04\x10\x00\x00' + # initial lenght + b'\x05\x02' + # version + b'\x20\x00\x00\x00' + # header length + b'\x05\x10\x40\x50' + # until and including line_range + b'\x06' + # opcode_base + b'\x00\x01\x04\x08\x0C' + # standard_opcode_lengths # 2 dir names followed by a NULL - '\x61\x62\x00\x70\x00\x00' + + b'\x61\x62\x00\x70\x00\x00' + # a file entry - '\x61\x72\x00\x0C\x0D\x0F' + + b'\x61\x72\x00\x0C\x0D\x0F' + # and another entry - '\x45\x50\x51\x00\x86\x12\x07\x08' + + b'\x45\x50\x51\x00\x86\x12\x07\x08' + # followed by NULL - '\x00') + b'\x00') self.assertEqual(c.version, 0x205) self.assertEqual(c.opcode_base, 6) self.assertEqual(c.standard_opcode_lengths, [0, 1, 4, 8, 12]) - self.assertEqual(c.include_directory, ['ab', 'p']) + self.assertEqual(c.include_directory, [b'ab', b'p']) self.assertEqual(len(c.file_entry), 2) - self.assertEqual(c.file_entry[0].name, 'ar') - self.assertEqual(c.file_entry[1].name, 'EPQ') + self.assertEqual(c.file_entry[0].name, b'ar') + self.assertEqual(c.file_entry[1].name, b'EPQ') self.assertEqual(c.file_entry[1].dir_index, 0x12 * 128 + 6) diff --git a/test/test_utils.py b/test/test_utils.py index a30ac3a..f0142d8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -3,46 +3,46 @@ try: except ImportError: import unittest import sys -from cStringIO import StringIO from random import randint sys.path.extend(['.', '..']) +from elftools.common.py3compat import int2byte, BytesIO from elftools.common.utils import (parse_cstring_from_stream, preserve_stream_pos) class Test_parse_cstring_from_stream(unittest.TestCase): - def _make_random_string(self, n): - return ''.join(chr(randint(32, 127)) for i in range(n)) + def _make_random_bytes(self, n): + return b''.join(int2byte(randint(32, 127)) for i in range(n)) def test_small1(self): - sio = StringIO('abcdefgh\x0012345') - self.assertEqual(parse_cstring_from_stream(sio), 'abcdefgh') - self.assertEqual(parse_cstring_from_stream(sio, 2), 'cdefgh') - self.assertEqual(parse_cstring_from_stream(sio, 8), '') + sio = BytesIO(b'abcdefgh\x0012345') + self.assertEqual(parse_cstring_from_stream(sio), b'abcdefgh') + self.assertEqual(parse_cstring_from_stream(sio, 2), b'cdefgh') + self.assertEqual(parse_cstring_from_stream(sio, 8), b'') def test_small2(self): - sio = StringIO('12345\x006789\x00abcdefg\x00iii') - self.assertEqual(parse_cstring_from_stream(sio), '12345') - self.assertEqual(parse_cstring_from_stream(sio, 5), '') - self.assertEqual(parse_cstring_from_stream(sio, 6), '6789') + sio = BytesIO(b'12345\x006789\x00abcdefg\x00iii') + self.assertEqual(parse_cstring_from_stream(sio), b'12345') + self.assertEqual(parse_cstring_from_stream(sio, 5), b'') + self.assertEqual(parse_cstring_from_stream(sio, 6), b'6789') def test_large1(self): - text = 'i' * 400 + '\x00' + 'bb' - sio = StringIO(text) - self.assertEqual(parse_cstring_from_stream(sio), 'i' * 400) - self.assertEqual(parse_cstring_from_stream(sio, 150), 'i' * 250) + text = b'i' * 400 + b'\x00' + b'bb' + sio = BytesIO(text) + self.assertEqual(parse_cstring_from_stream(sio), b'i' * 400) + self.assertEqual(parse_cstring_from_stream(sio, 150), b'i' * 250) def test_large2(self): - text = self._make_random_string(5000) + '\x00' + 'jujajaja' - sio = StringIO(text) + text = self._make_random_bytes(5000) + b'\x00' + b'jujajaja' + sio = BytesIO(text) self.assertEqual(parse_cstring_from_stream(sio), text[:5000]) self.assertEqual(parse_cstring_from_stream(sio, 2348), text[2348:5000]) class Test_preserve_stream_pos(object): def test_basic(self): - sio = StringIO('abcdef') + sio = BytesIO('abcdef') with preserve_stream_pos(sio): sio.seek(4) self.assertEqual(stream.tell(), 0) diff --git a/test/utils.py b/test/utils.py index 803a999..0ee0ebf 100644 --- a/test/utils.py +++ b/test/utils.py @@ -7,6 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- import os, subprocess, tempfile +from elftools.common.py3compat import bytes2str def run_exe(exe_path, args): @@ -19,7 +20,7 @@ def run_exe(exe_path, args): popen_cmd.insert(0, 'python') proc = subprocess.Popen(popen_cmd, stdout=subprocess.PIPE) proc_stdout = proc.communicate()[0] - return proc.returncode, proc_stdout + return proc.returncode, bytes2str(proc_stdout) def is_in_rootdir(): diff --git a/tox.ini b/tox.ini index 4c4a5c2..73fbb84 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py26 +envlist = py27,py26,py32 [testenv] commands = -- 2.30.2