def bytelist2string(bytelist):
    r""" Convert a list of byte values (e.g. [0x10, 0x20, 0x00]) to a string
        (e.g. '\x10\x20\x00').

        bytelist:
            Iterable of integers; each one is passed to chr().

        Returns a string with one character per input value. An empty
        input yields the empty string.
    """
    # The docstring is raw on purpose: without the r-prefix the example
    # above would embed real control characters (including NUL) in __doc__.
    return ''.join(map(chr, bytelist))


def _location_list_extra(attr, die, section_offset):
    """ Produce the 'extra' description text for location attributes.

        attr:
            Attribute object; only its .form and .value are read here.

        die, section_offset:
            Accepted so that the signature matches the other extra-info
            callbacks; not used by this function.

        Returns '(location list)' for DW_FORM_data4/DW_FORM_data8 forms.
        For any other form a placeholder showing the raw value and its
        type is returned (location expressions are not decoded here).
    """
    # According to section 2.6 of the DWARF spec v3, class loclistptr means
    # a location list, and class block means a location expression.
    #
    if attr.form in ('DW_FORM_data4', 'DW_FORM_data8'):
        return '(location list)'
    return '<> %s %s' % (attr.value, type(attr.value))
# Location expression opcodes.
#
DW_OP_name2opcode = dict(
    DW_OP_addr=0x03,
    DW_OP_deref=0x06,
    DW_OP_const1u=0x08,
    DW_OP_const1s=0x09,
    DW_OP_const2u=0x0a,
    DW_OP_const2s=0x0b,
    DW_OP_const4u=0x0c,
    DW_OP_const4s=0x0d,
    DW_OP_const8u=0x0e,
    DW_OP_const8s=0x0f,
    DW_OP_constu=0x10,
    DW_OP_consts=0x11,
    DW_OP_dup=0x12,
    DW_OP_drop=0x13,
    DW_OP_over=0x14,
    DW_OP_pick=0x15,
    DW_OP_swap=0x16,
    DW_OP_rot=0x17,
    DW_OP_xderef=0x18,
    DW_OP_abs=0x19,
    DW_OP_and=0x1a,
    DW_OP_div=0x1b,
    DW_OP_minus=0x1c,
    DW_OP_mod=0x1d,
    DW_OP_mul=0x1e,
    DW_OP_neg=0x1f,
    DW_OP_not=0x20,
    DW_OP_or=0x21,
    DW_OP_plus=0x22,
    DW_OP_plus_uconst=0x23,
    DW_OP_shl=0x24,
    DW_OP_shr=0x25,
    DW_OP_shra=0x26,
    DW_OP_xor=0x27,
    DW_OP_bra=0x28,
    DW_OP_eq=0x29,
    DW_OP_ge=0x2a,
    DW_OP_gt=0x2b,
    DW_OP_le=0x2c,
    DW_OP_lt=0x2d,
    DW_OP_ne=0x2e,
    DW_OP_skip=0x2f,
    DW_OP_regx=0x90,
    DW_OP_fbreg=0x91,
    DW_OP_bregx=0x92,
    DW_OP_piece=0x93,
    DW_OP_deref_size=0x94,
    DW_OP_xderef_size=0x95,
    DW_OP_nop=0x96,
    DW_OP_push_object_address=0x97,
    DW_OP_call2=0x98,
    DW_OP_call4=0x99,
    DW_OP_call_ref=0x9a,
    DW_OP_form_tls_address=0x9b,
    DW_OP_call_frame_cfa=0x9c,
    DW_OP_bit_piece=0x9d,
    DW_OP_implicit_value=0x9e,
    DW_OP_stack_value=0x9f,
    DW_OP_GNU_push_tls_address=0xe0,
    DW_OP_GNU_uninit=0xf0,
    DW_OP_GNU_encoded_addr=0xf1,
    DW_OP_GNU_implicit_pointer=0xf2,
    DW_OP_GNU_entry_value=0xf3,
    DW_OP_GNU_const_type=0xf4,
    DW_OP_GNU_regval_type=0xf5,
    DW_OP_GNU_deref_type=0xf6,
    DW_OP_GNU_convert=0xf7,
    DW_OP_GNU_reinterpret=0xf9,
)


def _generate_dynamic_values(map, prefix, index_start, index_end, value_start):
    """ Generate values in a map (dict) dynamically. Each key starts with
        a (string) prefix, followed by an index in the inclusive range
        [index_start, index_end]. The values start at value_start and grow
        by one per index. The map is modified in place; nothing is
        returned.
    """
    for index in range(index_start, index_end + 1):
        name = '%s%s' % (prefix, index)
        value = value_start + index - index_start
        map[name] = value

_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_lit', 0, 31, 0x30)
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_reg', 0, 31, 0x50)
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70)

# Reverse mapping. items() (rather than iteritems()) keeps this line working
# on both Python 2 and Python 3.
DW_OP_opcode2name = dict((v, k) for k, v in DW_OP_name2opcode.items())


class GenericLocationExprVisitor(object):
    """ Base class for visitors that walk a DWARF location expression one
        opcode at a time.

        For every opcode read, process_expr looks up a visitor method in the
        dispatch table (falling back to _default_visitor), calls it, and then
        calls _after_visit. Subclasses must implement _after_visit and,
        to handle opcodes missing from the dispatch table, _default_visitor;
        the versions defined here raise NotImplementedError.
    """
    def __init__(self, structs):
        """ structs:
                Object providing Dwarf_target_addr(name), used to parse the
                argument of DW_OP_addr.
        """
        self.structs = structs
        self.stream = None
        self._init_dispatch_table()

        self._cur_opcode = None
        self._cur_opcode_name = None
        self._cur_args = []

    def process_expr(self, expr):
        """ Process (visit) a location expression. Currently two possible
            types are supported for expr:

            1. file-like stream object (anything with 'read' and 'seek')
            2. List of byte values (the result of parsed DW_FORM_block*
               attributes).

            Processing stops when the stream has no more data.
        """
        if hasattr(expr, 'read') and hasattr(expr, 'seek'):
            self.stream = expr
        else:
            self.stream = StringIO(bytelist2string(expr))

        while True:
            # Get the next opcode from the stream. If nothing is left in the
            # stream, we're done.
            byte = self.stream.read(1)
            if len(byte) == 0:
                break

            # Decode the opcode and its name. An opcode missing from the
            # table gets a synthetic name instead of raising KeyError, so
            # that _default_visitor gets a chance to handle it.
            self._cur_opcode = ord(byte)
            self._cur_opcode_name = DW_OP_opcode2name.get(
                self._cur_opcode, 'OP:0x%x' % self._cur_opcode)

            # Dispatch to a visitor function
            visitor = self._dispatch_table.get(
                self._cur_opcode,
                self._default_visitor)
            visitor(self._cur_opcode, self._cur_opcode_name)

            # Finally call the post-visit function. Note that the argument
            # list is passed as one positional value, so an override that
            # declares *args receives a 1-tuple holding that list.
            self._after_visit(
                self._cur_opcode, self._cur_opcode_name, self._cur_args)

    def _after_visit(self, opcode, opcode_name, *args):
        """ Hook called after each opcode has been visited. Must be
            overridden.
        """
        raise NotImplementedError()

    def _default_visitor(self, opcode, opcode_name):
        """ Visitor used for opcodes without a dispatch table entry. Must be
            overridden to support such opcodes.
        """
        raise NotImplementedError()

    def _visit_OP_with_no_args(self, opcode, opcode_name):
        """ Visitor for opcodes that take no arguments.
        """
        self._cur_args = []

    def _visit_OP_addr(self, opcode, opcode_name):
        """ Visitor for DW_OP_addr: its single argument is parsed from the
            stream with structs.Dwarf_target_addr.
        """
        self._cur_args = [
                struct_parse(self.structs.Dwarf_target_addr(''), self.stream)]

    def _init_dispatch_table(self):
        """ Build the opcode -> visitor method table.
        """
        self._dispatch_table = {}
        def add(opcode_name, func):
            self._dispatch_table[DW_OP_name2opcode[opcode_name]] = func

        add('DW_OP_addr', self._visit_OP_addr)
        add('DW_OP_deref', self._visit_OP_with_no_args)
def discover_testfiles(rootdir):
    """ Discover test files in the given directory. Yield them one by one,
        in sorted order so that consecutive runs visit files in the same
        sequence.
    """
    for filename in sorted(os.listdir(rootdir)):
        _, ext = os.path.splitext(filename)
        if ext == '.elf':
            yield os.path.join(rootdir, filename)


def run_exe(exe_path, args):
    """ Runs the given executable as a subprocess, given the
        list of arguments. Captures its return code (rc) and stdout and
        returns a pair: rc, stdout_str
    """
    popen_cmd = [exe_path] + args
    if os.path.splitext(exe_path)[1] == '.py':
        # Run scripts with the interpreter executing this runner rather
        # than whatever 'python' happens to be first on PATH.
        popen_cmd.insert(0, sys.executable or 'python')
    proc = subprocess.Popen(popen_cmd, stdout=subprocess.PIPE)
    proc_stdout = proc.communicate()[0]
    return proc.returncode, proc_stdout


def run_test_on_file(filename):
    """ Runs a test on the given input filename. Return True if all test
        runs succeeded. Returns False immediately if either executable
        exits with a non-zero return code.
    """
    success = True
    testlog.info("Running test on file '%s'" % filename)
    for option in ['-e', '-s', '-r', '-x.text', '-p.shstrtab']:
        testlog.info("..option='%s'" % option)
        # stdouts will be a 2-element list: output of readelf and output
        # of scripts/readelf.py
        stdouts = []
        for exe_path in ['readelf', 'scripts/readelf.py']:
            args = [option, filename]
            testlog.info("....executing: '%s %s'" % (
                exe_path, ' '.join(args)))
            rc, stdout = run_exe(exe_path, args)
            if rc != 0:
                testlog.error("@@ aborting - '%s' returned '%s'" % (
                    exe_path, rc))
                return False
            stdouts.append(stdout)
        testlog.info('....comparing output...')
        rc, errmsg = compare_output(*stdouts)
        if rc:
            testlog.info('.......................SUCCESS')
        else:
            success = False
            testlog.info('.......................FAIL')
            testlog.info('@@ ' + errmsg)
            dump_output_to_temp_files(*stdouts)
    return success


def compare_output(s1, s2):
    """ Compare stdout strings s1 and s2.
        Return pair success, errmsg. If comparison succeeds, success is True
        and errmsg is empty. Otherwise success is False and errmsg holds a
        description of the mismatch.

        The comparison is case-insensitive and ignores differences in
        whitespace within a line.

        Note: this function contains some rather horrible hacks to ignore
        differences which are not important for the verification of
        pyelftools. This is due to some intricacies of binutils's readelf
        which pyelftools doesn't currently implement, or silly
        inconsistencies in the output of readelf, which I was reluctant to
        replicate. Read the documentation for more details.
    """
    lines1 = s1.lower().splitlines()
    lines2 = s2.lower().splitlines()
    if len(lines1) != len(lines2):
        return False, 'Number of lines different: %s vs %s' % (
                len(lines1), len(lines2))

    flag_after_symtable = False

    for i, (line1, line2) in enumerate(zip(lines1, lines2)):
        if 'symbol table' in line1:
            flag_after_symtable = True

        # Compare ignoring whitespace
        if line1.split() == line2.split():
            continue

        if flag_after_symtable:
            # Detect readelf's adding @ with lib and version after
            # symbol name: the only difference is a trailing deletion
            # from line1 that starts with '@'.
            changes = SequenceMatcher(None, line1, line2).get_opcodes()
            if (len(changes) == 2 and changes[1][0] == 'delete' and
                    line1[changes[1][1]] == '@'):
                continue

        errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n' % (
            i, line1, line2)
        return False, errmsg
    return True, ''


def dump_output_to_temp_files(*args):
    """ Dumps the output strings given in 'args' to temp files: one for each
        arg. The path of every file written is logged.
    """
    for i, s in enumerate(args):
        fd, path = tempfile.mkstemp(
                prefix='out' + str(i + 1) + '_',
                suffix='.stdout')
        # The with-block closes the file even if the write fails.
        with os.fdopen(fd, 'w') as outfile:
            outfile.write(s)
        testlog.info('@@ Output #%s dumped to file: %s' % (i + 1, path))


def die(msg):
    """ Log an error message and exit the process with status 1.
    """
    testlog.error('Error: %s' % msg)
    sys.exit(1)


def is_in_rootdir():
    """ Check whether the current dir is the root dir of pyelftools
    """
    dirstuff = os.listdir('.')
    return 'tests' in dirstuff and 'elftools' in dirstuff


def main():
    """ Run the readelf comparison on the selected files.

        Returns 0 if every file passed and 1 otherwise, so that the result
        can be used as the process exit status.
    """
    if not is_in_rootdir():
        die('Please run me from the root dir of pyelftools!')

    # If file names are given as command-line arguments, only these files
    # are taken as inputs. Otherwise, autodiscovery is performed.
    #
    if len(sys.argv) > 1:
        filenames = sys.argv[1:]
    else:
        filenames = list(discover_testfiles('tests/testfiles'))

    # 'and' short-circuits: once one file fails, the remaining files are
    # not run.
    success = True
    for filename in filenames:
        success = success and run_test_on_file(filename)

    if success:
        testlog.info('\nConclusion: SUCCESS')
        return 0
    testlog.info('\nConclusion: FAIL')
    return 1


if __name__ == '__main__':
    sys.exit(main())
-testlog.addHandler(logging.StreamHandler(sys.stdout)) - - -def discover_testfiles(rootdir): - """ Discover test files in the given directory. Yield them one by one. - """ - for filename in os.listdir(rootdir): - _, ext = os.path.splitext(filename) - if ext == '.elf': - yield os.path.join(rootdir, filename) - - -def run_exe(exe_path, args): - """ Runs the given executable as a subprocess, given the - list of arguments. Captures its return code (rc) and stdout and - returns a pair: rc, stdout_str - """ - popen_cmd = [exe_path] + args - if os.path.splitext(exe_path)[1] == '.py': - popen_cmd.insert(0, 'python') - proc = subprocess.Popen(popen_cmd, stdout=subprocess.PIPE) - proc_stdout = proc.communicate()[0] - return proc.returncode, proc_stdout - - -def run_test_on_file(filename): - """ Runs a test on the given input filename. Return True if all test - runs succeeded. - """ - success = True - testlog.info("Running test on file '%s'" % filename) - for option in ['-e', '-s', '-r', '-x.text', '-p.shstrtab']: - testlog.info("..option='%s'" % option) - # stdouts will be a 2-element list: output of readelf and output - # of scripts/readelf.py - stdouts = [] - for exe_path in ['readelf', 'scripts/readelf.py']: - args = [option, filename] - testlog.info("....executing: '%s %s'" % ( - exe_path, ' '.join(args))) - rc, stdout = run_exe(exe_path, args) - if rc != 0: - testlog.error("@@ aborting - '%s' returned '%s'" % (exe_path, rc)) - return False - stdouts.append(stdout) - testlog.info('....comparing output...') - rc, errmsg = compare_output(*stdouts) - if rc: - testlog.info('.......................SUCCESS') - else: - success = False - testlog.info('.......................FAIL') - testlog.info('@@ ' + errmsg) - dump_output_to_temp_files(*stdouts) - return success - - -def compare_output(s1, s2): - """ Compare stdout strings s1 and s2. - Return pair success, errmsg. If comparison succeeds, success is True - and errmsg is empty. 
Otherwise success is False and errmsg holds a - description of the mismatch. - - Note: this function contains some rather horrible hacks to ignore - differences which are not important for the verification of pyelftools. - This is due to some intricacies of binutils's readelf which pyelftools - doesn't currently implement, or silly inconsistencies in the output of - readelf, which I was reluctant to replicate. - Read the documentation for more details. - """ - lines1 = s1.lower().splitlines() - lines2 = s2.lower().splitlines() - if len(lines1) != len(lines2): - return False, 'Number of lines different: %s vs %s' % ( - len(lines1), len(lines2)) - - flag_after_symtable = False - - for i in range(len(lines1)): - if 'symbol table' in lines1[i]: - flag_after_symtable = True - - # Compare ignoring whitespace - if lines1[i].split() != lines2[i].split(): - sm = SequenceMatcher() - sm.set_seqs(lines1[i], lines2[i]) - if flag_after_symtable: - # Detect readelf's adding @ with lib and version after - # symbol name. - changes = sm.get_opcodes() - if ( len(changes) == 2 and changes[1][0] == 'delete' and - lines1[i][changes[1][1]] == '@'): - continue - - errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n' % ( - i, lines1[i], lines2[i]) - return False, errmsg - return True, '' - - -def dump_output_to_temp_files(*args): - """ Dumps the output strings given in 'args' to temp files: one for each - arg. 
import sys, unittest

sys.path.extend(('..', '.'))
from elftools.dwarf.location_expr import (
        GenericLocationExprVisitor, DW_OP_opcode2name)
from elftools.dwarf.structs import DWARFStructs


class MyTestVisitor(GenericLocationExprVisitor):
    """ Visitor that records (opcode_name, args) for every opcode visited.
    """
    def __init__(self, structs):
        super(MyTestVisitor, self).__init__(structs)
        self.results = []

    def _after_visit(self, opcode, opcode_name, *args):
        self.results.append((opcode_name, args))


class TestGenericLocationExprVisitor(unittest.TestCase):
    structs32 = DWARFStructs(
            little_endian=True,
            dwarf_format=32,
            address_size=4)

    def test_basic(self):
        """ A DW_OP_addr followed by two DW_OP_deref opcodes must be
            reported as exactly three visits, in stream order.
        """
        visitor = MyTestVisitor(self.structs32)
        visitor.process_expr([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06])
        # NOTE(review): this assumes DW_OP_addr consumes exactly 4 argument
        # bytes with address_size=4 -- confirm against DWARFStructs.
        self.assertEqual(
            [name for name, _ in visitor.results],
            ['DW_OP_addr', 'DW_OP_deref', 'DW_OP_deref'])


if __name__ == '__main__':
    unittest.main()
% efile.num_segments() - -#~ for sec in efile.iter_sections(): - #~ print type(sec), sec.name - #~ if isinstance(sec, SymbolTableSection): - #~ print ' linked string table:', sec.stringtable.name - -#~ for seg in efile.iter_segments(): - #~ print type(seg), seg['p_type'], seg['p_offset'] - -#~ for sec in efile.iter_sections(): - #~ if isinstance(sec, SymbolTableSection): - #~ print 'symbol table "%s ~~~"' % sec.name - #~ for sym in sec.iter_symbols(): - #~ print '%-26s %s %s' % (sym.name, sym['st_info']['type'], sym['st_info']['bind'])