From 3501f9f21a6efd296d1f09cae9fe603d75b0db4d Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 9 Dec 2011 12:29:06 +0200 Subject: [PATCH] Implement register name decoding in descriptions. This also allows removing the hack in run_readelf_tests for ignoring differences in reg names (previously missing from our output) --- elftools/dwarf/descriptions.py | 116 ++++++++++++++++++++++++++++++++- elftools/dwarf/dwarf_expr.py | 54 +-------------- scripts/readelf.py | 5 +- tests/run_readelf_tests.py | 9 --- tests/test_dwarf_expr.py | 15 ++++- 5 files changed, 133 insertions(+), 66 deletions(-) diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 4f88bf3..f7afcd8 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -9,11 +9,16 @@ from collections import defaultdict from .constants import * -from .dwarf_expr import ExprDumper +from .dwarf_expr import GenericExprVisitor from .die import DIE from ..common.utils import preserve_stream_pos +def set_global_machine_arch(machine_arch): + global _MACHINE_ARCH + _MACHINE_ARCH = machine_arch + + def describe_attr_value(attr, die, section_offset): """ Given an attribute attr, return the textual representation of its value, suitable for tools like readelf. @@ -32,8 +37,24 @@ def describe_attr_value(attr, die, section_offset): return str(val_description) + '\t' + extra_info +def describe_reg_name(regnum, machine_arch): + """ Provide a textual description for a register name, given its serial + number. The number is expected to be valid. + """ + if machine_arch == 'x86': + return _REG_NAMES_x86[regnum] + elif machine_arch == 'x64': + return _REG_NAMES_x64[regnum] + else: + return '' + #------------------------------------------------------------------------------- +# The machine architecture. 
Set globally via set_global_machine_arch +# +_MACHINE_ARCH = None + + def _describe_attr_ref(attr, die, section_offset): return '<0x%x>' % (attr.value + die.cu.cu_offset) @@ -301,4 +322,97 @@ _EXTRA_INFO_DESCRIPTION_MAP = defaultdict( DW_AT_import=_import_extra, ) +# 8 in a line, for easier counting +_REG_NAMES_x86 = [ + 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi', + 'eip', 'eflags', '', 'st0', 'st1', 'st2', 'st3', 'st4', + 'st5', 'st6', 'st7', '', '', 'xmm0', 'xmm1', 'xmm2', + 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'mm0', 'mm1', 'mm2', + 'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'fcw', 'fsw', 'mxcsr', + 'es', 'cs', 'ss', 'ds', 'fs', 'gs', '', '', 'tr', 'ldtr' +] + +_REG_NAMES_x64 = [ + 'rax', 'rdx', 'rcx', 'rbx', 'rsi', 'rdi', 'rbp', 'rsp', + 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15', + 'rip', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', + 'xmm7', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', + 'xmm15', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', + 'st7', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', + 'mm7', 'rflags', 'es', 'cs', 'ss', 'ds', 'fs', 'gs', + '', '', 'fs.base', 'gs.base', '', '', 'tr', 'ldtr', + 'mxcsr', 'fcw', 'fsw' +] + + +class ExprDumper(GenericExprVisitor): + """ A concrete visitor for DWARF expressions that dumps a textual + representation of the complete expression. + + Usage: after creation, call process_expr, and then get_str for a + semicolon-delimited string representation of the decoded expression. 
+ """ + def __init__(self, structs): + super(ExprDumper, self).__init__(structs) + self._init_lookups() + self._str_parts = [] + + def clear(self): + self._str_parts = [] + + def get_str(self): + return '; '.join(self._str_parts) + + def _init_lookups(self): + self._ops_with_decimal_arg = set([ + 'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s', + 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts', + 'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip', + 'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size', + 'DW_OP_xderef_size', 'DW_OP_regx',]) + + for n in range(0, 32): + self._ops_with_decimal_arg.add('DW_OP_breg%s' % n) + + self._ops_with_two_decimal_args = set([ + 'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx', 'DW_OP_bit_piece']) + + self._ops_with_hex_arg = set( + ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) + + def _after_visit(self, opcode, opcode_name, args): + self._str_parts.append(self._dump_to_string(opcode, opcode_name, args)) + + def _dump_to_string(self, opcode, opcode_name, args): + if len(args) == 0: + if opcode_name.startswith('DW_OP_reg'): + regnum = int(opcode_name[9:]) + return '%s (%s)' % ( + opcode_name, + describe_reg_name(regnum, _MACHINE_ARCH)) + else: + return opcode_name + elif opcode_name in self._ops_with_decimal_arg: + if opcode_name.startswith('DW_OP_breg'): + regnum = int(opcode_name[10:]) + return '%s (%s): %s' % ( + opcode_name, + describe_reg_name(regnum, _MACHINE_ARCH), + args[0]) + elif opcode_name.endswith('regx'): + # applies to both regx and bregx + return '%s: %s (%s)' % ( + opcode_name, + args[0], + describe_reg_name(args[0], _MACHINE_ARCH)) + else: + return '%s: %s' % (opcode_name, args[0]) + elif opcode_name in self._ops_with_hex_arg: + return '%s: %x' % (opcode_name, args[0]) + elif opcode_name in self._ops_with_two_decimal_args: + return '%s: %s %s' % (opcode_name, args[0], args[1]) + else: + return '<unknown %s>' % opcode_name + + diff --git a/elftools/dwarf/dwarf_expr.py 
b/elftools/dwarf/dwarf_expr.py index 432f454..eccd486 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -100,7 +100,7 @@ class GenericExprVisitor(object): the visitor then just execute process_expr. The subclass can keep its own internal information updated in _after_visit and provide methods to extract it. For a good example of this usage, see the - ExprDumper class in this module. + ExprDumper class in the descriptions module. A more complex usage could be to override visiting methods for specific instructions, by placing them into the dispatch table. @@ -256,55 +256,3 @@ class GenericExprVisitor(object): self._make_visitor_arg_struct(self.structs.Dwarf_offset(''))) -class ExprDumper(GenericExprVisitor): - """ A concrete visitor for DWARF expressions that dumps a textual - representation of the complete expression. - - Usage: after creation, call process_expr, and then get_str for a - semicolon-delimited string representation of the decoded expression. - """ - def __init__(self, structs): - super(ExprDumper, self).__init__(structs) - self._init_lookups() - self._str_parts = [] - - def clear(self): - self._str_parts = [] - - def get_str(self): - return '; '.join(self._str_parts) - - def _init_lookups(self): - self._ops_with_decimal_arg = set([ - 'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s', - 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts', - 'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip', - 'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size', - 'DW_OP_xderef_size', 'DW_OP_regx',]) - - for n in range(0, 32): - self._ops_with_decimal_arg.add('DW_OP_breg%s' % n) - - self._ops_with_two_decimal_args = set([ - 'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx', 'DW_OP_bit_piece']) - - self._ops_with_hex_arg = set( - ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) - - def _after_visit(self, opcode, opcode_name, args): - self._str_parts.append(self._dump_to_string(opcode, 
opcode_name, args)) - - def _dump_to_string(self, opcode, opcode_name, args): - if len(args) == 0: - return opcode_name - elif opcode_name in self._ops_with_decimal_arg: - return '%s: %s' % (opcode_name, args[0]) - elif opcode_name in self._ops_with_hex_arg: - return '%s: %x' % (opcode_name, args[0]) - elif opcode_name in self._ops_with_two_decimal_args: - return '%s: %s %s' % (opcode_name, args[0], args[1]) - else: - return '<unknown %s>' % opcode_name - - - diff --git a/scripts/readelf.py b/scripts/readelf.py index c11a197..335d5ee 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -35,7 +35,8 @@ from elftools.elf.descriptions import ( describe_symbol_shndx, describe_reloc_type, ) from elftools.dwarf.dwarfinfo import DWARFInfo -from elftools.dwarf.descriptions import describe_attr_value +from elftools.dwarf.descriptions import ( + describe_attr_value, set_global_machine_arch) from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) @@ -425,6 +426,8 @@ class ReadElf(object): if self._dwarfinfo is None: return + set_global_machine_arch(self.elffile.get_machine_arch()) + if dump_what == 'info': self._dump_debug_info() elif dump_what == 'decodedline': diff --git a/tests/run_readelf_tests.py b/tests/run_readelf_tests.py index af2efe8..7386e03 100755 --- a/tests/run_readelf_tests.py +++ b/tests/run_readelf_tests.py @@ -117,15 +117,6 @@ def compare_output(s1, s2): if ( len(changes) == 2 and changes[1][0] == 'delete' and lines1[i][changes[1][1]] == '@'): ok = True - elif 'dw_op' in lines1[i] and 'reg' in lines1[i]: - # readelf decodes register names, we don't do that. 
- no_worries = False - for change in changes: - if ( change[0] == 'delete' and - re.search('\(\w+', lines1[i][change[1]:change[2]])): - no_worries = True - if no_worries: - ok = True else: for s in ('t (tls)', 'l (large)'): if s in lines1[i] or s in lines2[i]: diff --git a/tests/test_dwarf_expr.py b/tests/test_dwarf_expr.py index 67cfc48..8e293db 100644 --- a/tests/test_dwarf_expr.py +++ b/tests/test_dwarf_expr.py @@ -2,7 +2,7 @@ import sys, unittest from cStringIO import StringIO sys.path.extend(('..', '.')) -from elftools.dwarf.dwarf_expr import ExprDumper +from elftools.dwarf.descriptions import ExprDumper, set_global_machine_arch from elftools.dwarf.structs import DWARFStructs @@ -14,6 +14,7 @@ class TestExprDumper(unittest.TestCase): def setUp(self): self.visitor = ExprDumper(self.structs32) + set_global_machine_arch('x64') def test_basic_single(self): self.visitor.process_expr([0x1b]) @@ -23,13 +24,23 @@ class TestExprDumper(unittest.TestCase): self.setUp() self.visitor.process_expr([0x74, 0x82, 0x01]) self.assertEqual(self.visitor.get_str(), - 'DW_OP_breg4: 130') + 'DW_OP_breg4 (rsi): 130') self.setUp() self.visitor.process_expr([0x91, 0x82, 0x01]) self.assertEqual(self.visitor.get_str(), 'DW_OP_fbreg: 130') + self.setUp() + self.visitor.process_expr([0x51]) + self.assertEqual(self.visitor.get_str(), + 'DW_OP_reg1 (rdx)') + + self.setUp() + self.visitor.process_expr([0x90, 16]) + self.assertEqual(self.visitor.get_str(), + 'DW_OP_regx: 16 (rip)') + self.setUp() self.visitor.process_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]) self.assertEqual(self.visitor.get_str(), -- 2.30.2