From cf93b8679522034c4d761a92d942ef48fa5bbc9c Mon Sep 17 00:00:00 2001
From: Eli Bendersky
Date: Sat, 10 Dec 2011 09:57:28 +0200
Subject: [PATCH] call frame instructions parsing kinda done. now need to run some tests: ++ write unit tests for this

---
Review notes (this section is free-form text between the '---' marker and
the first 'diff --git' line; the hunks below were corrected in place):

 * _parse_entries: 'CIE_id = 0xFFFFFFFF' was an assignment inside an
   expression (SyntaxError); it is now the comparison 'CIE_id == 0xFFFFFFFF'.
 * _parse_entries: struct_parse() was given the bound method
   Dwarf_offset('').parse instead of the struct itself.
 * _parse_entries: 'structs' was not defined in that method, and the CIE/FDE
   header structs were looked up on self; both now use the per-entry
   'entry_structs' object.
 * _parse_entries: end_offset subtracted the size of the initial length
   field. The DWARF length value does not include the length field itself,
   so the field size has to be added.
 * _parse_instructions: 'arsg' typo fixed (arguments of DW_CFA_expression and
   DW_CFA_val_expression were silently dropped), and the instruction list is
   now actually returned, as the docstring promises.
 * Because one line was added, hunk 3 of callframe.py is now +60,96 and the
   diffstat is adjusted accordingly. The 'index' blob id of callframe.py
   below is the original one and no longer matches the post-image.

Still open, as far as this patch shows: _parse_entries never advances
'offset', and never appends to or returns 'entries' (see the ZZZ comment).

 elftools/dwarf/callframe.py | 130 ++++++++++++++++++++++++-
 elftools/dwarf/constants.py | 181 +++++++++++++++++++++---------------
 elftools/dwarf/structs.py   |   1 +
 3 files changed, 232 insertions(+), 80 deletions(-)

diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py
index 280caa4..e352e52 100644
--- a/elftools/dwarf/callframe.py
+++ b/elftools/dwarf/callframe.py
@@ -6,8 +6,37 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-from ..common.utils import (struct_parse)
+from collections import namedtuple
+
+from ..common.utils import (struct_parse, dwarf_assert)
 from .structs import DWARFStructs
+from .constants import *
+
+
+CallFrameInstruction = namedtuple(
+    'CallFrameInstruction', 'opcode args')
+
+
+class CIE(object):
+    def __init__(self, header, instructions):
+        self.header = header
+        self.instructions = instructions
+
+    def __getitem__(self, name):
+        """ Implement dict-like access to header entries
+        """
+        return self.header[name]
+
+
+class FDE(object):
+    def __init__(self, header, instructions):
+        self.header = header
+        self.instructions = instructions
+
+    def __getitem__(self, name):
+        """ Implement dict-like access to header entries
+        """
+        return self.header[name]
 
 
 class CallFrameInfo(object):
@@ -17,8 +46,11 @@ class CallFrameInfo(object):
         self.base_structs = base_structs
 
     def _parse_entries(self):
+        entries = []
         offset = 0
         while offset < self.size:
+            # Throughout the body of this loop, offset keeps pointing to the
+            # beginning of the entry
             entry_length = struct_parse(
                 self.base_structs.Dwarf_uint32(''), self.stream, offset)
             dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32
@@ -28,8 +60,96 @@ class CallFrameInfo(object):
                 dwarf_format=dwarf_format,
                 address_size=self.base_structs.address_size)
 
-            # ZZZ: it will be easier to split entry reading:
-            # header: what comes before the instructions
-            # the instructions are parsed separately (their length is computed
-            # from the length and the tell() after parsing the header)
+            # Read the next field to see whether this is a CIE or FDE
+            CIE_id = struct_parse(
+                entry_structs.Dwarf_offset(''), self.stream)
+
+            is_CIE = (
+                (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
+                CIE_id == 0xFFFFFFFFFFFFFFFF)
+
+            if is_CIE:
+                header_struct = entry_structs.Dwarf_CIE_header
+            else:
+                header_struct = entry_structs.Dwarf_FDE_header
+
+            # Parse the header, which goes up to and including the
+            # return_address_register field
+            header = struct_parse(
+                header_struct, self.stream, offset)
+
+            # For convenience, compute the end offset for this entry
+            end_offset = (offset + header.length +
+                          entry_structs.initial_length_field_size())
+
+            # At this point self.stream is at the start of the instruction list
+            # for this entry
+            instructions = self._parse_instructions(
+                entry_structs, self.stream.tell(), end_offset)
+
+            # ZZZ: for FDE's, I need some offset->CIE mapping cache stored
+
+    def _parse_instructions(self, structs, offset, end_offset):
+        """ Parse a list of CFI instructions from self.stream, starting with
+            the offset and until (not including) end_offset.
+            Return a list of CallFrameInstruction objects.
+        """
+        instructions = []
+        while offset < end_offset:
+            opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
+            args = []
+
+            primary = opcode & 0b11000000
+            primary_arg = opcode & 0b00111111
+            if primary == DW_CFA_advance_loc:
+                args = [primary_arg]
+            elif primary == DW_CFA_offset:
+                args = [
+                    primary_arg,
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
+            elif primary == DW_CFA_restore:
+                args = [primary_arg]
+            # primary == 0 and real opcode is extended
+            elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
+                            DW_CFA_restore_state):
+                args = []
+            elif opcode == DW_CFA_set_loc:
+                args = [
+                    struct_parse(structs.Dwarf_target_addr(''), self.stream)]
+            elif opcode == DW_CFA_advance_loc1:
+                args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
+            elif opcode == DW_CFA_advance_loc2:
+                args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
+            elif opcode == DW_CFA_advance_loc4:
+                args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
+            elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
+                            DW_CFA_def_cfa, DW_CFA_val_offset):
+                args = [
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
+            elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
+                            DW_CFA_same_value, DW_CFA_def_cfa_register,
+                            DW_CFA_def_cfa_offset):
+                args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
+            elif opcode == DW_CFA_def_cfa_offset_sf:
+                args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
+            elif opcode == DW_CFA_def_cfa_expression:
+                args = [struct_parse(
+                    structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
+            elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
+                args = [
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
+                    struct_parse(
+                        structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
+            elif opcode in (DW_CFA_offset_extended_sf,
+                            DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
+                args = [
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
+                    struct_parse(structs.Dwarf_sleb128(''), self.stream)]
+            else:
+                dwarf_assert(False, 'Unknown CFI opcode: %s' % opcode)
+
+            instructions.append(CallFrameInstruction(opcode=opcode, args=args))
+            offset = self.stream.tell()
+        return instructions
 
diff --git a/elftools/dwarf/constants.py b/elftools/dwarf/constants.py
index ae7e6b2..745bd6d 100644
--- a/elftools/dwarf/constants.py
+++ b/elftools/dwarf/constants.py
@@ -9,113 +9,113 @@
 
 # Inline codes
 #
-DW_INL_not_inlined=0
-DW_INL_inlined=1
-DW_INL_declared_not_inlined=2
-DW_INL_declared_inlined=3
+DW_INL_not_inlined = 0
+DW_INL_inlined = 1
+DW_INL_declared_not_inlined = 2
+DW_INL_declared_inlined = 3
 
 
 # Source languages
 #
-DW_LANG_C89=0x0001
-DW_LANG_C=0x0002
-DW_LANG_Ada83=0x0003
-DW_LANG_C_plus_plus=0x0004
-DW_LANG_Cobol74=0x0005
-DW_LANG_Cobol85=0x0006
-DW_LANG_Fortran77=0x0007
-DW_LANG_Fortran90=0x0008
-DW_LANG_Pascal83=0x0009
-DW_LANG_Modula2=0x000a
-DW_LANG_Java=0x000b
-DW_LANG_C99=0x000c
-DW_LANG_Ada95=0x000d
-DW_LANG_Fortran95=0x000e
-DW_LANG_PLI=0x000f
-DW_LANG_ObjC=0x0010
-DW_LANG_ObjC_plus_plus=0x0011
-DW_LANG_UPC=0x0012
-DW_LANG_D=0x0013
-DW_LANG_Python=0x0014
-DW_LANG_Mips_Assembler=0x8001
-DW_LANG_Upc=0x8765
-DW_LANG_HP_Bliss=0x8003
-DW_LANG_HP_Basic91=0x8004
-DW_LANG_HP_Pascal91=0x8005
-DW_LANG_HP_IMacro=0x8006
-DW_LANG_HP_Assembler=0x8007
+DW_LANG_C89 = 0x0001
+DW_LANG_C = 0x0002
+DW_LANG_Ada83 = 0x0003
+DW_LANG_C_plus_plus = 0x0004
+DW_LANG_Cobol74 = 0x0005
+DW_LANG_Cobol85 = 0x0006
+DW_LANG_Fortran77 = 0x0007
+DW_LANG_Fortran90 = 0x0008
+DW_LANG_Pascal83 = 0x0009
+DW_LANG_Modula2 = 0x000a
+DW_LANG_Java = 0x000b
+DW_LANG_C99 = 0x000c
+DW_LANG_Ada95 = 0x000d
+DW_LANG_Fortran95 = 0x000e
+DW_LANG_PLI = 0x000f
+DW_LANG_ObjC = 0x0010
+DW_LANG_ObjC_plus_plus = 0x0011
+DW_LANG_UPC = 0x0012
+DW_LANG_D = 0x0013
+DW_LANG_Python = 0x0014
+DW_LANG_Mips_Assembler = 0x8001
+DW_LANG_Upc = 0x8765
+DW_LANG_HP_Bliss = 0x8003
+DW_LANG_HP_Basic91 = 0x8004
+DW_LANG_HP_Pascal91 = 0x8005
+DW_LANG_HP_IMacro = 0x8006
+DW_LANG_HP_Assembler = 0x8007
 
 
 # Encoding
 #
-DW_ATE_void=0x0
-DW_ATE_address=0x1
-DW_ATE_boolean=0x2
-DW_ATE_complex_float=0x3
-DW_ATE_float=0x4
-DW_ATE_signed=0x5
-DW_ATE_signed_char=0x6
-DW_ATE_unsigned=0x7
-DW_ATE_unsigned_char=0x8
-DW_ATE_imaginary_float=0x9
-DW_ATE_packed_decimal=0xa
-DW_ATE_numeric_string=0xb
-DW_ATE_edited=0xc
-DW_ATE_signed_fixed=0xd
-DW_ATE_unsigned_fixed=0xe
-DW_ATE_decimal_float=0xf
-DW_ATE_UTF=0x10
-DW_ATE_lo_user=0x80
-DW_ATE_hi_user=0xff
-DW_ATE_HP_float80=0x80
-DW_ATE_HP_complex_float80=0x81
-DW_ATE_HP_float128=0x82
-DW_ATE_HP_complex_float128=0x83
-DW_ATE_HP_floathpintel=0x84
-DW_ATE_HP_imaginary_float80=0x85
-DW_ATE_HP_imaginary_float128=0x86
+DW_ATE_void = 0x0
+DW_ATE_address = 0x1
+DW_ATE_boolean = 0x2
+DW_ATE_complex_float = 0x3
+DW_ATE_float = 0x4
+DW_ATE_signed = 0x5
+DW_ATE_signed_char = 0x6
+DW_ATE_unsigned = 0x7
+DW_ATE_unsigned_char = 0x8
+DW_ATE_imaginary_float = 0x9
+DW_ATE_packed_decimal = 0xa
+DW_ATE_numeric_string = 0xb
+DW_ATE_edited = 0xc
+DW_ATE_signed_fixed = 0xd
+DW_ATE_unsigned_fixed = 0xe
+DW_ATE_decimal_float = 0xf
+DW_ATE_UTF = 0x10
+DW_ATE_lo_user = 0x80
+DW_ATE_hi_user = 0xff
+DW_ATE_HP_float80 = 0x80
+DW_ATE_HP_complex_float80 = 0x81
+DW_ATE_HP_float128 = 0x82
+DW_ATE_HP_complex_float128 = 0x83
+DW_ATE_HP_floathpintel = 0x84
+DW_ATE_HP_imaginary_float80 = 0x85
+DW_ATE_HP_imaginary_float128 = 0x86
 
 
 # Access
 #
-DW_ACCESS_public=1
-DW_ACCESS_protected=2
-DW_ACCESS_private=3
+DW_ACCESS_public = 1
+DW_ACCESS_protected = 2
+DW_ACCESS_private = 3
 
 
 # Visibility
 #
-DW_VIS_local=1
-DW_VIS_exported=2
-DW_VIS_qualified=3
+DW_VIS_local = 1
+DW_VIS_exported = 2
+DW_VIS_qualified = 3
 
 
 # Virtuality
 #
-DW_VIRTUALITY_none=0
-DW_VIRTUALITY_virtual=1
-DW_VIRTUALITY_pure_virtual=2
+DW_VIRTUALITY_none = 0
+DW_VIRTUALITY_virtual = 1
+DW_VIRTUALITY_pure_virtual = 2
 
 
 # ID case
 #
-DW_ID_case_sensitive=0
-DW_ID_up_case=1
-DW_ID_down_case=2
-DW_ID_case_insensitive=3
+DW_ID_case_sensitive = 0
+DW_ID_up_case = 1
+DW_ID_down_case = 2
+DW_ID_case_insensitive = 3
 
 
 # Calling convention
 #
-DW_CC_normal=0x1
-DW_CC_program=0x2
-DW_CC_nocall=0x3
+DW_CC_normal = 0x1
+DW_CC_program = 0x2
+DW_CC_nocall = 0x3
 
 
 # Ordering
 #
-DW_ORD_row_major=0
-DW_ORD_col_major=1
+DW_ORD_row_major = 0
+DW_ORD_col_major = 1
 
 
 # Line program opcodes
@@ -136,3 +136,34 @@ DW_LNE_end_sequence = 0x01
 DW_LNE_set_address = 0x02
 DW_LNE_define_file = 0x03
 
+
+# Call frame instructions
+#
+DW_CFA_advance_loc = 0b01000000
+DW_CFA_offset = 0b10000000
+DW_CFA_restore = 0b11000000
+DW_CFA_nop = 0x00
+DW_CFA_set_loc = 0x01
+DW_CFA_advance_loc1 = 0x02
+DW_CFA_advance_loc2 = 0x03
+DW_CFA_advance_loc4 = 0x04
+DW_CFA_offset_extended = 0x05
+DW_CFA_restore_extended = 0x06
+DW_CFA_undefined = 0x07
+DW_CFA_same_value = 0x08
+DW_CFA_register = 0x09
+DW_CFA_remember_state = 0x0a
+DW_CFA_restore_state = 0x0b
+DW_CFA_def_cfa = 0x0c
+DW_CFA_def_cfa_register = 0x0d
+DW_CFA_def_cfa_offset = 0x0e
+DW_CFA_def_cfa_expression = 0x0f
+DW_CFA_expression = 0x10
+DW_CFA_offset_extended_sf = 0x11
+DW_CFA_def_cfa_sf = 0x12
+DW_CFA_def_cfa_offset_sf = 0x13
+DW_CFA_val_offset = 0x14
+DW_CFA_val_offset_sf = 0x15
+DW_CFA_val_expression = 0x16
+
+
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py
index d25178c..10154ae 100644
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py
@@ -126,6 +126,7 @@ class DWARFStructs(object):
         self._create_abbrev_declaration()
         self._create_dw_form()
         self._create_lineprog_header()
+        self._create_callframe_entry_headers()
 
     def _create_initial_length(self):
         def _InitialLength(name):
-- 
2.30.2