call frame instructions parsing kinda done. now need to run some tests: ++ write...
authorEli Bendersky <eliben@gmail.com>
Sat, 10 Dec 2011 07:57:28 +0000 (09:57 +0200)
committerEli Bendersky <eliben@gmail.com>
Sat, 10 Dec 2011 07:57:28 +0000 (09:57 +0200)
elftools/dwarf/callframe.py
elftools/dwarf/constants.py
elftools/dwarf/structs.py

index 280caa44810111a5c9e4c9e5d34c1cf049e02eb0..e352e52fc3a69617445d14880e39edd2678e3729 100644 (file)
@@ -6,8 +6,37 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-from ..common.utils import (struct_parse)
+from collections import namedtuple
+
+from ..common.utils import (struct_parse, dwarf_assert)
 from .structs import DWARFStructs
+from .constants import * 
+
+
+CallFrameInstruction = namedtuple(
+    'CallFrameInstruction', 'opcode args')
+
+
+class CFIEntry(object):
+    """ Common base for the entries found in call frame information: holds
+        the parsed header of an entry together with its instructions.
+
+        header:
+            The parsed header of the entry. Must support lookup of its
+            fields by name with [].
+
+        instructions:
+            The call frame instructions that belong to this entry.
+
+        CIE and FDE carry exactly the same data, so the shared behavior
+        lives here and the subclasses only serve to tell the two kinds of
+        entries apart.
+    """
+    def __init__(self, header, instructions):
+        self.header = header
+        self.instructions = instructions
+
+    def __getitem__(self, name):
+        """ Implement dict-like access to header entries
+        """
+        return self.header[name]
+
+
+class CIE(CFIEntry):
+    """ A CIE entry: its header and its instructions.
+    """
+    pass
+
+
+class FDE(CFIEntry):
+    """ A FDE entry: its header and its instructions.
+    """
+    pass
 
 
 class CallFrameInfo(object):
@@ -17,8 +46,11 @@ class CallFrameInfo(object):
         self.base_structs = base_structs
 
     def _parse_entries(self):
+        entries = []
         offset = 0
         while offset < self.size:
+            # Throughout the body of this loop, offset keeps pointing to the
+            # beginning of the entry
             entry_length = struct_parse(
                 self.base_structs.Dwarf_uint32(''), self.stream, offset)
             dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32
@@ -28,8 +60,95 @@ class CallFrameInfo(object):
                 dwarf_format=dwarf_format,
                 address_size=self.base_structs.address_size)
 
-            # ZZZ: it will be easier to split entry reading:
-            # header: what comes before the instructions
-            # the instructions are parsed separately (their length is computed
-            # from the length and the tell() after parsing the header)
+            # Read the next field to see whether this is a CIE or FDE
+            CIE_id = struct_parse(
+                entry_structs.Dwarf_offset('').parse, self.stream)
+
+            is_CIE = (
+                dwarf_format == 32 and CIE_id = 0xFFFFFFFF or 
+                CIE_id == 0xFFFFFFFFFFFFFFFF)
+
+            if is_CIE:
+                header_struct = self.Dwarf_CIE_header
+            else:
+                header_struct = self.Dwarf_FDE_header
+
+            # Parse the header, which goes up to and including the
+            # return_address_register field
+            header = struct_parse(
+                header_struct, self.stream, offset)
+
+            # For convenience, compute the end offset for this entry
+            end_offset = (
+                offset + header.length - structs.initial_length_field_size())
+
+            # At this point self.stream is at the start of the instruction list
+            # for this entry
+            instructions = self._parse_instructions(
+                structs, self.stream.tell(), end_offset)
+
+            # ZZZ: for FDE's, I need some offset->CIE mapping cache stored
+
+    def _parse_instructions(self, structs, offset, end_offset):
+        """ Parse a list of CFI instructions from self.stream, starting with
+            the offset and until (not including) end_offset.
+            Return a list of CallFrameInstruction objects.
+
+            structs:
+                The structs object used to parse the opcode and its
+                arguments from the stream.
+
+            Each returned instruction holds the full opcode byte as read
+            from the stream and the list of its decoded arguments. For the
+            primary opcodes, the first argument is the operand embedded in
+            the low 6 bits of the opcode byte.
+
+            An opcode that isn't recognized is reported via dwarf_assert.
+        """
+        instructions = []
+        while offset < end_offset:
+            # Only the opcode is read with an explicit offset. The arguments
+            # are read from wherever the stream was left by the previous
+            # struct_parse call.
+            opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
+            args = []
+
+            # The high 2 bits select one of the primary opcodes. If they
+            # are non-zero, the low 6 bits carry an embedded operand.
+            primary = opcode & 0b11000000
+            primary_arg = opcode & 0b00111111
+            if primary == DW_CFA_advance_loc:
+                args = [primary_arg]
+            elif primary == DW_CFA_offset:
+                args = [
+                    primary_arg,
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
+            elif primary == DW_CFA_restore:
+                args = [primary_arg]
+            # primary == 0 and real opcode is extended
+            elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
+                            DW_CFA_restore_state):
+                args = []
+            elif opcode == DW_CFA_set_loc:
+                args = [
+                    struct_parse(structs.Dwarf_target_addr(''), self.stream)]
+            elif opcode == DW_CFA_advance_loc1:
+                args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
+            elif opcode == DW_CFA_advance_loc2:
+                args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
+            elif opcode == DW_CFA_advance_loc4:
+                args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
+            elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
+                            DW_CFA_def_cfa, DW_CFA_val_offset):
+                args = [
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
+            elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
+                            DW_CFA_same_value, DW_CFA_def_cfa_register,
+                            DW_CFA_def_cfa_offset):
+                args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
+            elif opcode == DW_CFA_def_cfa_offset_sf:
+                args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
+            elif opcode == DW_CFA_def_cfa_expression:
+                args = [struct_parse(
+                    structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
+            elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
+                # Fixed: this used to assign to a misspelled name ('arsg'),
+                # so these instructions were recorded with empty args
+                args = [
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
+                    struct_parse(
+                        structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
+            elif opcode in (DW_CFA_offset_extended_sf,
+                            DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
+                args = [
+                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
+                    struct_parse(structs.Dwarf_sleb128(''), self.stream)]
+            else:
+                dwarf_assert(False, 'Unknown CFI opcode: %s' % opcode)
+
+            instructions.append(CallFrameInstruction(opcode=opcode, args=args))
+            # The next opcode starts right after the last argument read
+            offset = self.stream.tell()
+
+        # Fixed: the parsed list was built but never handed back to the
+        # caller, which made this method always return None
+        return instructions
 
index ae7e6b2e4a7e5269059db4c865147925839eca5a..745bd6dc220378baa71432f7f13eb0825f7b0061 100644 (file)
 \r
 # Inline codes\r
 #\r
-DW_INL_not_inlined=0\r
-DW_INL_inlined=1\r
-DW_INL_declared_not_inlined=2\r
-DW_INL_declared_inlined=3\r
+DW_INL_not_inlined = 0\r
+DW_INL_inlined = 1\r
+DW_INL_declared_not_inlined = 2\r
+DW_INL_declared_inlined = 3\r
 \r
 \r
 # Source languages\r
 #\r
-DW_LANG_C89=0x0001\r
-DW_LANG_C=0x0002\r
-DW_LANG_Ada83=0x0003\r
-DW_LANG_C_plus_plus=0x0004\r
-DW_LANG_Cobol74=0x0005\r
-DW_LANG_Cobol85=0x0006\r
-DW_LANG_Fortran77=0x0007\r
-DW_LANG_Fortran90=0x0008\r
-DW_LANG_Pascal83=0x0009\r
-DW_LANG_Modula2=0x000a\r
-DW_LANG_Java=0x000b\r
-DW_LANG_C99=0x000c\r
-DW_LANG_Ada95=0x000d\r
-DW_LANG_Fortran95=0x000e\r
-DW_LANG_PLI=0x000f\r
-DW_LANG_ObjC=0x0010\r
-DW_LANG_ObjC_plus_plus=0x0011\r
-DW_LANG_UPC=0x0012\r
-DW_LANG_D=0x0013\r
-DW_LANG_Python=0x0014\r
-DW_LANG_Mips_Assembler=0x8001\r
-DW_LANG_Upc=0x8765\r
-DW_LANG_HP_Bliss=0x8003\r
-DW_LANG_HP_Basic91=0x8004\r
-DW_LANG_HP_Pascal91=0x8005\r
-DW_LANG_HP_IMacro=0x8006\r
-DW_LANG_HP_Assembler=0x8007\r
+DW_LANG_C89 = 0x0001\r
+DW_LANG_C = 0x0002\r
+DW_LANG_Ada83 = 0x0003\r
+DW_LANG_C_plus_plus = 0x0004\r
+DW_LANG_Cobol74 = 0x0005\r
+DW_LANG_Cobol85 = 0x0006\r
+DW_LANG_Fortran77 = 0x0007\r
+DW_LANG_Fortran90 = 0x0008\r
+DW_LANG_Pascal83 = 0x0009\r
+DW_LANG_Modula2 = 0x000a\r
+DW_LANG_Java = 0x000b\r
+DW_LANG_C99 = 0x000c\r
+DW_LANG_Ada95 = 0x000d\r
+DW_LANG_Fortran95 = 0x000e\r
+DW_LANG_PLI = 0x000f\r
+DW_LANG_ObjC = 0x0010\r
+DW_LANG_ObjC_plus_plus = 0x0011\r
+DW_LANG_UPC = 0x0012\r
+DW_LANG_D = 0x0013\r
+DW_LANG_Python = 0x0014\r
+DW_LANG_Mips_Assembler = 0x8001\r
+DW_LANG_Upc = 0x8765\r
+DW_LANG_HP_Bliss = 0x8003\r
+DW_LANG_HP_Basic91 = 0x8004\r
+DW_LANG_HP_Pascal91 = 0x8005\r
+DW_LANG_HP_IMacro = 0x8006\r
+DW_LANG_HP_Assembler = 0x8007\r
 \r
 \r
 # Encoding\r
 #\r
-DW_ATE_void=0x0\r
-DW_ATE_address=0x1\r
-DW_ATE_boolean=0x2\r
-DW_ATE_complex_float=0x3\r
-DW_ATE_float=0x4\r
-DW_ATE_signed=0x5\r
-DW_ATE_signed_char=0x6\r
-DW_ATE_unsigned=0x7\r
-DW_ATE_unsigned_char=0x8\r
-DW_ATE_imaginary_float=0x9\r
-DW_ATE_packed_decimal=0xa\r
-DW_ATE_numeric_string=0xb\r
-DW_ATE_edited=0xc\r
-DW_ATE_signed_fixed=0xd\r
-DW_ATE_unsigned_fixed=0xe\r
-DW_ATE_decimal_float=0xf\r
-DW_ATE_UTF=0x10\r
-DW_ATE_lo_user=0x80\r
-DW_ATE_hi_user=0xff\r
-DW_ATE_HP_float80=0x80\r
-DW_ATE_HP_complex_float80=0x81\r
-DW_ATE_HP_float128=0x82\r
-DW_ATE_HP_complex_float128=0x83\r
-DW_ATE_HP_floathpintel=0x84\r
-DW_ATE_HP_imaginary_float80=0x85\r
-DW_ATE_HP_imaginary_float128=0x86\r
+DW_ATE_void = 0x0\r
+DW_ATE_address = 0x1\r
+DW_ATE_boolean = 0x2\r
+DW_ATE_complex_float = 0x3\r
+DW_ATE_float = 0x4\r
+DW_ATE_signed = 0x5\r
+DW_ATE_signed_char = 0x6\r
+DW_ATE_unsigned = 0x7\r
+DW_ATE_unsigned_char = 0x8\r
+DW_ATE_imaginary_float = 0x9\r
+DW_ATE_packed_decimal = 0xa\r
+DW_ATE_numeric_string = 0xb\r
+DW_ATE_edited = 0xc\r
+DW_ATE_signed_fixed = 0xd\r
+DW_ATE_unsigned_fixed = 0xe\r
+DW_ATE_decimal_float = 0xf\r
+DW_ATE_UTF = 0x10\r
+DW_ATE_lo_user = 0x80\r
+DW_ATE_hi_user = 0xff\r
+DW_ATE_HP_float80 = 0x80\r
+DW_ATE_HP_complex_float80 = 0x81\r
+DW_ATE_HP_float128 = 0x82\r
+DW_ATE_HP_complex_float128 = 0x83\r
+DW_ATE_HP_floathpintel = 0x84\r
+DW_ATE_HP_imaginary_float80 = 0x85\r
+DW_ATE_HP_imaginary_float128 = 0x86\r
 \r
 \r
 # Access\r
 #\r
-DW_ACCESS_public=1\r
-DW_ACCESS_protected=2\r
-DW_ACCESS_private=3\r
+DW_ACCESS_public = 1\r
+DW_ACCESS_protected = 2\r
+DW_ACCESS_private = 3\r
 \r
 \r
 # Visibility\r
 #\r
-DW_VIS_local=1\r
-DW_VIS_exported=2\r
-DW_VIS_qualified=3\r
+DW_VIS_local = 1\r
+DW_VIS_exported = 2\r
+DW_VIS_qualified = 3\r
 \r
 \r
 # Virtuality\r
 #\r
-DW_VIRTUALITY_none=0\r
-DW_VIRTUALITY_virtual=1\r
-DW_VIRTUALITY_pure_virtual=2\r
+DW_VIRTUALITY_none = 0\r
+DW_VIRTUALITY_virtual = 1\r
+DW_VIRTUALITY_pure_virtual = 2\r
 \r
 \r
 # ID case\r
 #\r
-DW_ID_case_sensitive=0\r
-DW_ID_up_case=1\r
-DW_ID_down_case=2\r
-DW_ID_case_insensitive=3\r
+DW_ID_case_sensitive = 0\r
+DW_ID_up_case = 1\r
+DW_ID_down_case = 2\r
+DW_ID_case_insensitive = 3\r
 \r
 \r
 # Calling convention\r
 #\r
-DW_CC_normal=0x1\r
-DW_CC_program=0x2\r
-DW_CC_nocall=0x3\r
+DW_CC_normal = 0x1\r
+DW_CC_program = 0x2\r
+DW_CC_nocall = 0x3\r
 \r
 \r
 # Ordering\r
 #\r
-DW_ORD_row_major=0\r
-DW_ORD_col_major=1\r
+DW_ORD_row_major = 0\r
+DW_ORD_col_major = 1\r
 \r
 \r
 # Line program opcodes\r
@@ -136,3 +136,34 @@ DW_LNE_end_sequence = 0x01
 DW_LNE_set_address = 0x02\r
 DW_LNE_define_file = 0x03\r
 \r
+\r
+# Call frame instructions\r
+#\r
+DW_CFA_advance_loc = 0b01000000\r
+DW_CFA_offset = 0b10000000\r
+DW_CFA_restore = 0b11000000\r
+DW_CFA_nop = 0x00\r
+DW_CFA_set_loc = 0x01\r
+DW_CFA_advance_loc1 = 0x02\r
+DW_CFA_advance_loc2 = 0x03\r
+DW_CFA_advance_loc4 = 0x04\r
+DW_CFA_offset_extended = 0x05\r
+DW_CFA_restore_extended = 0x06\r
+DW_CFA_undefined = 0x07\r
+DW_CFA_same_value = 0x08\r
+DW_CFA_register = 0x09\r
+DW_CFA_remember_state = 0x0a\r
+DW_CFA_restore_state = 0x0b\r
+DW_CFA_def_cfa = 0x0c\r
+DW_CFA_def_cfa_register = 0x0d\r
+DW_CFA_def_cfa_offset = 0x0e\r
+DW_CFA_def_cfa_expression = 0x0f\r
+DW_CFA_expression = 0x10\r
+DW_CFA_offset_extended_sf = 0x11\r
+DW_CFA_def_cfa_sf = 0x12\r
+DW_CFA_def_cfa_offset_sf = 0x13\r
+DW_CFA_val_offset = 0x14\r
+DW_CFA_val_offset_sf = 0x15\r
+DW_CFA_val_expression = 0x16\r
+\r
+\r
index d25178c683bbb54bc8186c5c0c8d3e5dcbc8922d..10154ae2471fcb5f397b99f36eece643a803d52f 100644 (file)
@@ -126,6 +126,7 @@ class DWARFStructs(object):
         self._create_abbrev_declaration()
         self._create_dw_form()
         self._create_lineprog_header()
+        self._create_callframe_entry_headers()
 
     def _create_initial_length(self):
         def _InitialLength(name):