Started adding abbrev table stuff
author eliben <devnull@localhost>
Tue, 20 Sep 2011 09:57:01 +0000 (12:57 +0300)
committer eliben <devnull@localhost>
Tue, 20 Sep 2011 09:57:01 +0000 (12:57 +0300)
elftools/dwarf/abbrevtable.py [new file with mode: 0644]
elftools/dwarf/compileunit.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/enums.py [new file with mode: 0644]
elftools/dwarf/structs.py

diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py
new file mode 100644 (file)
index 0000000..63485bd
--- /dev/null
@@ -0,0 +1,16 @@
+#-------------------------------------------------------------------------------
+# elftools: dwarf/abbrevtable.py
+#
+# DWARF abbreviation table
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+
+
+class AbbrevTable(object):
+    def __init__(self, structs, stream):
+        self.structs = structs
+        self.stream = stream
+
+
diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py
index a22b761ab25c368a3cc2c4da0ecbca3a3f4d444c..6eb5afa18826d87417058cd6d8a3f17cdb095387 100644 (file)
@@ -10,9 +10,7 @@
 
 class CompileUnit(object):
     def __init__(self, dwarfinfo, header, structs):
-        """ Arguments:
-            
-            dwarfinfo:
+        """ dwarfinfo:
                 The DWARFInfo context object which created this one
             
             header:
@@ -24,7 +22,6 @@ class CompileUnit(object):
         self.dwarfinfo = dwarfinfo
         self.header = header
         self.structs = structs
-        self.cu_die = cu_die
     
     def __getitem__(self, name):
         """ Implement dict-like access to header entries
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py
index af9c7add031f001c2f5730e7e6435dfaf36d0e7c..f49458fa8b7bff6061a7dab6ada4607c56a525ee 100644 (file)
@@ -12,6 +12,7 @@ from ..common.exceptions import DWARFError
 from ..common.utils import struct_parse, dwarf_assert
 from .structs import DWARFStructs
 from .compileunit import CompileUnit
+from .abbrevtable import AbbrevTable
 
 
 # Describes a debug section in a stream: offset and size
@@ -20,18 +21,26 @@ DebugSectionLocator = namedtuple('DebugSectionLocator', 'offset size')
 
 
 class DWARFInfo(object):
-    """ Creation: the constructor accepts a stream (file-like object) that
-        contains debug sections, along with locators (DebugSectionLocator)
-        of the required sections. In addition, little_endian is a boolean
-        parameter specifying endianity.
+    """ Acts also as a "context" to other major objects, bridging between 
+        various parts of the debug information.
     """
-    def __init__(self, 
+    def __init__(self,
             stream,
             little_endian,
             debug_info_loc,
             debug_abbrev_loc,
             debug_str_loc,
             debug_line_loc):
+        """ stream: 
+                A stream (file-like object) that contains debug sections
+            
+            little_endian:
+                True if section contents are in little-endian data format
+            
+            debug_*_loc:
+                DebugSectionLocator for this section, specifying where it can
+                be found in the stream
+        """
         self.stream = stream
         self.debug_info_loc = debug_info_loc
         self.debug_abbrev_loc = debug_abbrev_loc
@@ -46,6 +55,32 @@ class DWARFInfo(object):
         
         # Populate the list with CUs found in debug_info
         self._CU = self._parse_CUs()
+        
+        # Cache for abbrev tables: a dict keyed by offset
+        self._abbrevtable_cache = {}
+    
+    def get_abbrev_table(self, offset):
+        """ Get an AbbrevTable from the given offset in the debug_abbrev
+            section.
+            
+            The only verification done on the offset is that it's within the
+            bounds of the section (if not, an exception is raised).
+            It is the caller's responsibility to make sure the offset actually
+            points to a valid abbreviation table.
+            
+            AbbrevTable objects are cached internally (two calls for the same
+            offset will return the same object).
+        """
+        section_boundary = self.debug_abbrev_loc.offset + self.debug_abbrev_loc.size
+        dwarf_assert(
+            self.debug_abbrev_loc.offset <= offset < section_boundary,
+            "Offset '0x%x' to abbrev table out of section bounds" % offset)
+        if offset not in self._abbrevtable_cache:
+            self._abbrevtable_cache[offset] = AbbrevTable(
+                structs=self.structs,
+                stream=self.stream)
+        
+        return self._abbrevtable_cache[offset]
     
     def _parse_CUs(self):
         """ Parse CU entries from debug_info.
@@ -58,7 +93,7 @@ class DWARFInfo(object):
             # states that the first 32-bit word of the CU header determines 
             # whether the CU is represented with 32-bit or 64-bit DWARF format.
             # 
-            # So we peek at the first byte in the CU header to determine its
+            # So we peek at the first word in the CU header to determine its
             # dwarf format. Based on it, we then create a new DWARFStructs
             # instance suitable for this CU and use it to parse the rest.
             #
@@ -71,7 +106,7 @@ class DWARFInfo(object):
                 dwarf_format=self.dwarf_format)
             
             cu_header = struct_parse(
-                self.structs.Dwarf_CU_header, self.stream, offset)
+                cu_structs.Dwarf_CU_header, self.stream, offset)
             dwarf_assert(
                 self._is_supported_version(cu_header['version']),
                 "Expected supported DWARF version. Got '%s'" % cu_header['version'])
diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py
new file mode 100644 (file)
index 0000000..8655e48
--- /dev/null
@@ -0,0 +1,226 @@
+#-------------------------------------------------------------------------------
+# elftools: dwarf/enums.py
+#
+# Mappings of enum names to values
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from ..construct import Pass
+
+
+ENUM_DW_TAG = dict(
+    DW_TAG_null                     = 0x00,
+    DW_TAG_array_type               = 0x01,
+    DW_TAG_class_type               = 0x02,
+    DW_TAG_entry_point              = 0x03,
+    DW_TAG_enumeration_type         = 0x04,
+    DW_TAG_formal_parameter         = 0x05,
+    DW_TAG_imported_declaration     = 0x08,
+    DW_TAG_label                    = 0x0a,
+    DW_TAG_lexical_block            = 0x0b,
+    DW_TAG_member                   = 0x0d,
+    DW_TAG_pointer_type             = 0x0f,
+    DW_TAG_reference_type           = 0x10,
+    DW_TAG_compile_unit             = 0x11,
+    DW_TAG_string_type              = 0x12,
+    DW_TAG_structure_type           = 0x13,
+    DW_TAG_subroutine_type          = 0x15,
+    DW_TAG_typedef                  = 0x16,
+    DW_TAG_union_type               = 0x17,
+    DW_TAG_unspecified_parameters   = 0x18,
+    DW_TAG_variant                  = 0x19,
+    DW_TAG_common_block             = 0x1a,
+    DW_TAG_common_inclusion         = 0x1b,
+    DW_TAG_inheritance              = 0x1c,
+    DW_TAG_inlined_subroutine       = 0x1d,
+    DW_TAG_module                   = 0x1e,
+    DW_TAG_ptr_to_member_type       = 0x1f,
+    DW_TAG_set_type                 = 0x20,
+    DW_TAG_subrange_type            = 0x21,
+    DW_TAG_with_stmt                = 0x22,
+    DW_TAG_access_declaration       = 0x23,
+    DW_TAG_base_type                = 0x24,
+    DW_TAG_catch_block              = 0x25,
+    DW_TAG_const_type               = 0x26,
+    DW_TAG_constant                 = 0x27,
+    DW_TAG_enumerator               = 0x28,
+    DW_TAG_file_type                = 0x29,
+    DW_TAG_friend                   = 0x2a,
+    DW_TAG_namelist                 = 0x2b,
+    DW_TAG_namelist_item            = 0x2c,
+    DW_TAG_namelist_items           = 0x2c,
+    DW_TAG_packed_type              = 0x2d,
+    DW_TAG_subprogram               = 0x2e,
+    DW_TAG_template_type_parameter  = 0x2f,
+    DW_TAG_template_type_param      = 0x2f,
+    DW_TAG_template_value_parameter = 0x30,
+    DW_TAG_template_value_param     = 0x30,
+    DW_TAG_thrown_type              = 0x31,
+    DW_TAG_try_block                = 0x32,
+    DW_TAG_variant_part             = 0x33,
+    DW_TAG_variable                 = 0x34,
+    DW_TAG_volatile_type            = 0x35,
+    DW_TAG_dwarf_procedure          = 0x36,
+    DW_TAG_restrict_type            = 0x37,
+    DW_TAG_interface_type           = 0x38,
+    DW_TAG_namespace                = 0x39,
+    DW_TAG_imported_module          = 0x3a,
+    DW_TAG_unspecified_type         = 0x3b,
+    DW_TAG_partial_unit             = 0x3c,
+    DW_TAG_imported_unit            = 0x3d,
+    DW_TAG_mutable_type             = 0x3e,
+    DW_TAG_condition                = 0x3f,
+    DW_TAG_shared_type              = 0x40,
+    DW_TAG_type_unit                = 0x41,
+    DW_TAG_rvalue_reference_type    = 0x42,
+
+    DW_TAG_lo_user                  = 0x4080,
+    DW_TAG_hi_user                  = 0xffff,
+
+    _default_                       = Pass,
+)
+
+
+ENUM_DW_CHILDREN = dict(
+    DW_CHILDREN_no  = 0x00,
+    DW_CHILDREN_yes = 0x01,
+)
+
+
+ENUM_DW_AT = dict(
+    DW_AT_null                  = 0x00,
+    DW_AT_sibling               = 0x01,
+    DW_AT_location              = 0x02,
+    DW_AT_name                  = 0x03,
+    DW_AT_ordering              = 0x09,
+    DW_AT_subscr_data           = 0x0a,
+    DW_AT_byte_size             = 0x0b,
+    DW_AT_bit_offset            = 0x0c,
+    DW_AT_bit_size              = 0x0d,
+    DW_AT_element_list          = 0x0f,
+    DW_AT_stmt_list             = 0x10,
+    DW_AT_low_pc                = 0x11,
+    DW_AT_high_pc               = 0x12,
+    DW_AT_language              = 0x13,
+    DW_AT_member                = 0x14,
+    DW_AT_discr                 = 0x15,
+    DW_AT_discr_value           = 0x16,
+    DW_AT_visibility            = 0x17,
+    DW_AT_import                = 0x18,
+    DW_AT_string_length         = 0x19,
+    DW_AT_common_reference      = 0x1a,
+    DW_AT_comp_dir              = 0x1b,
+    DW_AT_const_value           = 0x1c,
+    DW_AT_containing_type       = 0x1d,
+    DW_AT_default_value         = 0x1e,
+    DW_AT_inline                = 0x20,
+    DW_AT_is_optional           = 0x21,
+    DW_AT_lower_bound           = 0x22,
+    DW_AT_producer              = 0x25,
+    DW_AT_prototyped            = 0x27,
+    DW_AT_return_addr           = 0x2a,
+    DW_AT_start_scope           = 0x2c,
+    DW_AT_bit_stride            = 0x2e,
+    DW_AT_stride_size           = 0x2e,
+    DW_AT_upper_bound           = 0x2f,
+    DW_AT_abstract_origin       = 0x31,
+    DW_AT_accessibility         = 0x32,
+    DW_AT_address_class         = 0x33,
+    DW_AT_artificial            = 0x34,
+    DW_AT_base_types            = 0x35,
+    DW_AT_calling_convention    = 0x36,
+    DW_AT_count                 = 0x37,
+    DW_AT_data_member_location  = 0x38,
+    DW_AT_decl_column           = 0x39,
+    DW_AT_decl_file             = 0x3a,
+    DW_AT_decl_line             = 0x3b,
+    DW_AT_declaration           = 0x3c,
+    DW_AT_discr_list            = 0x3d,
+    DW_AT_encoding              = 0x3e,
+    DW_AT_external              = 0x3f,
+    DW_AT_frame_base            = 0x40,
+    DW_AT_friend                = 0x41,
+    DW_AT_identifier_case       = 0x42,
+    DW_AT_macro_info            = 0x43,
+    DW_AT_namelist_item         = 0x44,
+    DW_AT_priority              = 0x45,
+    DW_AT_segment               = 0x46,
+    DW_AT_specification         = 0x47,
+    DW_AT_static_link           = 0x48,
+    DW_AT_type                  = 0x49,
+    DW_AT_use_location          = 0x4a,
+    DW_AT_variable_parameter    = 0x4b,
+    DW_AT_virtuality            = 0x4c,
+    DW_AT_vtable_elem_location  = 0x4d,
+    DW_AT_allocated             = 0x4e,
+    DW_AT_associated            = 0x4f,
+    DW_AT_data_location         = 0x50,
+    DW_AT_byte_stride           = 0x51,
+    DW_AT_stride                = 0x51,
+    DW_AT_entry_pc              = 0x52,
+    DW_AT_use_UTF8              = 0x53,
+    DW_AT_extension             = 0x54,
+    DW_AT_ranges                = 0x55,
+    DW_AT_trampoline            = 0x56,
+    DW_AT_call_column           = 0x57,
+    DW_AT_call_file             = 0x58,
+    DW_AT_call_line             = 0x59,
+    DW_AT_description           = 0x5a,
+    DW_AT_binary_scale          = 0x5b,
+    DW_AT_decimal_scale         = 0x5c,
+    DW_AT_small                 = 0x5d,
+    DW_AT_decimal_sign          = 0x5e,
+    DW_AT_digit_count           = 0x5f,
+    DW_AT_picture_string        = 0x60,
+    DW_AT_mutable               = 0x61,
+    DW_AT_threads_scaled        = 0x62,
+    DW_AT_explicit              = 0x63,
+    DW_AT_object_pointer        = 0x64,
+    DW_AT_endianity             = 0x65,
+    DW_AT_elemental             = 0x66,
+    DW_AT_pure                  = 0x67,
+    DW_AT_recursive             = 0x68,
+    DW_AT_signature             = 0x69,
+    DW_AT_main_subprogram       = 0x6a,
+    DW_AT_data_bit_offset       = 0x6b,
+    DW_AT_const_expr            = 0x6c,
+
+    DW_AT_lo_user               = 0x2000,
+    DW_AT_hi_user               = 0x3fff,
+
+    _default_                   = Pass,
+)
+
+
+ENUM_DW_FORM = dict(
+    DW_FORM_null            = 0x00,
+    DW_FORM_addr            = 0x01,
+    DW_FORM_block2          = 0x03,
+    DW_FORM_block4          = 0x04,
+    DW_FORM_data2           = 0x05,
+    DW_FORM_data4           = 0x06,
+    DW_FORM_data8           = 0x07,
+    DW_FORM_string          = 0x08,
+    DW_FORM_block           = 0x09,
+    DW_FORM_block1          = 0x0a,
+    DW_FORM_data1           = 0x0b,
+    DW_FORM_flag            = 0x0c,
+    DW_FORM_sdata           = 0x0d,
+    DW_FORM_strp            = 0x0e,
+    DW_FORM_udata           = 0x0f,
+    DW_FORM_ref_addr        = 0x10,
+    DW_FORM_ref1            = 0x11,
+    DW_FORM_ref2            = 0x12,
+    DW_FORM_ref4            = 0x13,
+    DW_FORM_ref8            = 0x14,
+    DW_FORM_ref_udata       = 0x15,
+    DW_FORM_indirect        = 0x16,
+    DW_FORM_sec_offset      = 0x17,
+    DW_FORM_exprloc         = 0x18,
+    DW_FORM_flag_present    = 0x19,
+    DW_FORM_ref_sig8        = 0x20,
+
+    _default_               = Pass,
+)
+
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py
index 7001b7a7de81676a29f8ba3f80226e6ef0a1e6fc..ab272693fbca5063d9d355b6cb0c866558992dce 100644 (file)
@@ -13,6 +13,8 @@ from ..construct import (
     Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename,
     )
 
+from .enums import *
+
 
 class DWARFStructs(object):
     """ Exposes Construct structs suitable for parsing information from DWARF 
@@ -37,6 +39,10 @@ class DWARFStructs(object):
             Dwarf_CU_header:
                 Compilation unit header
         
+            Dwarf_abbrev_entry:
+                Abbreviation table entry - doesn't include the initial code,
+                only the contents.
+        
         See also the documentation of public methods.
     """
     def __init__(self, little_endian=True, dwarf_format=32):
@@ -67,6 +73,7 @@ class DWARFStructs(object):
         self._create_initial_length()
         self._create_leb128()
         self._create_cu_header()
+        self._create_abbrev_entry()
 
     def _create_initial_length(self):
         def _InitialLength(name):
@@ -92,6 +99,15 @@ class DWARFStructs(object):
             self.Dwarf_uint16('version'),
             self.Dwarf_offset('debug_abbrev_offset'),
             self.Dwarf_uint8('address_size'))
+    
+    def _create_abbrev_entry(self):
+        self.Dwarf_abbrev_entry = Struct('Dwarf_abbrev_entry',
+            self.Dwarf_uleb128('tag'),                  # ZZZ: wrap in enums
+            self.Dwarf_uint8('children_flag'),
+            RepeatUntil(lambda obj, ctx: obj.name == obj.form == 0,
+                Struct('spec',
+                    self.Dwarf_uleb128('name'),
+                    self.Dwarf_uleb128('form'))))
 
 
 class _InitialLengthAdapter(Adapter):