From: eliben Date: Tue, 20 Sep 2011 09:57:01 +0000 (+0300) Subject: Started adding abbrev table stuff X-Git-Tag: v0.10~104 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=748a5955fae45a5751ed363aa015433fe17e9379;p=pyelftools.git Started adding abbrev table stuff --- diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py new file mode 100644 index 0000000..63485bd --- /dev/null +++ b/elftools/dwarf/abbrevtable.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/abbrevtable.py +# +# DWARF abbreviation table +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- + + +class AbbrevTable(object): + def __init__(self, structs, stream): + self.structs = structs + self.stream = stream + + diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index a22b761..6eb5afa 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -10,9 +10,7 @@ class CompileUnit(object): def __init__(self, dwarfinfo, header, structs): - """ Arguments: - - dwarfinfo: + """ dwarfinfo: The DWARFInfo context object which created this one header: @@ -24,7 +22,6 @@ class CompileUnit(object): self.dwarfinfo = dwarfinfo self.header = header self.structs = structs - self.cu_die = cu_die def __getitem__(self, name): """ Implement dict-like access to header entries diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index af9c7ad..f49458f 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -12,6 +12,7 @@ from ..common.exceptions import DWARFError from ..common.utils import struct_parse, dwarf_assert from .structs import DWARFStructs from .compileunit import CompileUnit +from .abbrevtable import AbbrevTable # Describes a debug section in a stream: offset and size @@ -20,18 +21,26 @@ DebugSectionLocator = 
namedtuple('DebugSectionLocator', 'offset size') class DWARFInfo(object): - """ Creation: the constructor accepts a stream (file-like object) that - contains debug sections, along with locators (DebugSectionLocator) - of the required sections. In addition, little_endian is a boolean - parameter specifying endianity. + """ Acts also as a "context" to other major objects, bridging between + various parts of the debug information. """ - def __init__(self, + def __init__(self, stream, little_endian, debug_info_loc, debug_abbrev_loc, debug_str_loc, debug_line_loc): + """ stream: + A stream (file-like object) that contains debug sections + + little_endian: + Section contents are in little-endian data format + + debug_*_loc: + DebugSectionLocator for this section, specifying where it can + be found in the stream + """ self.stream = stream self.debug_info_loc = debug_info_loc self.debug_abbrev_loc = debug_abbrev_loc @@ -46,6 +55,32 @@ class DWARFInfo(object): # Populate the list with CUs found in debug_info self._CU = self._parse_CUs() + + # Cache for abbrev tables: a dict keyed by offset + self._abbrevtable_cache = {} + + def get_abbrev_table(self, offset): + """ Get an AbbrevTable from the given offset in the debug_abbrev + section. + + The only verification done on the offset is that it's within the + bounds of the section (if not, an exception is raised). + It is the caller's responsibility to make sure the offset actually + points to a valid abbreviation table. + + AbbrevTable objects are cached internally (two calls for the same + offset will return the same object). 
+ """ + section_boundary = self.debug_abbrev_loc.offset + self.debug_abbrev_loc.size + dwarf_assert( + self.debug_abbrev_loc.offset <= offset < section_boundary, + "Offset '0x%x' to abbrev table out of section bounds" % offset) + if offset not in self._abbrevtable_cache: + self._abbrevtable_cache[offset] = AbbrevTable( + structs=self.structs, + stream=self.stream) + + return self._abbrevtable_cache[offset] def _parse_CUs(self): """ Parse CU entries from debug_info. @@ -58,7 +93,7 @@ class DWARFInfo(object): # states that the first 32-bit word of the CU header determines # whether the CU is represented with 32-bit or 64-bit DWARF format. # - # So we peek at the first byte in the CU header to determine its + # So we peek at the first word in the CU header to determine its # dwarf format. Based on it, we then create a new DWARFStructs # instance suitable for this CU and use it to parse the rest. # @@ -71,7 +106,7 @@ class DWARFInfo(object): dwarf_format=self.dwarf_format) cu_header = struct_parse( - self.structs.Dwarf_CU_header, self.stream, offset) + cu_structs.Dwarf_CU_header, self.stream, offset) dwarf_assert( self._is_supported_version(cu_header['version']), "Expected supported DWARF version. 
Got '%s'" % cu_header['version']) diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py new file mode 100644 index 0000000..8655e48 --- /dev/null +++ b/elftools/dwarf/enums.py @@ -0,0 +1,226 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/enums.py +# +# Mappings of enum names to values +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from ..construct import Pass + + +ENUM_DW_TAG = dict( + DW_TAG_null = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_namelist_items = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_parameter = 0x2f, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_parameter = 0x30, + DW_TAG_template_value_param = 0x30, + DW_TAG_thrown_type = 0x31, + 
DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + DW_TAG_mutable_type = 0x3e, + DW_TAG_condition = 0x3f, + DW_TAG_shared_type = 0x40, + DW_TAG_type_unit = 0x41, + DW_TAG_rvalue_reference_type = 0x42, + + DW_TAG_lo_user = 0x4080, + DW_TAG_hi_user = 0xffff, + + _default_ = Pass, +) + + +ENUM_DW_CHILDREN = dict( + DW_CHILDREN_no = 0x00, + DW_CHILDREN_yes = 0x01, +) + + +ENUM_DW_AT = dict( + DW_AT_null = 0x00, + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_bit_stride = 0x2e, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, 
+ DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_item = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_byte_stride = 0x51, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + DW_AT_signature = 0x69, + DW_AT_main_subprogram = 0x6a, + DW_AT_data_bit_offset = 0x6b, + DW_AT_const_expr = 0x6c, + + DW_AT_lo_user = 0x2000, + DW_AT_hi_user = 0x3fff, + + _default_ = Pass, +) + + +ENUM_DW_FORM = dict( + DW_FORM_null = 0x00, + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 
0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_ref_sig8 = 0x20, + + _default_ = Pass, +) + diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 7001b7a..ab27269 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -13,6 +13,8 @@ from ..construct import ( Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, ) +from .enums import * + class DWARFStructs(object): """ Exposes Construct structs suitable for parsing information from DWARF @@ -37,6 +39,10 @@ class DWARFStructs(object): Dwarf_CU_header: Compilation unit header + Dwarf_abbrev_entry: + Abbreviation table entry - doesn't include the initial code, + only the contents. + See also the documentation of public methods. """ def __init__(self, little_endian=True, dwarf_format=32): @@ -67,6 +73,7 @@ class DWARFStructs(object): self._create_initial_length() self._create_leb128() self._create_cu_header() + self._create_abbrev_entry() def _create_initial_length(self): def _InitialLength(name): @@ -92,6 +99,15 @@ class DWARFStructs(object): self.Dwarf_uint16('version'), self.Dwarf_offset('debug_abbrev_offset'), self.Dwarf_uint8('address_size')) + + def _create_abbrev_entry(self): + self.Dwarf_abbrev_entry = Struct('Dwarf_abbrev_entry', + self.Dwarf_uleb128('tag'), # ZZZ: wrap in enums + self.Dwarf_uint8('children_flag'), + RepeatUntil(lambda obj, ctx: obj.name == obj.form == 0, + Struct('spec', + self.Dwarf_uleb128('name'), + self.Dwarf_uleb128('form')))) class _InitialLengthAdapter(Adapter):