From 9d67bf4444598dd6167daccf75734d789521162b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Sat, 25 May 2013 07:03:27 -0700 Subject: [PATCH] Initial DWARF4 support for Clang-generated DWARF. Based on patch by Stefan Hepp --- CHANGES | 2 ++ elftools/dwarf/descriptions.py | 8 +++++++ elftools/dwarf/dwarfinfo.py | 30 +++++++++++++------------- elftools/dwarf/enums.py | 39 ++++++++++++++++++++++++++++++++-- elftools/dwarf/lineprogram.py | 4 ++-- elftools/dwarf/structs.py | 9 +++++++- test/run_readelf_tests.py | 9 +++++--- 7 files changed, 78 insertions(+), 23 deletions(-) diff --git a/CHANGES b/CHANGES index a67c9b9..fe39abb 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,8 @@ Changelog - Added some initial support for parsing Solaris OpenCSW ELF files (contributed by Yann Rouillard). + - Added some initial support for DWARF4 and DWARF generated by recent + versions of Clang. + Version 0.21 (17.04.2013) diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 987e0d0..3a950a0 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -197,6 +197,9 @@ def _describe_attr_debool(attr, die, section_offset): """ return '1' if attr.value else '0' +def _describe_attr_present(attr, die, section_offset): + return '1' + def _describe_attr_block(attr, die, section_offset): s = '%s byte block: ' % len(attr.value) s += ' '.join('%x' % item for item in attr.value) + ' ' @@ -227,6 +230,11 @@ _ATTR_DESCRIPTION_MAP = defaultdict( DW_FORM_block2=_describe_attr_block, DW_FORM_block4=_describe_attr_block, DW_FORM_block=_describe_attr_block, + DW_FORM_flag_present=_describe_attr_present, + # Not sure how to print them + # DW_FORM_exprloc=_describe_attr_value_passthrough, + # DW_FORM_ref_sig8=_describe_attr_ref, + ) diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 9aa2f52..2c6cc89 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -21,7 +21,7 @@ from .ranges import RangeLists # Describes a debug section -# +# # stream: a stream object containing the data of this section # name: section name in the container file # global_offset: the global offset of the section in its container file @@ -30,7 +30,7 @@ from .ranges import RangeLists # 'name' and 'global_offset' are for descriptional purposes only and # aren't strictly required for the DWARF parsing to work. # -DebugSectionDescriptor = namedtuple('DebugSectionDescriptor', +DebugSectionDescriptor = namedtuple('DebugSectionDescriptor', 'stream name global_offset size') @@ -51,7 +51,7 @@ DwarfConfig = namedtuple('DwarfConfig', class DWARFInfo(object): - """ Acts also as a "context" to other major objects, bridging between + """ Acts also as a "context" to other major objects, bridging between various parts of the debug infromation. """ def __init__(self, @@ -68,7 +68,7 @@ class DWARFInfo(object): debug_*_sec: DebugSectionDescriptor for a section. Pass None for sections - that don't exist. These arguments are best given with + that don't exist. These arguments are best given with keyword syntax. """ self.config = config @@ -80,7 +80,7 @@ class DWARFInfo(object): self.debug_ranges_sec = debug_ranges_sec self.debug_line_sec = debug_line_sec - # This is the DWARFStructs the context uses, so it doesn't depend on + # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them # to default values. self.structs = DWARFStructs( @@ -119,7 +119,7 @@ class DWARFInfo(object): return self._abbrevtable_cache[offset] def get_string_from_table(self, offset): - """ Obtain a string from the string table section, given an offset + """ Obtain a string from the string table section, given an offset relative to the section. """ return parse_cstring_from_stream(self.debug_str_sec.stream, offset) @@ -175,18 +175,18 @@ class DWARFInfo(object): # Compute the offset of the next CU in the section. The unit_length # field of the CU header contains its size not including the length # field itself. - offset = ( offset + - cu['unit_length'] + + offset = ( offset + + cu['unit_length'] + cu.structs.initial_length_field_size()) yield cu - + def _parse_CU_at_offset(self, offset): """ Parse and return a CU at the given offset in the debug_info stream. """ # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3 - # states that the first 32-bit word of the CU header determines + # states that the first 32-bit word of the CU header determines # whether the CU is represented with 32-bit or 64-bit DWARF format. - # + # # So we peek at the first word in the CU header to determine its # dwarf format. Based on it, we then create a new DWARFStructs # instance suitable for this CU and use it to parse the rest. @@ -205,7 +205,7 @@ class DWARFInfo(object): little_endian=self.config.little_endian, dwarf_format=dwarf_format, address_size=4) - + cu_header = struct_parse( cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset) if cu_header['address_size'] == 8: @@ -213,7 +213,7 @@ class DWARFInfo(object): little_endian=self.config.little_endian, dwarf_format=dwarf_format, address_size=8) - + cu_die_offset = self.debug_info_sec.stream.tell() dwarf_assert( self._is_supported_version(cu_header['version']), @@ -224,11 +224,11 @@ class DWARFInfo(object): structs=cu_structs, cu_offset=offset, cu_die_offset=cu_die_offset) - + def _is_supported_version(self, version): """ DWARF version supported by this parser """ - return 2 <= version <= 3 + return 2 <= version <= 4 def _parse_line_program_at_offset(self, debug_line_offset, structs): """ Given an offset to the .debug_line section, parse the line program diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index 1338725..2b00b2a 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -185,7 +185,9 @@ ENUM_DW_AT = dict( DW_AT_main_subprogram = 0x6a, DW_AT_data_bit_offset = 0x6b, DW_AT_const_expr = 0x6c, - + DW_AT_enum_class = 0x6d, + DW_AT_linkage_name = 0x6e, + DW_AT_MIPS_fde = 0x2001, DW_AT_MIPS_loop_begin = 0x2002, DW_AT_MIPS_tail_loop_begin = 0x2003, @@ -197,8 +199,37 @@ ENUM_DW_AT = dict( DW_AT_MIPS_abstract_name = 0x2009, DW_AT_MIPS_clone_origin = 0x200a, DW_AT_MIPS_has_inlines = 0x200b, + DW_AT_MIPS_stride_byte = 0x200c, + DW_AT_MIPS_stride_elem = 0x200d, + DW_AT_MIPS_ptr_dopetype = 0x200e, + DW_AT_MIPS_allocatable_dopetype = 0x200f, + DW_AT_MIPS_assumed_shape_dopetype = 0x2010, + DW_AT_MIPS_assumed_size = 0x2011, + + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + DW_AT_GNU_template_name = 0x2110, + + DW_AT_APPLE_optimized = 0x3fe1, + DW_AT_APPLE_flags = 0x3fe2, + DW_AT_APPLE_isa = 0x3fe3, + DW_AT_APPLE_block = 0x3fe4, + DW_AT_APPLE_major_runtime_vers = 0x3fe5, + DW_AT_APPLE_runtime_class = 0x3fe6, + DW_AT_APPLE_omit_frame_ptr = 0x3fe7, + DW_AT_APPLE_property_name = 0x3fe8, + DW_AT_APPLE_property_getter = 0x3fe9, + DW_AT_APPLE_property_setter = 0x3fea, + DW_AT_APPLE_property_attribute = 0x3feb, + DW_AT_APPLE_objc_complete_type = 0x3fec, + DW_AT_APPLE_property = 0x3fed, - _default_ = Pass, + _default_ = Pass, ) @@ -225,6 +256,10 @@ ENUM_DW_FORM = dict( DW_FORM_ref8 = 0x14, DW_FORM_ref_udata = 0x15, DW_FORM_indirect = 0x16, + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_ref_sig8 = 0x20, _default_ = Pass, ) diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index ee5193e..810e603 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -215,10 +215,10 @@ class LineProgram(object): add_entry_old_state(opcode, [operand]) elif opcode == DW_LNS_negate_stmt: state.is_stmt = not state.is_stmt - add_entry_old_state(opcode, [operand]) + add_entry_old_state(opcode, []) elif opcode == DW_LNS_set_basic_block: state.basic_block = True - add_entry_old_state(opcode, [operand]) + add_entry_old_state(opcode, []) elif opcode == DW_LNS_const_add_pc: adjusted_opcode = 255 - self['opcode_base'] address_addend = ((adjusted_opcode // self['line_range']) * diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index cfb2515..d76d271 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -11,7 +11,7 @@ from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum, - Array, PrefixedArray, CString, Embed, + Array, PrefixedArray, CString, Embed, StaticField ) from ..common.construct_utils import RepeatUntilExcluding @@ -193,6 +193,13 @@ class DWARFStructs(object): DW_FORM_ref_addr=self.Dwarf_offset(''), DW_FORM_indirect=self.Dwarf_uleb128(''), + + DW_FORM_flag_present = StaticField('', 0), + # Needs checkings + #DW_FORM_sec_offset = self.Dwarf_offset(''), + #DW_FORM_exprloc = self.Dwarf_uleb128(''), + #DW_FORM_ref_sig8 = self.Dwarf_offset(''), + ) def _create_lineprog_header(self): diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index f9d07ac..bd15fff 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -90,9 +90,9 @@ def compare_output(s1, s2): Note: this function contains some rather horrible hacks to ignore differences which are not important for the verification of pyelftools. This is due to some intricacies of binutils's readelf which pyelftools - doesn't currently implement, or silly inconsistencies in the output of - readelf, which I was reluctant to replicate. - Read the documentation for more details. + doesn't currently implement, features that binutils doesn't support, + or silly inconsistencies in the output of readelf, which I was reluctant + to replicate. Read the documentation for more details. """ def prepare_lines(s): return [line for line in s.lower().splitlines() if line.strip() != ''] @@ -146,6 +146,9 @@ def compare_output(s1, s2): elif 'os/abi' in lines1[i]: if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]: ok = True + elif ( 'unknown at value' in lines1[i] and + 'dw_at_apple' in lines2[i]): + ok = True else: for s in ('t (tls)', 'l (large)'): if s in lines1[i] or s in lines2[i]: -- 2.30.2