From: William Woodruff Date: Thu, 27 May 2021 13:38:35 +0000 (-0400) Subject: dwarf: initial DWARFv5 support (#363) X-Git-Tag: v0.28~16 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4384ad8928ad0167f8cc543056433080decef9cb;p=pyelftools.git dwarf: initial DWARFv5 support (#363) * dwarf: initial DWARFv5 support * dwarf/structs: use Embed to select header layout * dwarf/structs: DW_FORM_strx family Not sure how best to handle 24-bit values yet. * dwarf/structs: use IfThenElse `If` alone wraps the else in a `Value`. * dwarf/structs: DW_FORM_addrx family handling * dwarf_expr: support DW_OP_addrx Not complete, but gets readelf.py to the end of a single binary. * dwarf/constants: DW_UT_* constants * dwarf/structs: fix some DW_FORMs * elftools, test: plumbing for DWARFv5 sections * dwarf/constants: fix typo * dwarf/structs: re-add a comment that got squashed * dwarf/structs: DWARFv5 table header scaffolding * dwarf/constants: typo * test: add a basic DWARFv5 test --- diff --git a/elftools/dwarf/constants.py b/elftools/dwarf/constants.py index d1a86fc..6542095 100644 --- a/elftools/dwarf/constants.py +++ b/elftools/dwarf/constants.py @@ -207,3 +207,18 @@ DW_CFA_val_offset = 0x14 DW_CFA_val_offset_sf = 0x15 DW_CFA_val_expression = 0x16 DW_CFA_GNU_args_size = 0x2e + + +# Compilation unit types +# +# DWARFv5 introduces the "unit_type" field to each CU header, allowing +# individual CUs to indicate whether they're complete, partial, and so forth. +# See DWARFv5 3.1 ("Unit Entries") and 7.5.1 ("Unit Headers"). +DW_UT_compile = 0x01 +DW_UT_type = 0x02 +DW_UT_partial = 0x03 +DW_UT_skeleton = 0x04 +DW_UT_split_compile = 0x05 +DW_UT_split_type = 0x06 +DW_UT_lo_user = 0x80 +DW_UT_hi_user = 0xff diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index bb85daa..07c6fa1 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -197,6 +197,7 @@ def _init_dispatch_table(structs): return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] add('DW_OP_addr', parse_op_addr()) + add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128(''))) add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 103fc49..48c4bb5 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -71,7 +71,9 @@ class DWARFInfo(object): debug_ranges_sec, debug_line_sec, debug_pubtypes_sec, - debug_pubnames_sec): + debug_pubnames_sec, + debug_addr_sec, + debug_str_offsets_sec): """ config: A DwarfConfig object @@ -434,7 +436,7 @@ class DWARFInfo(object): def _is_supported_version(self, version): """ DWARF version supported by this parser """ - return 2 <= version <= 4 + return 2 <= version <= 5 def _parse_line_program_at_offset(self, debug_line_offset, structs): """ Given an offset to the .debug_line section, parse the line program diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 9513053..16f29f6 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -11,7 +11,7 @@ from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, - CString, Embed, StaticField + CString, Embed, StaticField, IfThenElse ) from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128 from .enums import * @@ -89,7 +89,7 @@ class DWARFStructs(object): section 7.5.1) """ assert dwarf_format == 32 or dwarf_format == 64 - assert address_size == 8 or address_size == 4 + assert address_size == 8 or address_size == 4, str(address_size) self.little_endian = little_endian self.dwarf_format = dwarf_format self.address_size = address_size @@ -138,6 +138,8 @@ class DWARFStructs(object): self._create_callframe_entry_headers() self._create_aranges_header() self._create_nameLUT_header() + self._create_string_offsets_table_header() + self._create_address_table_header() def _create_initial_length(self): def _InitialLength(name): @@ -160,8 +162,16 @@ class DWARFStructs(object): self.Dwarf_CU_header = Struct('Dwarf_CU_header', self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint8('address_size')) + # DWARFv5 reverses the order of address_size and debug_abbrev_offset. + IfThenElse('', lambda ctx: ctx['version'] >= 5, + Embed(Struct('', + self.Dwarf_uint8('unit_type'), + self.Dwarf_uint8('address_size'), + self.Dwarf_offset('debug_abbrev_offset'))), + Embed(Struct('', + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint8('address_size'))), + )) def _create_abbrev_declaration(self): self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry', @@ -177,6 +187,11 @@ class DWARFStructs(object): def _create_dw_form(self): self.Dwarf_dw_form = dict( DW_FORM_addr=self.Dwarf_target_addr(''), + DW_FORM_addrx=self.Dwarf_uleb128(''), + DW_FORM_addrx1=self.Dwarf_uint8(''), + DW_FORM_addrx2=self.Dwarf_uint16(''), + # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO + DW_FORM_addrx4=self.Dwarf_uint32(''), DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), @@ -193,6 +208,10 @@ class DWARFStructs(object): DW_FORM_string=CString(''), DW_FORM_strp=self.Dwarf_offset(''), + DW_FORM_strx1=self.Dwarf_uint8(''), + DW_FORM_strx2=self.Dwarf_uint16(''), + # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO + DW_FORM_strx4=self.Dwarf_uint64(''), DW_FORM_flag=self.Dwarf_uint8(''), DW_FORM_ref=self.Dwarf_uint32(''), @@ -233,6 +252,22 @@ class DWARFStructs(object): self.Dwarf_length('debug_info_length') ) + def _create_string_offsets_table_header(self): + self.Dwarf_string_offsets_table_header = Struct( + "Dwarf_string_offets_table_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint16('padding'), + ) + + def _create_address_table_header(self): + self.Dwarf_address_table_header = Struct("Dwarf_address_table_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_selector_size'), + ) + def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 779ae3b..acddc89 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -212,7 +212,7 @@ class ELFFile(object): section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', '.debug_loc', '.debug_ranges', '.debug_pubtypes', - '.debug_pubnames') + '.debug_pubnames', '.debug_addr', '.debug_str_offsets') compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -224,7 +224,8 @@ class ELFFile(object): (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, - debug_pubnames_name, eh_frame_sec_name) = section_names + debug_pubnames_name, debug_addr_name, debug_str_offsets_name, + eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -253,8 +254,10 @@ class ELFFile(object): debug_loc_sec=debug_sections[debug_loc_sec_name], debug_ranges_sec=debug_sections[debug_ranges_sec_name], debug_line_sec=debug_sections[debug_line_sec_name], - debug_pubtypes_sec = debug_sections[debug_pubtypes_name], - debug_pubnames_sec = debug_sections[debug_pubnames_name] + debug_pubtypes_sec=debug_sections[debug_pubtypes_name], + debug_pubnames_sec=debug_sections[debug_pubnames_name], + debug_addr_sec=debug_sections[debug_addr_name], + debug_str_offsets_sec=debug_sections[debug_str_offsets_name], ) def has_ehabi_info(self): diff --git a/test/test_dwarf_v5.py b/test/test_dwarf_v5.py new file mode 100644 index 0000000..0468d07 --- /dev/null +++ b/test/test_dwarf_v5.py @@ -0,0 +1,21 @@ +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestDWARFv5(unittest.TestCase): + def test_dwarfv5_parses(self): + dwarfv5_basic = os.path.join('test', 'testfiles_for_unittests', 'dwarfv5_basic.elf') + with open(dwarfv5_basic, 'rb') as f: + elf = ELFFile(f) + # DWARFv5 debugging information is detected. + self.assertTrue(elf.has_dwarf_info()) + + # Fetching DWARFInfo for DWARFv5 doesn't completely explode. + dwarf = elf.get_dwarf_info() + self.assertIsNotNone(dwarf) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py index 85fc56d..b9e8874 100644 --- a/test/test_refaddr_bitness.py +++ b/test/test_refaddr_bitness.py @@ -43,7 +43,9 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): debug_ranges_sec = None, debug_line_sec = None, debug_pubtypes_sec = None, - debug_pubnames_sec = None + debug_pubnames_sec = None, + debug_addr_sec=None, + debug_str_offsets_sec=None, ) CUs = [cu for cu in di.iter_CUs()] diff --git a/test/testfiles_for_unittests/dwarfv5_basic.elf b/test/testfiles_for_unittests/dwarfv5_basic.elf new file mode 100755 index 0000000..4a9363c Binary files /dev/null and b/test/testfiles_for_unittests/dwarfv5_basic.elf differ