From: Ronan Dunklau Date: Mon, 8 Aug 2022 14:50:55 +0000 (+0200) Subject: Supplementary object files (#426) X-Git-Tag: v0.29~8 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e972a570d18f32210fa9d4bcfc615e244ad54555;p=pyelftools.git Supplementary object files (#426) * Recognize DW_FORM_ref_udata as a reference type. References to other DIEs can also be implemented with a form DW_FORM_ref_udata, for using the ULEB128 encoding * Add support for DWARF supplementary object files. --- diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py index e3980b2..46bbfb3 100644 --- a/elftools/common/py3compat.py +++ b/elftools/common/py3compat.py @@ -13,6 +13,7 @@ PY3 = sys.version_info[0] == 3 if PY3: import io from pathlib import Path + StringIO = io.StringIO BytesIO = io.BytesIO diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index f1884e2..704dec3 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -165,7 +165,7 @@ class CompileUnit(object): sibling = child.attributes["DW_AT_sibling"] if sibling.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', - 'DW_FORM_ref'): + 'DW_FORM_ref', 'DW_FORM_ref_udata'): cur_offset = sibling.value + self.cu_offset elif sibling.form == 'DW_FORM_ref_addr': cur_offset = sibling.value @@ -198,10 +198,14 @@ class CompileUnit(object): """ Given a DIE, this yields it with its subtree including null DIEs (child list terminators). 
""" + # If the die is an imported unit, replace it with what it refers to if + # we can + if die.tag == 'DW_TAG_imported_unit' and self.dwarfinfo.supplementary_dwarfinfo: + die = die.get_DIE_from_attribute('DW_AT_import') yield die if die.has_children: for c in die.iter_children(): - for d in self._iter_DIE_subtree(c): + for d in die.cu._iter_DIE_subtree(c): yield d yield die._terminator diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 76f2096..b26f8a1 100755 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -108,7 +108,7 @@ class DIE(object): """ attr = self.attributes[name] if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', - 'DW_FORM_ref8', 'DW_FORM_ref'): + 'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata'): refaddr = self.cu.cu_offset + attr.raw_value return self.cu.get_DIE_from_refaddr(refaddr) elif attr.form in ('DW_FORM_ref_addr'): @@ -116,7 +116,10 @@ class DIE(object): elif attr.form in ('DW_FORM_ref_sig8'): # Implement search type units for matching signature raise NotImplementedError('%s (type unit by signature)' % attr.form) - elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'): + elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8', 'DW_FORM_GNU_ref_alt'): + if self.dwarfinfo.supplementary_dwarfinfo: + return self.dwarfinfo.supplementary_dwarfinfo.get_DIE_from_refaddr(attr.raw_value) + # FIXME: how to distinguish supplementary files from dwo ? 
raise NotImplementedError('%s to dwo' % attr.form) else: raise DWARFError('%s is not a reference class form attribute' % attr) @@ -275,6 +278,11 @@ class DIE(object): elif form == 'DW_FORM_line_strp': with preserve_stream_pos(self.stream): value = self.dwarfinfo.get_string_from_linetable(raw_value) + elif form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'): + if self.dwarfinfo.supplementary_dwarfinfo: + return self.dwarfinfo.supplementary_dwarfinfo.get_string_from_table(raw_value) + else: + value = raw_value elif form == 'DW_FORM_flag': value = not raw_value == 0 elif form == 'DW_FORM_flag_present': diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 22cd0eb..4edc7cd 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -79,7 +79,10 @@ class DWARFInfo(object): debug_str_offsets_sec, debug_line_str_sec, debug_loclists_sec, - debug_rnglists_sec): # Not parsed for now + debug_rnglists_sec, + debug_sup_sec, + gnu_debugaltlink_sec + ): """ config: A DwarfConfig object @@ -105,6 +108,13 @@ class DWARFInfo(object): self.debug_pubnames_sec = debug_pubnames_sec self.debug_loclists_sec = debug_loclists_sec self.debug_rnglists_sec = debug_rnglists_sec + self.debug_sup_sec = debug_sup_sec + self.gnu_debugaltlink_sec = gnu_debugaltlink_sec + + # Sets the supplementary_dwarfinfo to None. Client code can set this + # to something else, typically a DWARFInfo file read from an ELFFile + # which path is stored in the debug_sup_sec or gnu_debugaltlink_sec. 
+ self.supplementary_dwarfinfo = None # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -507,6 +517,11 @@ class DWARFInfo(object): replace_value(data, field.content_type, self.get_string_from_linetable) elif field.form == 'DW_FORM_strp': replace_value(data, field.content_type, self.get_string_from_table) + elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_GNU_strp_alt'): + if self.supplementary_dwarfinfo: + replace_value(data, field.content_type, self.supplementary_dwarfinfo.get_string_fromtable) + else: + replace_value(data, field.content_type, lambda x: str(x)) elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4'): raise NotImplementedError() @@ -536,3 +551,18 @@ class DWARFInfo(object): program_start_offset=self.debug_line_sec.stream.tell(), program_end_offset=end_offset) + def parse_debugsupinfo(self): + """ + Extract a filename from either .debug_sup or .gnu_debugaltlink sections.
+ """ + if self.debug_sup_sec is not None: + self.debug_sup_sec.stream.seek(0) + suplink = self.structs.Dwarf_debugsup.parse_stream(self.debug_sup_sec.stream) + if suplink.is_supplementary == 0: + return suplink.sup_filename + if self.gnu_debugaltlink_sec is not None: + self.gnu_debugaltlink_sec.stream.seek(0) + suplink = self.structs.Dwarf_debugaltlink.parse_stream(self.gnu_debugaltlink_sec.stream) + return suplink.sup_filename + return None + diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 798f690..1a5ae45 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -13,7 +13,7 @@ from ..construct import ( SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence, - Switch, Value + String, Switch, Value ) from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128, StreamOffset) @@ -146,6 +146,9 @@ class DWARFStructs(object): self._create_loclists_parsers() self._create_rnglists_parsers() + self._create_debugsup() + self._create_gnu_debugaltlink() + def _create_initial_length(self): def _InitialLength(name): # Adapts a Struct that parses forward a full initial length field. @@ -191,6 +194,18 @@ class DWARFStructs(object): If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const', self.Dwarf_sleb128('value'))))) + def _create_debugsup(self): + # We don't care about checksums, for now. 
+ self.Dwarf_debugsup = Struct('Elf_debugsup', + self.Dwarf_int16('version'), + self.Dwarf_uint8('is_supplementary'), + CString('sup_filename')) + + def _create_gnu_debugaltlink(self): + self.Dwarf_debugaltlink = Struct('Elf_debugaltlink', + CString("sup_filename"), + String("sup_checksum", length=20)) + def _create_dw_form(self): self.Dwarf_dw_form = dict( DW_FORM_addr=self.Dwarf_target_addr(''), @@ -215,6 +230,7 @@ class DWARFStructs(object): DW_FORM_string=CString(''), DW_FORM_strp=self.Dwarf_offset(''), + DW_FORM_strp_sup=self.Dwarf_offset(''), DW_FORM_line_strp=self.Dwarf_offset(''), DW_FORM_strx1=self.Dwarf_uint8(''), DW_FORM_strx2=self.Dwarf_uint16(''), @@ -226,7 +242,9 @@ class DWARFStructs(object): DW_FORM_ref1=self.Dwarf_uint8(''), DW_FORM_ref2=self.Dwarf_uint16(''), DW_FORM_ref4=self.Dwarf_uint32(''), + DW_FORM_ref_sup4=self.Dwarf_uint32(''), DW_FORM_ref8=self.Dwarf_uint64(''), + DW_FORM_ref_sup8=self.Dwarf_uint64(''), DW_FORM_ref_udata=self.Dwarf_uleb128(''), DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''), diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index bdda624..e4ee5e9 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -7,6 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- import io +import os import struct import zlib @@ -46,6 +47,12 @@ class ELFFile(object): """ Creation: the constructor accepts a stream (file-like object) with the contents of an ELF file. + Optionally, a stream_loader function can be passed as the second + argument. This stream_loader function takes a relative file path to + load a supplementary object file, and returns a stream suitable for + creating a new ELFFile. Currently, the only such relative file path is + obtained from the supplementary object files. 
+ Accessible attributes: stream: @@ -69,7 +76,7 @@ class ELFFile(object): e_ident_raw: the raw e_ident field of the header """ - def __init__(self, stream): + def __init__(self, stream, stream_loader=None): self.stream = stream self._identify_file() self.structs = ELFStructs( @@ -88,6 +95,23 @@ class ELFFile(object): self._section_header_stringtable = \ self._get_section_header_stringtable() self._section_name_map = None + self.stream_loader = stream_loader + + @classmethod + def load_from_path(cls, path): + """Takes a path to a file on the local filesystem, and returns an + ELFFile from it, setting up a correct stream_loader relative to the + original file. + """ + base_directory = os.path.dirname(path) + def loader(elf_path): + # FIXME: use actual path instead of str/bytes + if not os.path.isabs(elf_path): + elf_path = os.path.join(base_directory, + elf_path) + return open(elf_path, 'rb') + stream = open(path, 'rb') + return ELFFile(stream, loader) def num_sections(self): """ Number of sections in the file @@ -203,12 +227,15 @@ class ELFFile(object): self.get_section_by_name('.zdebug_info') or self.get_section_by_name('.eh_frame')) - def get_dwarf_info(self, relocate_dwarf_sections=True): + def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): """ Return a DWARFInfo object representing the debugging information in this file. If relocate_dwarf_sections is True, relocations for DWARF sections are looked up and applied. + + If follow_links is True, we will try to load the supplementary + object file (if any), and use it to resolve references and imports. """ # Expect that has_dwarf_info was called, so at least .debug_info is # present. 
@@ -219,8 +246,8 @@ class ELFFile(object): '.debug_loc', '.debug_ranges', '.debug_pubtypes', '.debug_pubnames', '.debug_addr', '.debug_str_offsets', '.debug_line_str', - '.debug_loclists', '.debug_rnglists') - + '.debug_loclists', '.debug_rnglists', + '.debug_sup', '.gnu_debugaltlink') compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -234,7 +261,7 @@ class ELFFile(object): debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, debug_pubnames_name, debug_addr_name, debug_str_offsets_name, debug_line_str_name, debug_loclists_sec_name, debug_rnglists_sec_name, - eh_frame_sec_name) = section_names + debug_sup_name, gnu_debugaltlink_name, eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -249,7 +276,11 @@ class ELFFile(object): dwarf_section = self._decompress_dwarf_section(dwarf_section) debug_sections[secname] = dwarf_section - return DWARFInfo( + # Lookup if we have any of the .gnu_debugaltlink (GNU proprietary + # implementation) or .debug_sup sections, referencing a supplementary + # DWARF file + + dwarfinfo = DWARFInfo( config=DwarfConfig( little_endian=self.little_endian, default_address_size=self.elfclass // 8, @@ -269,8 +300,29 @@ class ELFFile(object): debug_str_offsets_sec=debug_sections[debug_str_offsets_name], debug_line_str_sec=debug_sections[debug_line_str_name], debug_loclists_sec=debug_sections[debug_loclists_sec_name], - debug_rnglists_sec=debug_sections[debug_rnglists_sec_name] + debug_rnglists_sec=debug_sections[debug_rnglists_sec_name], + debug_sup_sec=debug_sections[debug_sup_name], + gnu_debugaltlink_sec=debug_sections[gnu_debugaltlink_name] ) + if follow_links: + dwarfinfo.supplementary_dwarfinfo = self.get_supplementary_dwarfinfo(dwarfinfo) + return dwarfinfo + + + def get_supplementary_dwarfinfo(self, dwarfinfo): + """ + Read supplementary dwarfinfo, from either the standard .debug_sup + section or the GNU proprietary .gnu_debugaltlink.
+ """ + supfilepath = dwarfinfo.parse_debugsupinfo() + if supfilepath is not None and self.stream_loader is not None: + stream = self.stream_loader(supfilepath) + supelffile = ELFFile(stream) + dwarf_info = supelffile.get_dwarf_info() + stream.close() + return dwarf_info + return None + def has_ehabi_info(self): """ Check whether this file appears to have arm exception handler index table. @@ -765,3 +817,12 @@ class ELFFile(object): ) return section._replace(stream=uncompressed_stream, size=size) + + def close(self): + self.stream.close() + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index b437eec..74ea54d 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -380,6 +380,11 @@ class ELFStructs(object): self.Elf_word('abi_tiny'), ) + def _create_gnu_debugaltlink(self): + self.Elf_debugaltlink = Struct('Elf_debugaltlink', + CString("sup_filename"), + String("sup_checksum", length=20)) + def _create_gnu_property(self): # Structure of GNU property notes is documented in # https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py index c92e442..472a595 100644 --- a/test/test_refaddr_bitness.py +++ b/test/test_refaddr_bitness.py @@ -48,7 +48,9 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): debug_str_offsets_sec=None, debug_line_str_sec=None, debug_loclists_sec = None, - debug_rnglists_sec = None + debug_rnglists_sec = None, + debug_sup_sec = None, + gnu_debugaltlink_sec = None ) CUs = [cu for cu in di.iter_CUs()] diff --git a/test/test_supplementary_object_files.py b/test/test_supplementary_object_files.py new file mode 100644 index 0000000..ee01a2d --- /dev/null +++ b/test/test_supplementary_object_files.py @@ -0,0 +1,101 @@ +# The test_gnudebugaltlink* and test_debugsup* files have been generated as +# follows: +# $ cat test_sup.c +# int 
main(int argc, char** argv) +# { +# return argc; +# } +# +# $ gcc test_sup.c -o test_debugsup1 +# $ gcc test_sup.c -o test_debugsup2 +# $ dwz test_debugsup1 test_debugsup2 -m test_debugsup.common --dwarf-5 +# +# $ gcc test_sup.c -o test_gnudebugaltlink1 +# $ gcc test_sup.c -o test_gnudebugaltlink2 +# $ dwz test_gnudebugaltlink1 test_gnudebugaltlink2 -m test_gnudebugaltlink.common + +import unittest +import os + +from elftools.elf.elffile import ELFFile + +class TestDWARFSupplementaryObjects(unittest.TestCase): + + def test_gnudebugaltlink_no_followlinks(self): + path = os.path.join('test', 'testfiles_for_unittests', + 'test_gnudebugaltlink1') + with open(path, 'rb') as f: + elffile = ELFFile(f) + # Check that we don't have a supplementary_dwarfinfo + dwarfinfo = elffile.get_dwarf_info(follow_links=False) + self.assertIsNone(dwarfinfo.supplementary_dwarfinfo) + # Check that imported units are present + self.assertTrue(any(die.tag == 'DW_TAG_imported_unit' + for cu in dwarfinfo.iter_CUs() + for die in cu.iter_DIEs())) + # Check that DW_FORM_GNU_strp_alt keep their raw_value. + for cu in dwarfinfo.iter_CUs(): + for die in cu.iter_DIEs(): + attrs = die.attributes + if ('DW_AT_name' in attrs and + attrs['DW_AT_name'].form == 'DW_FORM_GNU_strp_alt'): + self.assertEqual(attrs['DW_AT_name'].value, + attrs['DW_AT_name'].raw_value) + + def test_gnudebugaltlink_followlinks(self): + base_dir = os.path.join(b'test', b'testfiles_for_unittests') + path = os.path.join(base_dir, b'test_gnudebugaltlink1') + with ELFFile.load_from_path(path) as elffile: + # Check that we do have a supplementary_dwarfinfo + dwarfinfo = elffile.get_dwarf_info() + self.assertIsNotNone(dwarfinfo.supplementary_dwarfinfo) + # Check that imported units are replaced by what they refer to. 
+ self.assertTrue(all(die.tag != 'DW_TAG_imported_unit' + for cu in dwarfinfo.iter_CUs() + for die in cu.iter_DIEs())) + # Check that DW_FORM_GNU_strp_alt get a proper reference + for cu in dwarfinfo.iter_CUs(): + for die in cu.iter_DIEs(): + attrs = die.attributes + if ('DW_AT_name' in attrs and attrs['DW_AT_name'].form == + 'DW_FORM_GNU_strp_alt'): + self.assertIsInstance(attrs['DW_AT_name'].value, bytes) + + def test_debugsup_no_followlinks(self): + path = os.path.join('test', 'testfiles_for_unittests', + 'test_debugsup1') + with ELFFile.load_from_path(path) as elffile: + # Check that we don't have a supplementary_dwarfinfo + dwarfinfo = elffile.get_dwarf_info(follow_links=False) + self.assertIsNone(dwarfinfo.supplementary_dwarfinfo) + # Check that imported units are present + self.assertTrue(any(die.tag == 'DW_TAG_imported_unit' + for cu in dwarfinfo.iter_CUs() + for die in cu.iter_DIEs())) + # Check that DW_FORM_strp_sup attributes keep their raw_value. + for cu in dwarfinfo.iter_CUs(): + for die in cu.iter_DIEs(): + attrs = die.attributes + if ('DW_AT_name' in attrs and + attrs['DW_AT_name'].form == 'DW_FORM_strp_sup'): + self.assertEqual(attrs['DW_AT_name'].value, + attrs['DW_AT_name'].raw_value) + + def test_debugsup_followlinks(self): + base_dir = os.path.join(b'test', b'testfiles_for_unittests') + path = os.path.join(base_dir, b'test_debugsup1') + with ELFFile.load_from_path(path) as elffile: + # Check that we do have a supplementary_dwarfinfo + dwarfinfo = elffile.get_dwarf_info() + self.assertIsNotNone(dwarfinfo.supplementary_dwarfinfo) + # Check that imported units are replaced by what they refer to.
+ self.assertTrue(all(die.tag != 'DW_TAG_imported_unit' + for cu in dwarfinfo.iter_CUs() + for die in cu.iter_DIEs())) + # Check that DW_FORM_strp_sup attributes get a proper reference + for cu in dwarfinfo.iter_CUs(): + for die in cu.iter_DIEs(): + attrs = die.attributes + if ('DW_AT_name' in attrs and attrs['DW_AT_name'].form == + 'DW_FORM_strp_sup'): + self.assertIsInstance(attrs['DW_AT_name'].value, bytes) diff --git a/test/testfiles_for_unittests/test_debugsup.common b/test/testfiles_for_unittests/test_debugsup.common new file mode 100644 index 0000000..d9d9c9a Binary files /dev/null and b/test/testfiles_for_unittests/test_debugsup.common differ diff --git a/test/testfiles_for_unittests/test_debugsup1 b/test/testfiles_for_unittests/test_debugsup1 new file mode 100755 index 0000000..bedc082 Binary files /dev/null and b/test/testfiles_for_unittests/test_debugsup1 differ diff --git a/test/testfiles_for_unittests/test_debugsup2 b/test/testfiles_for_unittests/test_debugsup2 new file mode 100755 index 0000000..bedc082 Binary files /dev/null and b/test/testfiles_for_unittests/test_debugsup2 differ diff --git a/test/testfiles_for_unittests/test_gnudebugaltlink.common b/test/testfiles_for_unittests/test_gnudebugaltlink.common new file mode 100644 index 0000000..1bc21c9 Binary files /dev/null and b/test/testfiles_for_unittests/test_gnudebugaltlink.common differ diff --git a/test/testfiles_for_unittests/test_gnudebugaltlink1 b/test/testfiles_for_unittests/test_gnudebugaltlink1 new file mode 100755 index 0000000..0374b5d Binary files /dev/null and b/test/testfiles_for_unittests/test_gnudebugaltlink1 differ diff --git a/test/testfiles_for_unittests/test_gnudebugaltlink2 b/test/testfiles_for_unittests/test_gnudebugaltlink2 new file mode 100755 index 0000000..0374b5d Binary files /dev/null and b/test/testfiles_for_unittests/test_gnudebugaltlink2 differ