From: Audrey Dutcher Date: Sat, 7 Mar 2020 13:39:13 +0000 (+0700) Subject: Add resilience for degenerate cases present in files with only debug information... X-Git-Tag: v0.27~59 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a3f1b7ca4423621f540a2581289ce57cb4bf3f72;p=pyelftools.git Add resilience for degenerate cases present in files with only debug information (#287) Some ELF files which contain only debug symbols have important sections present in the section table but marked as NOBITS instead of PROGBITS. Attempting to extract the segments can lead to crashes through parsing invalid data. The first patch modifies the dynamic segment/section specifically to add a flag for this case, since it seems to assume that there will always be at least one entry, DT_NULL. The second patch modifies the segment code more generally to return a dummy answer for what data it holds. The actual way that this change prevents a crash is while trying to parse .eh_frame when it is in fact NOBITS - originally I had a more targeted patch, but decided that it was important enough to do more generally --- diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index fcdec4e..5a7c99c 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -73,13 +73,22 @@ class DynamicTag(object): class Dynamic(object): """ Shared functionality between dynamic sections and segments. """ - def __init__(self, stream, elffile, stringtable, position): + def __init__(self, stream, elffile, stringtable, position, empty): + """ + :param stream: The file-like object from which to load data + :param elffile: The parent elffile object + :param stringtable: A stringtable reference to use for parsing string references in entries + :param position: The file offset of the dynamic segment/section + :param empty: Whether this is a degenerate case with zero entries. Normally, every dynamic table + will have at least one entry, the DT_NULL terminator. + """ self.elffile = elffile self.elfstructs = elffile.structs self._stream = stream - self._num_tags = -1 + self._num_tags = -1 if not empty else 0 self._offset = position self._tagsize = self.elfstructs.Elf_Dyn.sizeof() + self._empty = empty # Do not access this directly yourself; use _get_stringtable() instead. self._stringtable = stringtable @@ -125,6 +134,8 @@ class Dynamic(object): def _iter_tags(self, type=None): """ Yield all raw tags (limit to |type| if specified) """ + if self._empty: + return for n in itertools.count(): tag = self._get_tag(n) if type is None or tag['d_tag'] == type: @@ -141,6 +152,8 @@ class Dynamic(object): def _get_tag(self, n): """ Get the raw tag at index #n from the file """ + if self._num_tags != -1 and n >= self._num_tags: + raise IndexError(n) offset = self._offset + n * self._tagsize return struct_parse( self.elfstructs.Elf_Dyn, @@ -153,7 +166,7 @@ class Dynamic(object): return DynamicTag(self._get_tag(n), self._get_stringtable()) def num_tags(self): - """ Number of dynamic tags in the file + """ Number of dynamic tags in the file, including the DT_NULL tag """ if self._num_tags != -1: return self._num_tags @@ -207,7 +220,7 @@ class DynamicSection(Section, Dynamic): Section.__init__(self, header, name, elffile) stringtable = elffile.get_section(header['sh_link']) Dynamic.__init__(self, self.stream, self.elffile, stringtable, - self['sh_offset']) + self['sh_offset'], self['sh_type'] == 'SHT_NOBITS') class DynamicSegment(Segment, Dynamic): @@ -227,7 +240,8 @@ class DynamicSegment(Segment, Dynamic): stringtable = elffile.get_section(section['sh_link']) break Segment.__init__(self, header, stream) - Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset']) + Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'], + self['p_filesz'] == 0) self._symbol_list = None self._symbol_name_map = None diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 9d16b24..f60e3b3 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -74,6 +74,10 @@ class Section(object): Note that data is decompressed if the stored section data is compressed. """ + # If this section is NOBITS, there is no data. provide a dummy answer + if self.header['sh_type'] == 'SHT_NOBITS': + return b'\0'*self.data_size + # If this section is compressed, deflate it if self.compressed: c_type = self._compression_type @@ -137,7 +141,7 @@ class StringTableSection(Section): """ table_offset = self['sh_offset'] s = parse_cstring_from_stream(self.stream, table_offset + offset) - return s.decode('utf-8') if s else '' + return s.decode('utf-8', errors='replace') if s else '' class SymbolTableSection(Section): @@ -267,7 +271,7 @@ class StabSection(Section): while offset < end: stabs = struct_parse( self.structs.Elf_Stabs, - self.elffile.stream, + self.stream, stream_pos=offset) stabs['n_offset'] = offset offset += self.structs.Elf_Stabs.sizeof() diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 789656d..67ff1f5 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -43,6 +43,17 @@ class ELFStructs(object): assert elfclass == 32 or elfclass == 64 self.little_endian = little_endian self.elfclass = elfclass + self.e_type = None + self.e_machine = None + self.e_ident_osabi = None + + def __getstate__(self): + return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi + + def __setstate__(self, state): + self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state + self.create_basic_structs() + self.create_advanced_structs(e_type, e_machine, e_osabi) def create_basic_structs(self): """ Create word-size related structs and ehdr struct needed for @@ -76,12 +87,16 @@ class ELFStructs(object): """ Create all ELF structs except the ehdr. They may possibly depend on provided e_type and/or e_machine parsed from ehdr. """ - self._create_phdr(e_machine) - self._create_shdr(e_machine) + self.e_type = e_type + self.e_machine = e_machine + self.e_ident_osabi = e_ident_osabi + + self._create_phdr() + self._create_shdr() self._create_chdr() self._create_sym() self._create_rel() - self._create_dyn(e_machine, e_ident_osabi) + self._create_dyn() self._create_sunw_syminfo() self._create_gnu_verneed() self._create_gnu_verdef() @@ -127,13 +142,13 @@ class ELFStructs(object): def _create_ntbs(self): self.Elf_ntbs = CString - def _create_phdr(self, e_machine=None): + def _create_phdr(self): p_type_dict = ENUM_P_TYPE_BASE - if e_machine == 'EM_ARM': + if self.e_machine == 'EM_ARM': p_type_dict = ENUM_P_TYPE_ARM - elif e_machine == 'EM_AARCH64': + elif self.e_machine == 'EM_AARCH64': p_type_dict = ENUM_P_TYPE_AARCH64 - elif e_machine == 'EM_MIPS': + elif self.e_machine == 'EM_MIPS': p_type_dict = ENUM_P_TYPE_MIPS if self.elfclass == 32: @@ -159,17 +174,17 @@ class ELFStructs(object): self.Elf_xword('p_align'), ) - def _create_shdr(self, e_machine=None): + def _create_shdr(self): """Section header parsing. Depends on e_machine because of machine-specific values in sh_type. """ sh_type_dict = ENUM_SH_TYPE_BASE - if e_machine == 'EM_ARM': + if self.e_machine == 'EM_ARM': sh_type_dict = ENUM_SH_TYPE_ARM - elif e_machine == 'EM_X86_64': + elif self.e_machine == 'EM_X86_64': sh_type_dict = ENUM_SH_TYPE_AMD64 - elif e_machine == 'EM_MIPS': + elif self.e_machine == 'EM_MIPS': sh_type_dict = ENUM_SH_TYPE_MIPS self.Elf_Shdr = Struct('Elf_Shdr', @@ -227,11 +242,11 @@ class ELFStructs(object): self.Elf_sxword('r_addend'), ) - def _create_dyn(self, e_machine=None, e_ident_osabi=None): + def _create_dyn(self): d_tag_dict = dict(ENUM_D_TAG_COMMON) - if e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: - d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[e_machine]) - elif e_ident_osabi == 'ELFOSABI_SOLARIS': + if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: + d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine]) + elif self.e_ident_osabi == 'ELFOSABI_SOLARIS': d_tag_dict.update(ENUM_D_TAG_SOLARIS) self.Elf_Dyn = Struct('Elf_Dyn', diff --git a/test/test_dbgfile.py b/test/test_dbgfile.py new file mode 100644 index 0000000..bd5f06e --- /dev/null +++ b/test/test_dbgfile.py @@ -0,0 +1,54 @@ +""" +Test that elftools does not fail to load debug symbol ELF files +""" +import unittest +import os + +from elftools.elf.elffile import ELFFile, DynamicSection +from elftools.dwarf.callframe import ZERO + +class TestDBGFile(unittest.TestCase): + def test_dynamic_segment(self): + """ + Test that the degenerate case for the dynamic segment does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + + seen_dynamic_segment = False + for segment in elf.iter_segments(): + if segment.header.p_type != 'PT_DYNAMIC': + continue + + self.assertEqual(segment.num_tags(), 0, "The dynamic segment in this file should be empty") + seen_dynamic_segment = True + break + + self.assertTrue(seen_dynamic_segment, "There should be a dynamic segment in this file") + + def test_dynamic_section(self): + """ + Test that the degenerate case for the dynamic section does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + section = DynamicSection(elf.get_section_by_name('.dynamic').header, '.dynamic', elf) + + self.assertEqual(section.num_tags(), 0, "The dynamic section in this file should be empty") + + def test_eh_frame(self): + """ + Test that parsing .eh_frame with SHT_NOBITS does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + dwarf = elf.get_dwarf_info() + eh_frame = list(dwarf.EH_CFI_entries()) + self.assertEqual(len(eh_frame), 1, "There should only be the ZERO entry in eh_frame") + self.assertIs(type(eh_frame[0]), ZERO, "The only eh_frame entry should be the terminator") + +if __name__ == '__main__': + unittest.main() diff --git a/test/testfiles_for_unittests/debug_info.elf b/test/testfiles_for_unittests/debug_info.elf new file mode 100644 index 0000000..502b920 Binary files /dev/null and b/test/testfiles_for_unittests/debug_info.elf differ