Add resilience for degenerate cases present in files with only debug information...
authorAudrey Dutcher <audrey@rhelmot.io>
Sat, 7 Mar 2020 13:39:13 +0000 (20:39 +0700)
committerGitHub <noreply@github.com>
Sat, 7 Mar 2020 13:39:13 +0000 (05:39 -0800)
Some ELF files which contain only debug symbols have important sections present in the section table but marked as NOBITS instead of PROGBITS. Attempting to extract the segments can lead to crashes through parsing invalid data.

The first patch modifies the dynamic segment/section specifically to add a flag for this case, since it seems to assume that there will always be at least one entry, DT_NULL.

The second patch modifies the segment code more generally to return a dummy answer for what data it holds. The actual way that this change prevents a crash is while trying to parse .eh_frame when it is in fact NOBITS - originally I had a more targeted patch, but decided that it was important enough to do more generally

elftools/elf/dynamic.py
elftools/elf/sections.py
elftools/elf/structs.py
test/test_dbgfile.py [new file with mode: 0644]
test/testfiles_for_unittests/debug_info.elf [new file with mode: 0644]

index fcdec4e6639e78731657bd13a3352886d1387d6c..5a7c99c7883851f1800f63bb4313242b7e3a5a57 100644 (file)
@@ -73,13 +73,22 @@ class DynamicTag(object):
 class Dynamic(object):
     """ Shared functionality between dynamic sections and segments.
     """
-    def __init__(self, stream, elffile, stringtable, position):
+    def __init__(self, stream, elffile, stringtable, position, empty):
+        """
+        :param stream:          The file-like object from which to load data
+        :param elffile:         The parent elffile object
+        :param stringtable:     A stringtable reference to use for parsing string references in entries
+        :param position:        The file offset of the dynamic segment/section
+        :param empty:           Whether this is a degenerate case with zero entries. Normally, every dynamic table
+                                will have at least one entry, the DT_NULL terminator.
+        """
         self.elffile = elffile
         self.elfstructs = elffile.structs
         self._stream = stream
-        self._num_tags = -1
+        self._num_tags = -1 if not empty else 0
         self._offset = position
         self._tagsize = self.elfstructs.Elf_Dyn.sizeof()
+        self._empty = empty
 
         # Do not access this directly yourself; use _get_stringtable() instead.
         self._stringtable = stringtable
@@ -125,6 +134,8 @@ class Dynamic(object):
     def _iter_tags(self, type=None):
         """ Yield all raw tags (limit to |type| if specified)
         """
+        if self._empty:
+            return
         for n in itertools.count():
             tag = self._get_tag(n)
             if type is None or tag['d_tag'] == type:
@@ -141,6 +152,8 @@ class Dynamic(object):
     def _get_tag(self, n):
         """ Get the raw tag at index #n from the file
         """
+        if self._num_tags != -1 and n >= self._num_tags:
+            raise IndexError(n)
         offset = self._offset + n * self._tagsize
         return struct_parse(
             self.elfstructs.Elf_Dyn,
@@ -153,7 +166,7 @@ class Dynamic(object):
         return DynamicTag(self._get_tag(n), self._get_stringtable())
 
     def num_tags(self):
-        """ Number of dynamic tags in the file
+        """ Number of dynamic tags in the file, including the DT_NULL tag
         """
         if self._num_tags != -1:
             return self._num_tags
@@ -207,7 +220,7 @@ class DynamicSection(Section, Dynamic):
         Section.__init__(self, header, name, elffile)
         stringtable = elffile.get_section(header['sh_link'])
         Dynamic.__init__(self, self.stream, self.elffile, stringtable,
-            self['sh_offset'])
+            self['sh_offset'], self['sh_type'] == 'SHT_NOBITS')
 
 
 class DynamicSegment(Segment, Dynamic):
@@ -227,7 +240,8 @@ class DynamicSegment(Segment, Dynamic):
                 stringtable = elffile.get_section(section['sh_link'])
                 break
         Segment.__init__(self, header, stream)
-        Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'])
+        Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'],
+             self['p_filesz'] == 0)
         self._symbol_list = None
         self._symbol_name_map = None
 
index 9d16b241fa5bb63a032b2ac71c3e0e0d700f559e..f60e3b3585ae8424585f42fb65984460e384b154 100644 (file)
@@ -74,6 +74,10 @@ class Section(object):
         Note that data is decompressed if the stored section data is
         compressed.
         """
+        # If this section is NOBITS, there is no data. provide a dummy answer
+        if self.header['sh_type'] == 'SHT_NOBITS':
+            return b'\0'*self.data_size
+
         # If this section is compressed, deflate it
         if self.compressed:
             c_type = self._compression_type
@@ -137,7 +141,7 @@ class StringTableSection(Section):
         """
         table_offset = self['sh_offset']
         s = parse_cstring_from_stream(self.stream, table_offset + offset)
-        return s.decode('utf-8') if s else ''
+        return s.decode('utf-8', errors='replace') if s else ''
 
 
 class SymbolTableSection(Section):
@@ -267,7 +271,7 @@ class StabSection(Section):
         while offset < end:
             stabs = struct_parse(
                 self.structs.Elf_Stabs,
-                self.elffile.stream,
+                self.stream,
                 stream_pos=offset)
             stabs['n_offset'] = offset
             offset += self.structs.Elf_Stabs.sizeof()
index 789656d1ac2f402934e3d894c4e7fc43b6f671b3..67ff1f52a8f191c5485887ebd91fb680785d7096 100644 (file)
@@ -43,6 +43,17 @@ class ELFStructs(object):
         assert elfclass == 32 or elfclass == 64
         self.little_endian = little_endian
         self.elfclass = elfclass
+        self.e_type = None
+        self.e_machine = None
+        self.e_ident_osabi = None
+
+    def __getstate__(self):
+        return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi
+
+    def __setstate__(self, state):
+        self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state
+        self.create_basic_structs()
+        self.create_advanced_structs(e_type, e_machine, e_osabi)
 
     def create_basic_structs(self):
         """ Create word-size related structs and ehdr struct needed for
@@ -76,12 +87,16 @@ class ELFStructs(object):
         """ Create all ELF structs except the ehdr. They may possibly depend
             on provided e_type and/or e_machine parsed from ehdr.
         """
-        self._create_phdr(e_machine)
-        self._create_shdr(e_machine)
+        self.e_type = e_type
+        self.e_machine = e_machine
+        self.e_ident_osabi = e_ident_osabi
+
+        self._create_phdr()
+        self._create_shdr()
         self._create_chdr()
         self._create_sym()
         self._create_rel()
-        self._create_dyn(e_machine, e_ident_osabi)
+        self._create_dyn()
         self._create_sunw_syminfo()
         self._create_gnu_verneed()
         self._create_gnu_verdef()
@@ -127,13 +142,13 @@ class ELFStructs(object):
     def _create_ntbs(self):
         self.Elf_ntbs = CString
 
-    def _create_phdr(self, e_machine=None):
+    def _create_phdr(self):
         p_type_dict = ENUM_P_TYPE_BASE
-        if e_machine == 'EM_ARM':
+        if self.e_machine == 'EM_ARM':
             p_type_dict = ENUM_P_TYPE_ARM
-        elif e_machine == 'EM_AARCH64':
+        elif self.e_machine == 'EM_AARCH64':
             p_type_dict = ENUM_P_TYPE_AARCH64
-        elif e_machine == 'EM_MIPS':
+        elif self.e_machine == 'EM_MIPS':
             p_type_dict = ENUM_P_TYPE_MIPS
 
         if self.elfclass == 32:
@@ -159,17 +174,17 @@ class ELFStructs(object):
                 self.Elf_xword('p_align'),
             )
 
-    def _create_shdr(self, e_machine=None):
+    def _create_shdr(self):
         """Section header parsing.
 
         Depends on e_machine because of machine-specific values in sh_type.
         """
         sh_type_dict = ENUM_SH_TYPE_BASE
-        if e_machine == 'EM_ARM':
+        if self.e_machine == 'EM_ARM':
             sh_type_dict = ENUM_SH_TYPE_ARM
-        elif e_machine == 'EM_X86_64':
+        elif self.e_machine == 'EM_X86_64':
             sh_type_dict = ENUM_SH_TYPE_AMD64
-        elif e_machine == 'EM_MIPS':
+        elif self.e_machine == 'EM_MIPS':
             sh_type_dict = ENUM_SH_TYPE_MIPS
 
         self.Elf_Shdr = Struct('Elf_Shdr',
@@ -227,11 +242,11 @@ class ELFStructs(object):
             self.Elf_sxword('r_addend'),
         )
 
-    def _create_dyn(self, e_machine=None, e_ident_osabi=None):
+    def _create_dyn(self):
         d_tag_dict = dict(ENUM_D_TAG_COMMON)
-        if e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
-            d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[e_machine])
-        elif e_ident_osabi == 'ELFOSABI_SOLARIS':
+        if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
+            d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine])
+        elif self.e_ident_osabi == 'ELFOSABI_SOLARIS':
             d_tag_dict.update(ENUM_D_TAG_SOLARIS)
 
         self.Elf_Dyn = Struct('Elf_Dyn',
diff --git a/test/test_dbgfile.py b/test/test_dbgfile.py
new file mode 100644 (file)
index 0000000..bd5f06e
--- /dev/null
@@ -0,0 +1,54 @@
+"""
+Test that elftools does not fail to load debug symbol ELF files
+"""
+import unittest
+import os
+
+from elftools.elf.elffile import ELFFile, DynamicSection
+from elftools.dwarf.callframe import ZERO
+
+class TestDBGFile(unittest.TestCase):
+    def test_dynamic_segment(self):
+        """
+        Test that the degenerate case for the dynamic segment does not crash
+        """
+        with open(os.path.join('test', 'testfiles_for_unittests',
+                               'debug_info.elf'), 'rb') as f:
+            elf = ELFFile(f)
+
+            seen_dynamic_segment = False
+            for segment in elf.iter_segments():
+                if segment.header.p_type != 'PT_DYNAMIC':
+                    continue
+
+                self.assertEqual(segment.num_tags(), 0, "The dynamic segment in this file should be empty")
+                seen_dynamic_segment = True
+                break
+
+            self.assertTrue(seen_dynamic_segment, "There should be a dynamic segment in this file")
+
+    def test_dynamic_section(self):
+        """
+        Test that the degenerate case for the dynamic section does not crash
+        """
+        with open(os.path.join('test', 'testfiles_for_unittests',
+                               'debug_info.elf'), 'rb') as f:
+            elf = ELFFile(f)
+            section = DynamicSection(elf.get_section_by_name('.dynamic').header, '.dynamic', elf)
+
+            self.assertEqual(section.num_tags(), 0, "The dynamic section in this file should be empty")
+
+    def test_eh_frame(self):
+        """
+        Test that parsing .eh_frame with SHT_NOBITS does not crash
+        """
+        with open(os.path.join('test', 'testfiles_for_unittests',
+                               'debug_info.elf'), 'rb') as f:
+            elf = ELFFile(f)
+            dwarf = elf.get_dwarf_info()
+            eh_frame = list(dwarf.EH_CFI_entries())
+            self.assertEqual(len(eh_frame), 1, "There should only be the ZERO entry in eh_frame")
+            self.assertIs(type(eh_frame[0]), ZERO, "The only eh_frame entry should be the terminator")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/testfiles_for_unittests/debug_info.elf b/test/testfiles_for_unittests/debug_info.elf
new file mode 100644 (file)
index 0000000..502b920
Binary files /dev/null and b/test/testfiles_for_unittests/debug_info.elf differ