Some ELF files that contain only debug symbols still list important sections in the section table, but mark them as NOBITS instead of PROGBITS. Attempting to extract the segments can then crash, because the parser ends up reading invalid data.
The first patch modifies the dynamic segment/section code specifically, adding a flag for this case, because that code seems to assume there will always be at least one entry: the DT_NULL terminator.
The second patch modifies the section code more generally so that a NOBITS section returns a dummy answer (zero-filled bytes of the section's size) for the data it holds. Concretely, this change prevents a crash while trying to parse .eh_frame when it is in fact NOBITS. Originally I had a more targeted patch, but I decided that the fix was important enough to apply more generally.
class Dynamic(object):
""" Shared functionality between dynamic sections and segments.
"""
- def __init__(self, stream, elffile, stringtable, position):
+ def __init__(self, stream, elffile, stringtable, position, empty):
+ """
+ :param stream: The file-like object from which to load data
+ :param elffile: The parent elffile object
+ :param stringtable: A stringtable reference to use for parsing string references in entries
+ :param position: The file offset of the dynamic segment/section
+ :param empty: Whether this is a degenerate case with zero entries. Normally, every dynamic table
+ will have at least one entry, the DT_NULL terminator.
+ """
self.elffile = elffile
self.elfstructs = elffile.structs
self._stream = stream
- self._num_tags = -1
+ self._num_tags = -1 if not empty else 0
self._offset = position
self._tagsize = self.elfstructs.Elf_Dyn.sizeof()
+ self._empty = empty
# Do not access this directly yourself; use _get_stringtable() instead.
self._stringtable = stringtable
def _iter_tags(self, type=None):
""" Yield all raw tags (limit to |type| if specified)
"""
+ if self._empty:
+ return
for n in itertools.count():
tag = self._get_tag(n)
if type is None or tag['d_tag'] == type:
def _get_tag(self, n):
""" Get the raw tag at index #n from the file
"""
+ if self._num_tags != -1 and n >= self._num_tags:
+ raise IndexError(n)
offset = self._offset + n * self._tagsize
return struct_parse(
self.elfstructs.Elf_Dyn,
return DynamicTag(self._get_tag(n), self._get_stringtable())
def num_tags(self):
- """ Number of dynamic tags in the file
+ """ Number of dynamic tags in the file, including the DT_NULL tag
"""
if self._num_tags != -1:
return self._num_tags
Section.__init__(self, header, name, elffile)
stringtable = elffile.get_section(header['sh_link'])
Dynamic.__init__(self, self.stream, self.elffile, stringtable,
- self['sh_offset'])
+ self['sh_offset'], self['sh_type'] == 'SHT_NOBITS')
class DynamicSegment(Segment, Dynamic):
stringtable = elffile.get_section(section['sh_link'])
break
Segment.__init__(self, header, stream)
- Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'])
+ Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'],
+ self['p_filesz'] == 0)
self._symbol_list = None
self._symbol_name_map = None
Note that data is decompressed if the stored section data is
compressed.
"""
+ # If this section is NOBITS, there is no data. provide a dummy answer
+ if self.header['sh_type'] == 'SHT_NOBITS':
+ return b'\0'*self.data_size
+
# If this section is compressed, deflate it
if self.compressed:
c_type = self._compression_type
"""
table_offset = self['sh_offset']
s = parse_cstring_from_stream(self.stream, table_offset + offset)
- return s.decode('utf-8') if s else ''
+ return s.decode('utf-8', errors='replace') if s else ''
class SymbolTableSection(Section):
while offset < end:
stabs = struct_parse(
self.structs.Elf_Stabs,
- self.elffile.stream,
+ self.stream,
stream_pos=offset)
stabs['n_offset'] = offset
offset += self.structs.Elf_Stabs.sizeof()
assert elfclass == 32 or elfclass == 64
self.little_endian = little_endian
self.elfclass = elfclass
+ self.e_type = None
+ self.e_machine = None
+ self.e_ident_osabi = None
+
+ def __getstate__(self):
+ return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi
+
+ def __setstate__(self, state):
+ self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state
+ self.create_basic_structs()
+ self.create_advanced_structs(e_type, e_machine, e_osabi)
def create_basic_structs(self):
""" Create word-size related structs and ehdr struct needed for
""" Create all ELF structs except the ehdr. They may possibly depend
on provided e_type and/or e_machine parsed from ehdr.
"""
- self._create_phdr(e_machine)
- self._create_shdr(e_machine)
+ self.e_type = e_type
+ self.e_machine = e_machine
+ self.e_ident_osabi = e_ident_osabi
+
+ self._create_phdr()
+ self._create_shdr()
self._create_chdr()
self._create_sym()
self._create_rel()
- self._create_dyn(e_machine, e_ident_osabi)
+ self._create_dyn()
self._create_sunw_syminfo()
self._create_gnu_verneed()
self._create_gnu_verdef()
def _create_ntbs(self):
self.Elf_ntbs = CString
- def _create_phdr(self, e_machine=None):
+ def _create_phdr(self):
p_type_dict = ENUM_P_TYPE_BASE
- if e_machine == 'EM_ARM':
+ if self.e_machine == 'EM_ARM':
p_type_dict = ENUM_P_TYPE_ARM
- elif e_machine == 'EM_AARCH64':
+ elif self.e_machine == 'EM_AARCH64':
p_type_dict = ENUM_P_TYPE_AARCH64
- elif e_machine == 'EM_MIPS':
+ elif self.e_machine == 'EM_MIPS':
p_type_dict = ENUM_P_TYPE_MIPS
if self.elfclass == 32:
self.Elf_xword('p_align'),
)
- def _create_shdr(self, e_machine=None):
+ def _create_shdr(self):
"""Section header parsing.
Depends on e_machine because of machine-specific values in sh_type.
"""
sh_type_dict = ENUM_SH_TYPE_BASE
- if e_machine == 'EM_ARM':
+ if self.e_machine == 'EM_ARM':
sh_type_dict = ENUM_SH_TYPE_ARM
- elif e_machine == 'EM_X86_64':
+ elif self.e_machine == 'EM_X86_64':
sh_type_dict = ENUM_SH_TYPE_AMD64
- elif e_machine == 'EM_MIPS':
+ elif self.e_machine == 'EM_MIPS':
sh_type_dict = ENUM_SH_TYPE_MIPS
self.Elf_Shdr = Struct('Elf_Shdr',
self.Elf_sxword('r_addend'),
)
- def _create_dyn(self, e_machine=None, e_ident_osabi=None):
+ def _create_dyn(self):
d_tag_dict = dict(ENUM_D_TAG_COMMON)
- if e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
- d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[e_machine])
- elif e_ident_osabi == 'ELFOSABI_SOLARIS':
+ if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
+ d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine])
+ elif self.e_ident_osabi == 'ELFOSABI_SOLARIS':
d_tag_dict.update(ENUM_D_TAG_SOLARIS)
self.Elf_Dyn = Struct('Elf_Dyn',
--- /dev/null
+"""
+Test that elftools does not fail to load debug symbol ELF files
+"""
+import unittest
+import os
+
+from elftools.elf.elffile import ELFFile, DynamicSection
+from elftools.dwarf.callframe import ZERO
+
+class TestDBGFile(unittest.TestCase):
+ def test_dynamic_segment(self):
+ """
+ Test that the degenerate case for the dynamic segment does not crash
+ """
+ with open(os.path.join('test', 'testfiles_for_unittests',
+ 'debug_info.elf'), 'rb') as f:
+ elf = ELFFile(f)
+
+ seen_dynamic_segment = False
+ for segment in elf.iter_segments():
+ if segment.header.p_type != 'PT_DYNAMIC':
+ continue
+
+ self.assertEqual(segment.num_tags(), 0, "The dynamic segment in this file should be empty")
+ seen_dynamic_segment = True
+ break
+
+ self.assertTrue(seen_dynamic_segment, "There should be a dynamic segment in this file")
+
+ def test_dynamic_section(self):
+ """
+ Test that the degenerate case for the dynamic section does not crash
+ """
+ with open(os.path.join('test', 'testfiles_for_unittests',
+ 'debug_info.elf'), 'rb') as f:
+ elf = ELFFile(f)
+ section = DynamicSection(elf.get_section_by_name('.dynamic').header, '.dynamic', elf)
+
+ self.assertEqual(section.num_tags(), 0, "The dynamic section in this file should be empty")
+
+ def test_eh_frame(self):
+ """
+ Test that parsing .eh_frame with SHT_NOBITS does not crash
+ """
+ with open(os.path.join('test', 'testfiles_for_unittests',
+ 'debug_info.elf'), 'rb') as f:
+ elf = ELFFile(f)
+ dwarf = elf.get_dwarf_info()
+ eh_frame = list(dwarf.EH_CFI_entries())
+ self.assertEqual(len(eh_frame), 1, "There should only be the ZERO entry in eh_frame")
+ self.assertIs(type(eh_frame[0]), ZERO, "The only eh_frame entry should be the terminator")
+
+if __name__ == '__main__':
+ unittest.main()