elf: support for ELF files with a large number of sections (#333)
author Andreas Ziegler <andreas.ziegler@fau.de>
Thu, 1 Oct 2020 13:45:19 +0000 (15:45 +0200)
committer GitHub <noreply@github.com>
Thu, 1 Oct 2020 13:45:19 +0000 (06:45 -0700)
* elf: implement support for ELF files with a large number of sections

As documented in the ELF specification [0] and reported in #330,
the number of sections (`e_shnum` member of the ELF header)
as well as the section table index of the section name string
table (`e_shstrndx` member) could reach or exceed the SHN_LORESERVE
(0xff00) value. In this case, the members of the ELF header
are set to 0 or SHN_XINDEX (0xffff), respectively, and the
actual values are found in the initial entry of the section
header table (which is otherwise set to zeroes).

So far, the implementation of `elffile.num_sections()`
didn't handle these situations and simply reported that the
file contained 0 sections, and `scripts/readelf.py` presented
invalid values.

Fix it by following the specification more closely and
showing the corresponding correct values in `readelf.py`.

[0]: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html

Closes: #330
* test: add test file with a large number of sections

This file was generated with the following commands:

$ for i in {1..65280}; do
    echo "void __attribute__((section(\"s.$i\"), naked)) f$i(void) {}";
done > many_sections.c;
echo "int main(){}" >> many_sections.c

$ gcc-8 -fno-asynchronous-unwind-tables -c -o many_sections.o.elf many_sections.c

$ strip many_sections.o.elf

elftools/elf/elffile.py
elftools/elf/sections.py
scripts/readelf.py
test/testfiles_for_readelf/many_sections.o.elf [new file with mode: 0644]

index 5020f4c6b0fafabb3d8b07cb062a656d7daa3d09..e6cf7c26824c042e4ff4b0ac7eea2d3df8b9bf11 100644 (file)
@@ -28,8 +28,8 @@ from ..common.utils import struct_parse, elf_assert
 from .structs import ELFStructs
 from .sections import (
         Section, StringTableSection, SymbolTableSection,
-        SUNWSyminfoTableSection, NullSection, NoteSection,
-        StabSection, ARMAttributesSection)
+        SymbolTableIndexSection, SUNWSyminfoTableSection, NullSection,
+        NoteSection, StabSection, ARMAttributesSection)
 from .dynamic import DynamicSection, DynamicSegment
 from .relocation import RelocationSection, RelocationHandler
 from .gnuversions import (
@@ -39,6 +39,7 @@ from .segments import Segment, InterpSegment, NoteSegment
 from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
 from ..ehabi.ehabiinfo import EHABIInfo
 from .hash import ELFHashSection, GNUHashSection
+from .constants import SHN_INDICES
 
 class ELFFile(object):
     """ Creation: the constructor accepts a stream (file-like object) with the
@@ -83,12 +84,25 @@ class ELFFile(object):
         self.stream.seek(0)
         self.e_ident_raw = self.stream.read(16)
 
-        self._file_stringtable_section = self._get_file_stringtable()
+        self._section_header_stringtable = \
+            self._get_section_header_stringtable()
         self._section_name_map = None
 
     def num_sections(self):
         """ Number of sections in the file
         """
+        if self['e_shoff'] == 0:
+            return 0
+        # From the ELF ABI documentation at
+        # https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html:
+        # "e_shnum normally tells how many entries the section header table
+        # contains. [...] If the number of sections is greater than or equal to
+        # SHN_LORESERVE (0xff00), e_shnum has the value SHN_UNDEF (0) and the
+        # actual number of section header table entries is contained in the
+        # sh_size field of the section header at index 0 (otherwise, the sh_size
+        # member of the initial entry contains 0)."
+        if self['e_shnum'] == 0:
+            return self._get_section_header(0)['sh_size']
         return self['e_shnum']
 
     def get_section(self, n):
@@ -437,6 +451,19 @@ class ELFFile(object):
 
         return architectures.get(self['e_machine'], '<unknown>')
 
+    def get_shstrndx(self):
+        """ Find the string table section index for the section header table
+        """
+        # From https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html:
+        # If the section name string table section index is greater than or
+        # equal to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX
+        # (0xffff) and the actual index of the section name string table section
+        # is contained in the sh_link field of the section header at index 0.
+        if self['e_shstrndx'] != SHN_INDICES.SHN_XINDEX:
+            return self['e_shstrndx']
+        else:
+            return self._get_section_header(0)['sh_link']
+
     #-------------------------------- PRIVATE --------------------------------#
 
     def __getitem__(self, name):
@@ -506,7 +533,7 @@ class ELFFile(object):
             string table
         """
         name_offset = section_header['sh_name']
-        return self._file_stringtable_section.get_string(name_offset)
+        return self._section_header_stringtable.get_string(name_offset)
 
     def _make_section(self, section_header):
         """ Create a section object of the appropriate type
@@ -520,6 +547,8 @@ class ELFFile(object):
             return NullSection(section_header, name, self)
         elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'):
             return self._make_symbol_table_section(section_header, name)
+        elif sectype == 'SHT_SYMTAB_SHNDX':
+            return self._make_symbol_table_index_section(section_header, name)
         elif sectype == 'SHT_SUNW_syminfo':
             return self._make_sunwsyminfo_table_section(section_header, name)
         elif sectype == 'SHT_GNU_verneed':
@@ -555,6 +584,14 @@ class ELFFile(object):
             elffile=self,
             stringtable=strtab_section)
 
+    def _make_symbol_table_index_section(self, section_header, name):
+        """ Create a SymbolTableIndexSection object
+        """
+        linked_symtab_index = section_header['sh_link']
+        return SymbolTableIndexSection(
+            section_header, name, elffile=self,
+            symboltable=linked_symtab_index)
+
     def _make_sunwsyminfo_table_section(self, section_header, name):
         """ Create a SUNWSyminfoTableSection
         """
@@ -617,10 +654,11 @@ class ELFFile(object):
             self.stream,
             stream_pos=self._segment_offset(n))
 
-    def _get_file_stringtable(self):
-        """ Find the file's string table section
+    def _get_section_header_stringtable(self):
+        """ Get the string table section corresponding to the section header
+            table.
         """
-        stringtable_section_num = self['e_shstrndx']
+        stringtable_section_num = self.get_shstrndx()
         return StringTableSection(
                 header=self._get_section_header(stringtable_section_num),
                 name='',
index 791b92769a244df4aa3381d2934aae6ec3936bc0..9a97a09f37897c7be30e9bf3955e9baf3c011032 100644 (file)
@@ -144,6 +144,26 @@ class StringTableSection(Section):
         return s.decode('utf-8', errors='replace') if s else ''
 
 
+class SymbolTableIndexSection(Section):
+    """ A section containing the section header table indices corresponding
+        to symbols in the linked symbol table. This section has to exist if the
+        symbol table contains an entry with a section header index set to
+        SHN_XINDEX (0xffff). The format of the section is described at
+        https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html
+    """
+    def __init__(self, header, name, elffile, symboltable):
+        super(SymbolTableIndexSection, self).__init__(header, name, elffile)
+        self.symboltable = symboltable
+
+    def get_section_index(self, n):
+        """ Get the section header table index for the symbol with index #n.
+            The section contains an array of Elf32_word values with one entry
+            for every symbol in the associated symbol table.
+        """
+        return struct_parse(self.elffile.structs.Elf_word(''), self.stream,
+                            self['sh_offset'] + n * self['sh_entsize'])
+
+
 class SymbolTableSection(Section):
     """ ELF symbol table section. Has an associated StringTableSection that's
         passed in the constructor.
index 6d358908d47e844b7eeda2924f244055a7a828f2..c298aa28466f2f0e9e61be15eb45f152f274f9de 100755 (executable)
@@ -32,7 +32,9 @@ from elftools.elf.elffile import ELFFile
 from elftools.elf.dynamic import DynamicSection, DynamicSegment
 from elftools.elf.enums import ENUM_D_TAG
 from elftools.elf.segments import InterpSegment
-from elftools.elf.sections import NoteSection, SymbolTableSection
+from elftools.elf.sections import (
+    NoteSection, SymbolTableSection, SymbolTableIndexSection
+)
 from elftools.elf.gnuversions import (
     GNUVerSymSection, GNUVerDefSection,
     GNUVerNeedSection,
@@ -51,6 +53,7 @@ from elftools.elf.descriptions import (
 from elftools.elf.constants import E_FLAGS
 from elftools.elf.constants import E_FLAGS_MASKS
 from elftools.elf.constants import SH_FLAGS
+from elftools.elf.constants import SHN_INDICES
 from elftools.dwarf.dwarfinfo import DWARFInfo
 from elftools.dwarf.descriptions import (
     describe_reg_name, describe_attr_value, set_global_machine_arch,
@@ -82,6 +85,8 @@ class ReadElf(object):
 
         self._versioninfo = None
 
+        self._shndx_sections = None
+
     def display_file_header(self):
         """ Display the ELF file header
         """
@@ -127,10 +132,18 @@ class ReadElf(object):
                 header['e_phnum'])
         self._emitline('  Size of section headers:           %s (bytes)' %
                 header['e_shentsize'])
-        self._emitline('  Number of section headers:         %s' %
+        self._emit('  Number of section headers:         %s' %
                 header['e_shnum'])
-        self._emitline('  Section header string table index: %s' %
+        if header['e_shnum'] == 0 and self.elffile.num_sections() != 0:
+            self._emitline(' (%d)' % self.elffile.num_sections())
+        else:
+            self._emitline('')
+        self._emit('  Section header string table index: %s' %
                 header['e_shstrndx'])
+        if header['e_shstrndx'] == SHN_INDICES.SHN_XINDEX:
+            self._emitline(' (%d)' % self.elffile.get_shstrndx())
+        else:
+            self._emitline('')
 
     def decode_flags(self, flags):
         description = ""
@@ -302,7 +315,7 @@ class ReadElf(object):
             return
 
         self._emitline('\nSection Header%s:' % (
-            's' if elfheader['e_shnum'] > 1 else ''))
+            's' if self.elffile.num_sections() > 1 else ''))
 
         # Different formatting constraints of 32-bit and 64-bit addresses
         #
@@ -357,7 +370,7 @@ class ReadElf(object):
         """
         self._init_versioninfo()
 
-        symbol_tables = [s for s in self.elffile.iter_sections()
+        symbol_tables = [(idx, s) for idx, s in enumerate(self.elffile.iter_sections())
                          if isinstance(s, SymbolTableSection)]
 
         if not symbol_tables and self.elffile.num_sections() == 0:
@@ -365,7 +378,7 @@ class ReadElf(object):
             self._emitline('Dynamic symbol information is not available for'
                            ' displaying symbols.')
 
-        for section in symbol_tables:
+        for section_index, section in symbol_tables:
             if not isinstance(section, SymbolTableSection):
                 continue
 
@@ -410,7 +423,9 @@ class ReadElf(object):
                     describe_symbol_type(symbol['st_info']['type']),
                     describe_symbol_bind(symbol['st_info']['bind']),
                     describe_symbol_visibility(symbol['st_other']['visibility']),
-                    describe_symbol_shndx(symbol['st_shndx']),
+                    describe_symbol_shndx(self._get_symbol_shndx(symbol,
+                                                                 nsym,
+                                                                 section_index)),
                     symbol.name,
                     version_info))
 
@@ -527,7 +542,10 @@ class ReadElf(object):
                     # names (excluding version info) to 22 chars, similarly to
                     # readelf.
                     if symbol['st_name'] == 0:
-                        symsec = self.elffile.get_section(symbol['st_shndx'])
+                        symsecidx = self._get_symbol_shndx(symbol,
+                                                           rel['r_info_sym'],
+                                                           section['sh_link'])
+                        symsec = self.elffile.get_section(symsecidx)
                         symbol_name = symsec.name
                         version = ''
                     else:
@@ -973,6 +991,22 @@ class ReadElf(object):
             # Not a number. Must be a name then
             return self.elffile.get_section_by_name(spec)
 
+    def _get_symbol_shndx(self, symbol, symbol_index, symtab_index):
+        """ Get the index into the section header table for the "symbol"
+            at "symbol_index" located in the symbol table with section index
+            "symtab_index".
+        """
+        symbol_shndx = symbol['st_shndx']
+        if symbol_shndx != SHN_INDICES.SHN_XINDEX:
+            return symbol_shndx
+
+        # Check for or lazily construct index section mapping (symbol table
+        # index -> corresponding symbol table index section object)
+        if self._shndx_sections is None:
+            self._shndx_sections = {sec.symboltable: sec for sec in self.elffile.iter_sections()
+                                    if isinstance(sec, SymbolTableIndexSection)}
+        return self._shndx_sections[symtab_index].get_section_index(symbol_index)
+
     def _note_relocs_for_section(self, section):
         """ If there are relocation sections pointing to the givne section,
             emit a note about it.
diff --git a/test/testfiles_for_readelf/many_sections.o.elf b/test/testfiles_for_readelf/many_sections.o.elf
new file mode 100644 (file)
index 0000000..f51fd3e
Binary files /dev/null and b/test/testfiles_for_readelf/many_sections.o.elf differ