import itertools
from collections import defaultdict
-from .hash import HashSection, GNUHashSection
+from .hash import ELFHashTable, GNUHashTable
from .sections import Section, Symbol
from .enums import ENUM_D_TAG
from .segments import Segment
end_ptr = None
# Check if a DT_GNU_HASH tag exists and recover the number of symbols
- # from the corresponding section
+ # from the corresponding hash table
_, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH')
if gnu_hash_offset is not None:
- hash_section = GNUHashSection(self.stream, gnu_hash_offset,
- self.elffile)
+ hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self)
end_ptr = tab_ptr + \
hash_section.get_number_of_symbols() * symbol_size
if end_ptr is None:
_, hash_offset = self.get_table_offset('DT_HASH')
if hash_offset is not None:
- hash_section = HashSection(self.stream, hash_offset,
- self.elffile)
+ # Get the hash table from the DT_HASH offset
+ hash_section = ELFHashTable(self.elffile, hash_offset, self)
end_ptr = tab_ptr + \
hash_section.get_number_of_symbols() * symbol_size
GNUVerSymSection)
from .segments import Segment, InterpSegment, NoteSegment
from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
-
+from .hash import ELFHashSection, GNUHashSection
class ELFFile(object):
""" Creation: the constructor accepts a stream (file-like object) with the
return StabSection(section_header, name, self)
elif sectype == 'SHT_ARM_ATTRIBUTES':
return ARMAttributesSection(section_header, name, self)
+ elif sectype == 'SHT_HASH':
+ return self._make_elf_hash_section(section_header, name)
+ elif sectype == 'SHT_GNU_HASH':
+ return self._make_gnu_hash_section(section_header, name)
else:
return Section(section_header, name, self)
elffile=self,
symboltable=strtab_section)
+ def _make_elf_hash_section(self, section_header, name):
+ linked_symtab_index = section_header['sh_link']
+ symtab_section = self.get_section(linked_symtab_index)
+ return ELFHashSection(
+ section_header, name, self, symtab_section
+ )
+
+ def _make_gnu_hash_section(self, section_header, name):
+ linked_symtab_index = section_header['sh_link']
+ symtab_section = self.get_section(linked_symtab_index)
+ return GNUHashSection(
+ section_header, name, self, symtab_section
+ )
+
def _get_segment_header(self, n):
""" Find the header of segment #n, parse it and return the struct
"""
# This code is in the public domain
#-------------------------------------------------------------------------------
+import struct
+
from ..common.utils import struct_parse
+from .sections import Section
-class HashSection(object):
- """ Minimal part of an ELF hash section to find the number of symbols in the
+class ELFHashTable(object):
+ """ Representation of an ELF hash table to find symbols in the
symbol table - useful for super-stripped binaries without section
headers where only the start of the symbol table is known from the
dynamic segment. The layout and contents are nicely described at
https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/.
+
+ The symboltable argument needs to implement a get_symbol() method -
+ in a regular ELF file, this will be the linked symbol table section
+ as indicated by the sh_link attribute. For super-stripped binaries,
+ one should use the DynamicSegment object as the symboltable as it
+ supports symbol lookup without access to a symbol table section.
"""
- def __init__(self, stream, offset, elffile):
- self._stream = stream
- self._offset = offset
- self._elffile = elffile
- self.params = struct_parse(self._elffile.structs.Elf_Hash,
- self._stream,
- self._offset)
+
+ def __init__(self, elffile, start_offset, symboltable):
+ self.elffile = elffile
+ self._symboltable = symboltable
+ self.params = struct_parse(self.elffile.structs.Elf_Hash,
+ self.elffile.stream,
+ start_offset)
def get_number_of_symbols(self):
""" Get the number of symbols from the hash table parameters.
"""
return self.params['nchains']
+ def get_symbol(self, name):
+ """ Look up a symbol from this hash table with the given name.
+ """
+ if self.params['nbuckets'] == 0:
+ return None
+ hval = self.elf_hash(name) % self.params['nbuckets']
+ symndx = self.params['buckets'][hval]
+ while symndx != 0:
+ sym = self._symboltable.get_symbol(symndx)
+ if sym.name == name:
+ return sym
+ symndx = self.params['chains'][symndx]
+ return None
+
+ @staticmethod
+ def elf_hash(name):
+ """ Compute the hash value for a given symbol name.
+ """
+ if not isinstance(name, bytes):
+ name = name.encode('utf-8')
+ h = 0
+ x = 0
+ for c in bytearray(name):
+ h = (h << 4) + c
+ x = h & 0xF0000000
+ if x != 0:
+ h ^= (x >> 24)
+ h &= ~x
+ return h
+
-class GNUHashSection(object):
- """ Minimal part of a GNU hash section to find the number of symbols in the
+class ELFHashSection(Section, ELFHashTable):
+ """ Section representation of an ELF hash table. In regular ELF files, this
+ allows us to use the common functions defined on Section objects when
+ dealing with the hash table.
+ """
+ def __init__(self, header, name, elffile, symboltable):
+ Section.__init__(self, header, name, elffile)
+ ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
+
+
+class GNUHashTable(object):
+ """ Representation of a GNU hash table to find symbols in the
symbol table - useful for super-stripped binaries without section
headers where only the start of the symbol table is known from the
dynamic segment. The layout and contents are nicely described at
https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/.
+
+ The symboltable argument needs to implement a get_symbol() method -
+ in a regular ELF file, this will be the linked symbol table section
+ as indicated by the sh_link attribute. For super-stripped binaries,
+ one should use the DynamicSegment object as the symboltable as it
+ supports symbol lookup without access to a symbol table section.
"""
- def __init__(self, stream, offset, elffile):
- self._stream = stream
- self._offset = offset
- self._elffile = elffile
- self.params = struct_parse(self._elffile.structs.Gnu_Hash,
- self._stream,
- self._offset)
+ def __init__(self, elffile, start_offset, symboltable):
+ self.elffile = elffile
+ self._symboltable = symboltable
+ self.params = struct_parse(self.elffile.structs.Gnu_Hash,
+ self.elffile.stream,
+ start_offset)
+ # Element sizes in the hash table
+ self._wordsize = self.elffile.structs.Elf_word('').sizeof()
+ self._xwordsize = self.elffile.structs.Elf_xword('').sizeof()
+ self._chain_pos = start_offset + 4 * self._wordsize + \
+ self.params['bloom_size'] * self._xwordsize + \
+ self.params['nbuckets'] * self._wordsize
def get_number_of_symbols(self):
""" Get the number of symbols in the hash table by finding the bucket
with the highest symbol index and walking to the end of its chain.
"""
- # Element sizes in the hash table
- wordsize = self._elffile.structs.Elf_word('').sizeof()
- xwordsize = self._elffile.structs.Elf_xword('').sizeof()
-
# Find highest index in buckets array
max_idx = max(self.params['buckets'])
if max_idx < self.params['symoffset']:
return self.params['symoffset']
# Position the stream at the start of the corresponding chain
- chain_pos = self._offset + 4 * wordsize + \
- self.params['bloom_size'] * xwordsize + \
- self.params['nbuckets'] * wordsize + \
- (max_idx - self.params['symoffset']) * wordsize
+ max_chain_pos = self._chain_pos + \
+ (max_idx - self.params['symoffset']) * self._wordsize
+ self.elffile.stream.seek(max_chain_pos)
# Walk the chain to its end (lowest bit is set)
while True:
- cur_hash = struct_parse(self._elffile.structs.Elf_word('elem'),
- self._stream,
- chain_pos)
+ cur_hash = struct.unpack('I', self.elffile.stream.read(self._wordsize))[0]
if cur_hash & 1:
return max_idx + 1
max_idx += 1
- chain_pos += wordsize
+
+ def _matches_bloom(self, H1):
+ """ Helper function to check if the given hash could be in the hash
+ table by testing it against the bloom filter.
+ """
+ arch_bits = self.elffile.elfclass
+ H2 = H1 >> self.params['bloom_shift']
+ word_idx = int(H1 / arch_bits) % self.params['bloom_size']
+ BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits))
+ return (self.params['bloom'][word_idx] & BITMASK) == BITMASK
+
+ def get_symbol(self, name):
+ """ Look up a symbol from this hash table with the given name.
+ """
+ namehash = self.gnu_hash(name)
+ if not self._matches_bloom(namehash):
+ return None
+
+ symidx = self.params['buckets'][namehash % self.params['nbuckets']]
+ if symidx < self.params['symoffset']:
+ return None
+
+ self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize)
+ while True:
+ cur_hash = struct.unpack('I', self.elffile.stream.read(self._wordsize))[0]
+ if cur_hash | 1 == namehash | 1:
+ symbol = self._symboltable.get_symbol(symidx)
+ if name == symbol.name:
+ return symbol
+
+ if cur_hash & 1:
+ break
+ symidx += 1
+ return None
+
+ @staticmethod
+ def gnu_hash(key):
+ """ Compute the GNU-style hash value for a given symbol name.
+ """
+ if not isinstance(key, bytes):
+ key = key.encode('utf-8')
+ h = 5381
+ for c in bytearray(key):
+ h = h * 33 + c
+ return h & 0xFFFFFFFF
+
+
+class GNUHashSection(Section, GNUHashTable):
+ """ Section representation of a GNU hash table. In regular ELF files, this
+ allows us to use the common functions defined on Section objects when
+ dealing with the hash table.
+ """
+ def __init__(self, header, name, elffile, symboltable):
+ Section.__init__(self, header, name, elffile)
+ GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
+# -*- coding: utf-8 -*-
#-------------------------------------------------------------------------------
# elftools tests
#
import os
from elftools.elf.elffile import ELFFile
-from elftools.common.exceptions import ELFError
-from elftools.elf.hash import HashSection, GNUHashSection
+from elftools.elf.hash import ELFHashTable, GNUHashTable
class TestELFHash(unittest.TestCase):
+ """ Tests for the ELF hash table.
+ """
+
+ def test_elf_hash(self):
+ """ Verify correctness of ELF hashing function. The expected values
+ were computed with the C implementation from the glibc source code.
+ """
+ self.assertEqual(ELFHashTable.elf_hash(''), 0x00000000)
+ self.assertEqual(ELFHashTable.elf_hash('main'), 0x000737fe)
+ self.assertEqual(ELFHashTable.elf_hash('printf'), 0x077905a6)
+ self.assertEqual(ELFHashTable.elf_hash('exit'), 0x0006cf04)
+ self.assertEqual(ELFHashTable.elf_hash(u'ïó®123'), 0x0efddae3)
+ self.assertEqual(ELFHashTable.elf_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'),
+ 0x0f07f00d)
+
def test_get_number_of_syms(self):
""" Verify we can get get the number of symbols from an ELF hash
section.
"""
-
with open(os.path.join('test', 'testfiles_for_unittests',
'aarch64_super_stripped.elf'), 'rb') as f:
elf = ELFFile(f)
+ dynamic_segment = None
for segment in elf.iter_segments():
- if segment.header.p_type != 'PT_DYNAMIC':
- continue
+ if segment.header.p_type == 'PT_DYNAMIC':
+ dynamic_segment = segment
+ break
+
+ _, hash_offset = dynamic_segment.get_table_offset('DT_HASH')
- _, hash_offset = segment.get_table_offset('DT_HASH')
- hash_section = HashSection(elf.stream, hash_offset, elf)
+ hash_section = ELFHashTable(elf, hash_offset, dynamic_segment)
+ self.assertIsNotNone(hash_section)
self.assertEqual(hash_section.get_number_of_symbols(), 4)
+ def test_get_symbol(self):
+ """ Verify we can get a specific symbol from an ELF hash section.
+ """
+ path = os.path.join('test', 'testfiles_for_unittests',
+ 'simple_mipsel.elf')
+ with open(path, 'rb') as f:
+ elf = ELFFile(f)
+ hash_section = elf.get_section_by_name('.hash')
+ self.assertIsNotNone(hash_section)
+ symbol_main = hash_section.get_symbol('main')
+ self.assertIsNotNone(symbol_main)
+ self.assertEqual(symbol_main['st_value'], int(0x400790))
+
class TestGNUHash(unittest.TestCase):
+ """ Tests for the GNU hash table.
+ """
+
+ def test_gnu_hash(self):
+ """ Verify correctness of GNU hashing function. The expected values
+ were computed with the C implementation from the glibc source code.
+ """
+ self.assertEqual(GNUHashTable.gnu_hash(''), 0x00001505)
+ self.assertEqual(GNUHashTable.gnu_hash('main'), 0x7c9a7f6a)
+ self.assertEqual(GNUHashTable.gnu_hash('printf'), 0x156b2bb8)
+ self.assertEqual(GNUHashTable.gnu_hash('exit'), 0x7c967e3f)
+ self.assertEqual(GNUHashTable.gnu_hash(u'ïó®123'), 0x8025a693)
+ self.assertEqual(GNUHashTable.gnu_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'),
+ 0x296eec2d)
+
def test_get_number_of_syms(self):
""" Verify we can get get the number of symbols from a GNU hash
section.
with open(os.path.join('test', 'testfiles_for_unittests',
'lib_versioned64.so.1.elf'), 'rb') as f:
elf = ELFFile(f)
- for segment in elf.iter_segments():
- if segment.header.p_type != 'PT_DYNAMIC':
- continue
-
- _, hash_offset = segment.get_table_offset('DT_GNU_HASH')
- hash_section = GNUHashSection(elf.stream, hash_offset, elf)
+ hash_section = elf.get_section_by_name('.gnu.hash')
+ self.assertIsNotNone(hash_section)
self.assertEqual(hash_section.get_number_of_symbols(), 24)
+
+ def test_get_symbol(self):
+ """ Verify we can get a specific symbol from a GNU hash section.
+ """
+ with open(os.path.join('test', 'testfiles_for_unittests',
+ 'lib_versioned64.so.1.elf'), 'rb') as f:
+ elf = ELFFile(f)
+ hash_section = elf.get_section_by_name('.gnu.hash')
+ self.assertIsNotNone(hash_section)
+ symbol_f1 = hash_section.get_symbol('function1_ver1_1')
+ self.assertIsNotNone(symbol_f1)
+ self.assertEqual(symbol_f1['st_value'], int(0x9a2))