From 966438dbc158a838399459fb87f49df70cb30970 Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Mon, 9 Mar 2020 13:39:23 +0100 Subject: [PATCH] {GNU,}HashSection: Implement symbol lookup (#290) In super-stripped binaries, symbol tables can not be accessed directly as we do not have section headers to find them. In this case, we can already use the mandatory DynamicSegment which provides methods for individual access and iteration over symbols via a minimal implementation of symbol hash sections which only provided the number of symbols so far. As we can also directly look up symbols via the hash table, let's implement this functionality as well. The code is based on @rhelmot's implementation as discussed in #219, with some changes around reading the hash parameters. For supporting individual symbol lookup, we also need the corresponding symbol table to get the Symbol objects if the matching hash was found in the hash section. In regular ELF files, the symbol table is denoted by the section index provided in the sh_link field of the hash section and automatically created when building the hash section, for super-stripped binaries we can use the DynamicSegment (which needs to be present in any case) as the symbol table as it also provides a get_symbol() method relying on other ways to determine the list of symbols. Both of these variants can be seen in the improved test_hash.py file. The hash tables are implemented in a base class which does not derive from the Section class in order to allow instantiation even if no section headers are present in the underlying file. --- elftools/elf/dynamic.py | 11 ++- elftools/elf/elffile.py | 20 ++++- elftools/elf/hash.py | 167 ++++++++++++++++++++++++++++++++-------- test/test_hash.py | 80 +++++++++++++++---- 4 files changed, 228 insertions(+), 50 deletions(-) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index a445ae9..c0458d8 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -9,7 +9,7 @@ import itertools from collections import defaultdict -from .hash import HashSection, GNUHashSection +from .hash import ELFHashTable, GNUHashTable from .sections import Section, Symbol from .enums import ENUM_D_TAG from .segments import Segment @@ -298,11 +298,10 @@ class DynamicSegment(Segment, Dynamic): end_ptr = None # Check if a DT_GNU_HASH tag exists and recover the number of symbols - # from the corresponding section + # from the corresponding hash table _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH') if gnu_hash_offset is not None: - hash_section = GNUHashSection(self.stream, gnu_hash_offset, - self.elffile) + hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self) end_ptr = tab_ptr + \ hash_section.get_number_of_symbols() * symbol_size @@ -310,8 +309,8 @@ class DynamicSegment(Segment, Dynamic): if end_ptr is None: _, hash_offset = self.get_table_offset('DT_HASH') if hash_offset is not None: - hash_section = HashSection(self.stream, hash_offset, - self.elffile) + # Get the hash table from the DT_HASH offset + hash_section = ELFHashTable(self.elffile, hash_offset, self) end_ptr = tab_ptr + \ hash_section.get_number_of_symbols() * symbol_size diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index c5b66c7..f95fd54 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -33,7 +33,7 @@ from .gnuversions import ( GNUVerSymSection) from .segments import Segment, InterpSegment, NoteSegment from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig - +from .hash import ELFHashSection, GNUHashSection class ELFFile(object): """ Creation: the constructor accepts a stream (file-like object) with the @@ -504,6 +504,10 @@ class ELFFile(object): return StabSection(section_header, name, self) elif sectype == 'SHT_ARM_ATTRIBUTES': return ARMAttributesSection(section_header, name, self) + elif sectype == 'SHT_HASH': + return self._make_elf_hash_section(section_header, name) + elif sectype == 'SHT_GNU_HASH': + return self._make_gnu_hash_section(section_header, name) else: return Section(section_header, name, self) @@ -557,6 +561,20 @@ class ELFFile(object): elffile=self, symboltable=strtab_section) + def _make_elf_hash_section(self, section_header, name): + linked_symtab_index = section_header['sh_link'] + symtab_section = self.get_section(linked_symtab_index) + return ELFHashSection( + section_header, name, self, symtab_section + ) + + def _make_gnu_hash_section(self, section_header, name): + linked_symtab_index = section_header['sh_link'] + symtab_section = self.get_section(linked_symtab_index) + return GNUHashSection( + section_header, name, self, symtab_section + ) + def _get_segment_header(self, n): """ Find the header of segment #n, parse it and return the struct """ diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py index 3c39f8b..2e8a6fe 100644 --- a/elftools/elf/hash.py +++ b/elftools/elf/hash.py @@ -7,71 +7,178 @@ # This code is in the public domain #------------------------------------------------------------------------------- +import struct + from ..common.utils import struct_parse +from .sections import Section -class HashSection(object): - """ Minimal part of an ELF hash section to find the number of symbols in the +class ELFHashTable(object): + """ Representation of an ELF hash table to find symbols in the symbol table - useful for super-stripped binaries without section headers where only the start of the symbol table is known from the dynamic segment. The layout and contents are nicely described at https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/. + + The symboltable argument needs to implement a get_symbol() method - + in a regular ELF file, this will be the linked symbol table section + as indicated by the sh_link attribute. For super-stripped binaries, + one should use the DynamicSegment object as the symboltable as it + supports symbol lookup without access to a symbol table section. """ - def __init__(self, stream, offset, elffile): - self._stream = stream - self._offset = offset - self._elffile = elffile - self.params = struct_parse(self._elffile.structs.Elf_Hash, - self._stream, - self._offset) + + def __init__(self, elffile, start_offset, symboltable): + self.elffile = elffile + self._symboltable = symboltable + self.params = struct_parse(self.elffile.structs.Elf_Hash, + self.elffile.stream, + start_offset) def get_number_of_symbols(self): """ Get the number of symbols from the hash table parameters. """ return self.params['nchains'] + def get_symbol(self, name): + """ Look up a symbol from this hash table with the given name. + """ + if self.params['nbuckets'] == 0: + return None + hval = self.elf_hash(name) % self.params['nbuckets'] + symndx = self.params['buckets'][hval] + while symndx != 0: + sym = self._symboltable.get_symbol(symndx) + if sym.name == name: + return sym + symndx = self.params['chains'][symndx] + return None + + @staticmethod + def elf_hash(name): + """ Compute the hash value for a given symbol name. + """ + if not isinstance(name, bytes): + name = name.encode('utf-8') + h = 0 + x = 0 + for c in bytearray(name): + h = (h << 4) + c + x = h & 0xF0000000 + if x != 0: + h ^= (x >> 24) + h &= ~x + return h + -class GNUHashSection(object): - """ Minimal part of a GNU hash section to find the number of symbols in the +class ELFHashSection(Section, ELFHashTable): + """ Section representation of an ELF hash table. In regular ELF files, this + allows us to use the common functions defined on Section objects when + dealing with the hash table. + """ + def __init__(self, header, name, elffile, symboltable): + Section.__init__(self, header, name, elffile) + ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable) + + +class GNUHashTable(object): + """ Representation of a GNU hash table to find symbols in the symbol table - useful for super-stripped binaries without section headers where only the start of the symbol table is known from the dynamic segment. The layout and contents are nicely described at https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/. + + The symboltable argument needs to implement a get_symbol() method - + in a regular ELF file, this will be the linked symbol table section + as indicated by the sh_link attribute. For super-stripped binaries, + one should use the DynamicSegment object as the symboltable as it + supports symbol lookup without access to a symbol table section. """ - def __init__(self, stream, offset, elffile): - self._stream = stream - self._offset = offset - self._elffile = elffile - self.params = struct_parse(self._elffile.structs.Gnu_Hash, - self._stream, - self._offset) + def __init__(self, elffile, start_offset, symboltable): + self.elffile = elffile + self._symboltable = symboltable + self.params = struct_parse(self.elffile.structs.Gnu_Hash, + self.elffile.stream, + start_offset) + # Element sizes in the hash table + self._wordsize = self.elffile.structs.Elf_word('').sizeof() + self._xwordsize = self.elffile.structs.Elf_xword('').sizeof() + self._chain_pos = start_offset + 4 * self._wordsize + \ + self.params['bloom_size'] * self._xwordsize + \ + self.params['nbuckets'] * self._wordsize def get_number_of_symbols(self): """ Get the number of symbols in the hash table by finding the bucket with the highest symbol index and walking to the end of its chain. """ - # Element sizes in the hash table - wordsize = self._elffile.structs.Elf_word('').sizeof() - xwordsize = self._elffile.structs.Elf_xword('').sizeof() - # Find highest index in buckets array max_idx = max(self.params['buckets']) if max_idx < self.params['symoffset']: return self.params['symoffset'] # Position the stream at the start of the corresponding chain - chain_pos = self._offset + 4 * wordsize + \ - self.params['bloom_size'] * xwordsize + \ - self.params['nbuckets'] * wordsize + \ - (max_idx - self.params['symoffset']) * wordsize + max_chain_pos = self._chain_pos + \ + (max_idx - self.params['symoffset']) * self._wordsize + self.elffile.stream.seek(max_chain_pos) # Walk the chain to its end (lowest bit is set) while True: - cur_hash = struct_parse(self._elffile.structs.Elf_word('elem'), - self._stream, - chain_pos) + cur_hash = struct.unpack('I', self.elffile.stream.read(self._wordsize))[0] if cur_hash & 1: return max_idx + 1 max_idx += 1 - chain_pos += wordsize + + def _matches_bloom(self, H1): + """ Helper function to check if the given hash could be in the hash + table by testing it against the bloom filter. + """ + arch_bits = self.elffile.elfclass + H2 = H1 >> self.params['bloom_shift'] + word_idx = int(H1 / arch_bits) % self.params['bloom_size'] + BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits)) + return (self.params['bloom'][word_idx] & BITMASK) == BITMASK + + def get_symbol(self, name): + """ Look up a symbol from this hash table with the given name. + """ + namehash = self.gnu_hash(name) + if not self._matches_bloom(namehash): + return None + + symidx = self.params['buckets'][namehash % self.params['nbuckets']] + if symidx < self.params['symoffset']: + return None + + self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize) + while True: + cur_hash = struct.unpack('I', self.elffile.stream.read(self._wordsize))[0] + if cur_hash | 1 == namehash | 1: + symbol = self._symboltable.get_symbol(symidx) + if name == symbol.name: + return symbol + + if cur_hash & 1: + break + symidx += 1 + return None + + @staticmethod + def gnu_hash(key): + """ Compute the GNU-style hash value for a given symbol name. + """ + if not isinstance(key, bytes): + key = key.encode('utf-8') + h = 5381 + for c in bytearray(key): + h = h * 33 + c + return h & 0xFFFFFFFF + + +class GNUHashSection(Section, GNUHashTable): + """ Section representation of a GNU hash table. In regular ELF files, this + allows us to use the common functions defined on Section objects when + dealing with the hash table. + """ + def __init__(self, header, name, elffile, symboltable): + Section.__init__(self, header, name, elffile) + GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable) diff --git a/test/test_hash.py b/test/test_hash.py index 9fab30e..7832a76 100644 --- a/test/test_hash.py +++ b/test/test_hash.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- #------------------------------------------------------------------------------- # elftools tests # @@ -8,28 +9,73 @@ import unittest import os from elftools.elf.elffile import ELFFile -from elftools.common.exceptions import ELFError -from elftools.elf.hash import HashSection, GNUHashSection +from elftools.elf.hash import ELFHashTable, GNUHashTable class TestELFHash(unittest.TestCase): + """ Tests for the ELF hash table. + """ + + def test_elf_hash(self): + """ Verify correctness of ELF hashing function. The expected values + were computed with the C implementation from the glibc source code. + """ + self.assertEqual(ELFHashTable.elf_hash(''), 0x00000000) + self.assertEqual(ELFHashTable.elf_hash('main'), 0x000737fe) + self.assertEqual(ELFHashTable.elf_hash('printf'), 0x077905a6) + self.assertEqual(ELFHashTable.elf_hash('exit'), 0x0006cf04) + self.assertEqual(ELFHashTable.elf_hash(u'ïó®123'), 0x0efddae3) + self.assertEqual(ELFHashTable.elf_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'), + 0x0f07f00d) + def test_get_number_of_syms(self): """ Verify we can get get the number of symbols from an ELF hash section. """ - with open(os.path.join('test', 'testfiles_for_unittests', 'aarch64_super_stripped.elf'), 'rb') as f: elf = ELFFile(f) + dynamic_segment = None for segment in elf.iter_segments(): - if segment.header.p_type != 'PT_DYNAMIC': - continue + if segment.header.p_type == 'PT_DYNAMIC': + dynamic_segment = segment + break + + _, hash_offset = dynamic_segment.get_table_offset('DT_HASH') - _, hash_offset = segment.get_table_offset('DT_HASH') - hash_section = HashSection(elf.stream, hash_offset, elf) + hash_section = ELFHashTable(elf, hash_offset, dynamic_segment) + self.assertIsNotNone(hash_section) self.assertEqual(hash_section.get_number_of_symbols(), 4) + def test_get_symbol(self): + """ Verify we can get a specific symbol from an ELF hash section. + """ + path = os.path.join('test', 'testfiles_for_unittests', + 'simple_mipsel.elf') + with open(path, 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.hash') + self.assertIsNotNone(hash_section) + symbol_main = hash_section.get_symbol('main') + self.assertIsNotNone(symbol_main) + self.assertEqual(symbol_main['st_value'], int(0x400790)) + class TestGNUHash(unittest.TestCase): + """ Tests for the GNU hash table. + """ + + def test_gnu_hash(self): + """ Verify correctness of GNU hashing function. The expected values + were computed with the C implementation from the glibc source code. + """ + self.assertEqual(GNUHashTable.gnu_hash(''), 0x00001505) + self.assertEqual(GNUHashTable.gnu_hash('main'), 0x7c9a7f6a) + self.assertEqual(GNUHashTable.gnu_hash('printf'), 0x156b2bb8) + self.assertEqual(GNUHashTable.gnu_hash('exit'), 0x7c967e3f) + self.assertEqual(GNUHashTable.gnu_hash(u'ïó®123'), 0x8025a693) + self.assertEqual(GNUHashTable.gnu_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'), + 0x296eec2d) + def test_get_number_of_syms(self): """ Verify we can get get the number of symbols from a GNU hash section. @@ -38,10 +84,18 @@ class TestGNUHash(unittest.TestCase): with open(os.path.join('test', 'testfiles_for_unittests', 'lib_versioned64.so.1.elf'), 'rb') as f: elf = ELFFile(f) - for segment in elf.iter_segments(): - if segment.header.p_type != 'PT_DYNAMIC': - continue - - _, hash_offset = segment.get_table_offset('DT_GNU_HASH') - hash_section = GNUHashSection(elf.stream, hash_offset, elf) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) self.assertEqual(hash_section.get_number_of_symbols(), 24) + + def test_get_symbol(self): + """ Verify we can get a specific symbol from a GNU hash section. + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'lib_versioned64.so.1.elf'), 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + symbol_f1 = hash_section.get_symbol('function1_ver1_1') + self.assertIsNotNone(symbol_f1) + self.assertEqual(symbol_f1['st_value'], int(0x9a2)) -- 2.30.2