From: Nam T. Nguyen Date: Fri, 25 Jul 2014 18:41:53 +0000 (-0700) Subject: Support parsing symbol table in dynamic segment. X-Git-Tag: v0.23~5^2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=687baa0cd6be158a32756fc7dd475e0e7f98f4d2;p=pyelftools.git Support parsing symbol table in dynamic segment. Unlike SymbolTableSection, this patch reads from DT_SYMTAB, and DT_STRTAB in PT_DYNAMIC segment. It heuristically determines the end of DT_SYMTAB by finding the next higher pointer in the same segment. GNU libc (dl-addr.c) assumes that DT_STRTAB comes after DT_SYMTAB. --- diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 9f985c2..0b3e3e9 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- import itertools -from .sections import Section +from .sections import Section, Symbol from .segments import Segment from ..common.exceptions import ELFError from ..common.utils import struct_parse, parse_cstring_from_stream @@ -80,6 +80,22 @@ class Dynamic(object): # Do not access this directly yourself; use _get_stringtable() instead. self._stringtable = stringtable + def get_table_offset(self, tag_name): + """ Return the virtual address and file offset of a dynamic table. + """ + ptr = None + for tag in self._iter_tags(type=tag_name): + ptr = tag['d_ptr'] + break + + # If we found a virtual address, locate the offset in the file + # by using the program headers. + offset = None + if ptr: + offset = next(self._elffile.address_offsets(ptr), None) + + return ptr, offset + def _get_stringtable(self): """ Return a string table for looking up dynamic tag related strings. @@ -92,17 +108,10 @@ class Dynamic(object): # If the ELF has stripped its section table (which is unusual, but # perfectly valid), we need to use the dynamic tags to locate the # dynamic string table. - strtab = None - for tag in self._iter_tags(type='DT_STRTAB'): - strtab = tag['d_val'] - break - # If we found a dynamic string table, locate the offset in the file - # by using the program headers. - if strtab: - table_offset = next(self._elffile.address_offsets(strtab), None) - if table_offset is not None: - self._stringtable = _DynamicStringTable(self._stream, table_offset) - return self._stringtable + _, table_offset = self.get_table_offset('DT_STRTAB') + if table_offset is not None: + self._stringtable = _DynamicStringTable(self._stream, table_offset) + return self._stringtable # That didn't work for some reason. Let's use the section header # even though this ELF is super weird. @@ -179,3 +188,47 @@ class DynamicSegment(Segment, Dynamic): break Segment.__init__(self, header, stream) Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset']) + + def iter_symbols(self): + """ Yield all symbols in this dynamic segment. The symbols are usually + the same as returned by SymbolTableSection.iter_symbols. However, + in stripped binaries, SymbolTableSection might have been removed. + This method reads from the mandatory dynamic tag DT_SYMTAB. + """ + tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') + if tab_ptr is None or tab_offset is None: + raise ELFError('Segment does not contain DT_SYMTAB.') + + symbol_size = self._elfstructs.Elf_Sym.sizeof() + + # Find closest higher pointer than tab_ptr. We'll use that to mark the + # end of the symbol table. + nearest_ptr = None + for tag in self.iter_tags(): + tag_ptr = tag['d_ptr'] + if tag['d_tag'] == 'DT_SYMENT': + if symbol_size != tag['d_val']: + # DT_SYMENT is the size of one symbol entry. It must be the + # same as returned by Elf_Sym.sizeof. + raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' % + (tag['d_val'], symbol_size)) + if (tag_ptr > tab_ptr and + (nearest_ptr is None or nearest_ptr > tag_ptr)): + nearest_ptr = tag_ptr + + if nearest_ptr is None: + # Use the end of segment that contains DT_SYMTAB. + for segment in self._elffile.iter_segments(): + if (segment['p_vaddr'] <= tab_ptr and + tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])): + nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] + + if nearest_ptr is None: + raise ELFError('Cannot determine the end of DT_SYMTAB.') + + string_table = self._get_stringtable() + for i in range((nearest_ptr - tab_ptr) // symbol_size): + symbol = struct_parse(self._elfstructs.Elf_Sym, self._stream, + i * symbol_size + tab_offset) + symbol_name = string_table.get_string(symbol['st_name']) + yield Symbol(symbol, symbol_name) diff --git a/test/test_dynamic.py b/test/test_dynamic.py index f25feba..8dc6c28 100644 --- a/test/test_dynamic.py +++ b/test/test_dynamic.py @@ -46,6 +46,20 @@ class TestDynamic(unittest.TestCase): exp = ['libc.so.6'] self.assertEqual(libs, exp) + def test_reading_symbols(self): + """Verify we can read symbol table without SymbolTableSection""" + with open(os.path.join('test', 'testfiles_for_unittests', + 'aarch64_super_stripped.elf'), 'rb') as f: + elf = ELFFile(f) + for segment in elf.iter_segments(): + if segment.header.p_type != 'PT_DYNAMIC': + continue + + symbol_names = [x.name for x in segment.iter_symbols()] + + exp = [b'', b'__libc_start_main', b'__gmon_start__', b'abort'] + self.assertEqual(symbol_names, exp) + if __name__ == '__main__': unittest.main()