From: Andreas Ziegler Date: Tue, 12 Jan 2021 15:03:47 +0000 (+0100) Subject: dynamic.py: move logic around to allow symbol access more easily (#346) X-Git-Tag: v0.28~22 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=14bc1632fe36c902449e13e9dc11797b948f45d5;p=pyelftools.git dynamic.py: move logic around to allow symbol access more easily (#346) So far, the implementation of num_symbols() and get_symbol() in the DynamicSegment class depended on iter_symbols(). However, most of iter_symbols() is actually about determining the number of symbols. Let's move that logic to the correct method and use it in iter_symbols(). Additionally, in an ELF file without any exported symbols, the hash table will be empty and will thus return too low a number of symbols. However, a loader might still need to access the imported symbols (which also have an entry in the symbol table, with st_shndx set to SHN_UNDEF). To allow this, make get_symbol() take any index and simply read the symbol data from the corresponding index, and use get_symbol() from iter_symbols(). This way, one can, for example, use symbol index information from relocation entries to directly access the symbol data. These changes also make the logic in DynamicSegment resemble the code in SymbolTableSection more closely. 
Fixes: #342 --- diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index f03c6b3..2f85333 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -252,80 +252,46 @@ class DynamicSegment(Segment, Dynamic): Segment.__init__(self, header, stream) Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'], self['p_filesz'] == 0) - self._symbol_list = None + self._symbol_size = self.elfstructs.Elf_Sym.sizeof() + self._num_symbols = None self._symbol_name_map = None def num_symbols(self): """ Number of symbols in the table recovered from DT_SYMTAB """ - if self._symbol_list is None: - self._symbol_list = list(self.iter_symbols()) - return len(self._symbol_list) - - def get_symbol(self, index): - """ Get the symbol at index #index from the table (Symbol object) - """ - if self._symbol_list is None: - self._symbol_list = list(self.iter_symbols()) - return self._symbol_list[index] - - def get_symbol_by_name(self, name): - """ Get a symbol(s) by name. Return None if no symbol by the given name - exists. - """ - # The first time this method is called, construct a name to number - # mapping - # - if self._symbol_name_map is None: - self._symbol_name_map = defaultdict(list) - for i, sym in enumerate(self.iter_symbols()): - self._symbol_name_map[sym.name].append(i) - symnums = self._symbol_name_map.get(name) - return [self.get_symbol(i) for i in symnums] if symnums else None - - def iter_symbols(self): - """ Yield all symbols in this dynamic segment. The symbols are usually - the same as returned by SymbolTableSection.iter_symbols. However, - in stripped binaries, SymbolTableSection might have been removed. - This method reads from the mandatory dynamic tag DT_SYMTAB. 
- """ - tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') - if tab_ptr is None or tab_offset is None: - raise ELFError('Segment does not contain DT_SYMTAB.') - - symbol_size = self.elfstructs.Elf_Sym.sizeof() - - end_ptr = None + if self._num_symbols is not None: + return self._num_symbols # Check if a DT_GNU_HASH tag exists and recover the number of symbols # from the corresponding hash table _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH') if gnu_hash_offset is not None: hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self) - end_ptr = tab_ptr + \ - hash_section.get_number_of_symbols() * symbol_size + self._num_symbols = hash_section.get_number_of_symbols() # If DT_GNU_HASH did not exist, maybe we can use DT_HASH - if end_ptr is None: + if self._num_symbols is None: _, hash_offset = self.get_table_offset('DT_HASH') if hash_offset is not None: # Get the hash table from the DT_HASH offset hash_section = ELFHashTable(self.elffile, hash_offset, self) - end_ptr = tab_ptr + \ - hash_section.get_number_of_symbols() * symbol_size + self._num_symbols = hash_section.get_number_of_symbols() - if end_ptr is None: + if self._num_symbols is None: # Find closest higher pointer than tab_ptr. We'll use that to mark # the end of the symbol table. + tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') + if tab_ptr is None or tab_offset is None: + raise ELFError('Segment does not contain DT_SYMTAB.') nearest_ptr = None for tag in self.iter_tags(): tag_ptr = tag['d_ptr'] if tag['d_tag'] == 'DT_SYMENT': - if symbol_size != tag['d_val']: + if self._symbol_size != tag['d_val']: # DT_SYMENT is the size of one symbol entry. It must be # the same as returned by Elf_Sym.sizeof. raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' 
% - (tag['d_val'], symbol_size)) + (tag['d_val'], self._symbol_size)) if (tag_ptr > tab_ptr and (nearest_ptr is None or nearest_ptr > tag_ptr)): nearest_ptr = tag_ptr @@ -338,13 +304,49 @@ class DynamicSegment(Segment, Dynamic): nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] end_ptr = nearest_ptr + self._num_symbols = (end_ptr - tab_ptr) // self._symbol_size - if end_ptr is None: + if self._num_symbols is None: raise ELFError('Cannot determine the end of DT_SYMTAB.') + return self._num_symbols + + def get_symbol(self, index): + """ Get the symbol at index #index from the table (Symbol object) + """ + tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') + if tab_ptr is None or tab_offset is None: + raise ELFError('Segment does not contain DT_SYMTAB.') + + symbol = struct_parse( + self.elfstructs.Elf_Sym, + self._stream, + stream_pos=tab_offset + index * self._symbol_size) + string_table = self._get_stringtable() - for i in range((end_ptr - tab_ptr) // symbol_size): - symbol = struct_parse(self.elfstructs.Elf_Sym, self._stream, - i * symbol_size + tab_offset) - symbol_name = string_table.get_string(symbol['st_name']) - yield Symbol(symbol, symbol_name) + symbol_name = string_table.get_string(symbol["st_name"]) + + return Symbol(symbol, symbol_name) + + def get_symbol_by_name(self, name): + """ Get a symbol(s) by name. Return None if no symbol by the given name + exists. + """ + # The first time this method is called, construct a name to number + # mapping + # + if self._symbol_name_map is None: + self._symbol_name_map = defaultdict(list) + for i, sym in enumerate(self.iter_symbols()): + self._symbol_name_map[sym.name].append(i) + symnums = self._symbol_name_map.get(name) + return [self.get_symbol(i) for i in symnums] if symnums else None + + def iter_symbols(self): + """ Yield all symbols in this dynamic segment. The symbols are usually + the same as returned by SymbolTableSection.iter_symbols. 
However, + in stripped binaries, SymbolTableSection might have been removed. + This method reads from the mandatory dynamic tag DT_SYMTAB. + """ + for i in range(self.num_symbols()): + yield(self.get_symbol(i)) diff --git a/test/test_dynamic.py b/test/test_dynamic.py index 1f48362..a310d8a 100644 --- a/test/test_dynamic.py +++ b/test/test_dynamic.py @@ -74,7 +74,6 @@ class TestDynamic(unittest.TestCase): self.assertEqual(symbol_names, exp) self.assertEqual(symbol_at_index_3.name, 'abort') self.assertIsNotNone(symbols_abort) - self.assertEqual(symbols_abort[0], symbol_at_index_3) def test_reading_symbols_gnu_hash(self): """ Verify we can read symbol table without SymbolTableSection but with @@ -98,7 +97,6 @@ class TestDynamic(unittest.TestCase): self.assertEqual(symbol_names[:9], exp) self.assertEqual(symbol_at_index_3.name, '__register_atfork') self.assertIsNotNone(symbols_atfork) - self.assertEqual(symbols_atfork[0], symbol_at_index_3) def test_sunw_tags(self): def extract_sunw(filename):