hash.py: observe endianness when reading hashes (#338)
author Andreas Ziegler <andreas.ziegler@fau.de>
Mon, 26 Oct 2020 13:07:42 +0000 (14:07 +0100)
committer GitHub <noreply@github.com>
Mon, 26 Oct 2020 13:07:42 +0000 (06:07 -0700)
Reading the hashes from a GNUHashTable didn't properly use
the endianness of the underlying ELF file, so looking up
hashes would fail if the byte order of the analyzed file
did not match the native byte order of the current machine.

The test file consists of two functions:

int callee(){
    return 42;
}

int caller(){
    return callee();
}

and was compiled using `aarch64_be-linux-gcc` (version 8.3
on an x86_64 host) with the `-mbig-endian` and `-shared`
command line flags.

elftools/elf/hash.py
test/test_hash.py
test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf [new file with mode: 0755]

diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py
index 2e8a6fed41d0afde64692db9c54f001de44b9c1a..c8d1e17e7e3a133b03ee52b99b13619f3bf95f7c 100644 (file)
@@ -119,10 +119,11 @@ class GNUHashTable(object):
         max_chain_pos = self._chain_pos + \
             (max_idx - self.params['symoffset']) * self._wordsize
         self.elffile.stream.seek(max_chain_pos)
+        hash_format = '<I' if self.elffile.little_endian else '>I'
 
         # Walk the chain to its end (lowest bit is set)
         while True:
-            cur_hash = struct.unpack('I', self.elffile.stream.read(self._wordsize))[0]
+            cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
             if cur_hash & 1:
                 return max_idx + 1
 
@@ -150,8 +151,9 @@ class GNUHashTable(object):
             return None
 
         self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize)
+        hash_format = '<I' if self.elffile.little_endian else '>I'
         while True:
-            cur_hash = struct.unpack('I', self.elffile.stream.read(self._wordsize))[0]
+            cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
             if cur_hash | 1 == namehash | 1:
                 symbol = self._symboltable.get_symbol(symidx)
                 if name == symbol.name:
diff --git a/test/test_hash.py b/test/test_hash.py
index 7832a7698fffce17c8c40420a7233cfef4c4a67c..2c2ffec402a3cd514f039494c451210d55c2468b 100644 (file)
@@ -99,3 +99,17 @@ class TestGNUHash(unittest.TestCase):
             symbol_f1 = hash_section.get_symbol('function1_ver1_1')
             self.assertIsNotNone(symbol_f1)
             self.assertEqual(symbol_f1['st_value'], int(0x9a2))
+
+    def test_get_symbol_big_endian(self):
+        """ Verify we can get a specific symbol from a GNU hash section in a
+            big-endian file.
+        """
+        with open(os.path.join('test', 'testfiles_for_unittests',
+                               'aarch64_be_gnu_hash.so.elf'), 'rb') as f:
+            elf = ELFFile(f)
+            self.assertFalse(elf.little_endian)
+            hash_section = elf.get_section_by_name('.gnu.hash')
+            self.assertIsNotNone(hash_section)
+            symbol_f1 = hash_section.get_symbol('caller')
+            self.assertIsNotNone(symbol_f1)
+            self.assertEqual(symbol_f1['st_value'], int(0x5a4))
diff --git a/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf b/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf
new file mode 100755 (executable)
index 0000000..ed36844
Binary files /dev/null and b/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf differ