callframe.py: fix DW_EH_PE_absptr decoding (#295)
author Pierre-Marie de Rodat <pmderodat@kawie.fr>
Tue, 10 Mar 2020 13:12:11 +0000 (14:12 +0100)
committer GitHub <noreply@github.com>
Tue, 10 Mar 2020 13:12:11 +0000 (06:12 -0700)
* Handle type2/type3 relocation fields for ELF64 MIPS binaries

* dwarf/callframe.py: fix field read using the DW_EH_PE_absptr encoding

This encoding represents target addresses, so it is the virtual address
space that determines its size, not the DWARF format.

Fixes #288

elftools/dwarf/callframe.py
elftools/elf/structs.py
scripts/readelf.py
test/testfiles_for_readelf/angr-eh_frame.elf [new file with mode: 0644]

index 46116ca1b2d11ad2dd0127434b959af1e1c5e1d5..101c2d6d40e4ebf25b5934dd9ae5df6a6e21883a 100644 (file)
@@ -375,9 +375,7 @@ class CallFrameInfo(object):
         """
         return {
             DW_EH_encoding_flags['DW_EH_PE_absptr']:
-                entry_structs.Dwarf_uint32
-                if entry_structs.dwarf_format == 32 else
-                entry_structs.Dwarf_uint64,
+                entry_structs.Dwarf_target_addr,
             DW_EH_encoding_flags['DW_EH_PE_uleb128']:
                 entry_structs.Dwarf_uleb128,
             DW_EH_encoding_flags['DW_EH_PE_udata2']:
index 67ff1f52a8f191c5485887ebd91fb680785d7096..6fc05da4bcb767c8a650a84ae56397e6b4152fc2 100644 (file)
@@ -215,31 +215,61 @@ class ELFStructs(object):
         self.Elf_Chdr = Struct('Elf_Chdr', *fields)
 
     def _create_rel(self):
-        # r_info is also taken apart into r_info_sym and r_info_type.
-        # This is done in Value to avoid endianity issues while parsing.
+        r_info = self.Elf_xword('r_info')
+
+        # r_info is also taken apart into r_info_sym and r_info_type, plus
+        # r_info_type2 and r_info_type3 on ELF64 MIPS. This is done in Value
+        # to avoid endianity issues while parsing.
         if self.elfclass == 32:
-            r_info_sym = Value('r_info_sym',
-                lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF)
-            r_info_type = Value('r_info_type',
-                lambda ctx: ctx['r_info'] & 0xFF)
-        else: # 64
-            r_info_sym = Value('r_info_sym',
-                lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF)
-            r_info_type = Value('r_info_type',
-                lambda ctx: ctx['r_info'] & 0xFFFFFFFF)
+            fields = [Value('r_info_sym',
+                            lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF),
+                      Value('r_info_type',
+                            lambda ctx: ctx['r_info'] & 0xFF)]
+        elif self.e_machine == 'EM_MIPS': # ELF64 MIPS
+            # The r_info field in MIPS ELF64 binaries (called r_raw_info, here)
+            # isn't a 64-bit field, but rather two 32-bit fields (the symbol
+            # index, then three bytes for relocation types). See the
+            # specification:
+            # <https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf>
+            # Note that the specification describes the fields more directly,
+            # but here we stick to the general "r_info" field to be compatible
+            # with other architectures and simplify testing.
+
+            def compute_r_info(ctx):
+                raw = ctx['r_raw_info']
+                return (((raw & 0xffffffff) << 32)
+                        | ((raw >> 56) & 0xff)
+                        | ((raw >> 40) & 0xff00)
+                        | ((raw >> 24) & 0xff0000)
+                        | ((raw >> 8) & 0xff000000))
+
+            r_info = self.Elf_xword('r_raw_info')
+            fields = [
+                Value('r_info', compute_r_info),
+                Value('r_info_sym',
+                      lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF),
+                Value('r_info_type3',
+                      lambda ctx: (ctx['r_info'] >> 16) & 0xFF),
+                Value('r_info_type2',
+                      lambda ctx: (ctx['r_info'] >> 8) & 0xFF),
+                Value('r_info_type',
+                      lambda ctx: ctx['r_info'] & 0xFF)
+            ]
+        else: # Other 64 ELFs
+            fields = [Value('r_info_sym',
+                            lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF),
+                      Value('r_info_type',
+                            lambda ctx: ctx['r_info'] & 0xFFFFFFFF)]
 
         self.Elf_Rel = Struct('Elf_Rel',
             self.Elf_addr('r_offset'),
-            self.Elf_xword('r_info'),
-            r_info_sym,
-            r_info_type,
+            r_info,
+            *fields
         )
         self.Elf_Rela = Struct('Elf_Rela',
             self.Elf_addr('r_offset'),
-            self.Elf_xword('r_info'),
-            r_info_sym,
-            r_info_type,
-            self.Elf_sxword('r_addend'),
+            r_info,
+            *(fields + [self.Elf_sxword('r_addend')])
         )
 
     def _create_dyn(self):
index 2ff229a7c193c97dbf035d02687dcc288177e0e4..46955db8ca43a85ef87c8d563c56d6dec510b436 100755 (executable)
@@ -517,35 +517,47 @@ class ReadElf(object):
                         addend = self._format_hex(rel['r_addend'], lead0x=False)
                         self._emit(' %s   %s' % (' ' * fieldsize, addend))
                     self._emitline()
-                    continue
 
-                symbol = symtable.get_symbol(rel['r_info_sym'])
-                # Some symbols have zero 'st_name', so instead what's used is
-                # the name of the section they point at. Truncate symbol names
-                # (excluding version info) to 22 chars, similarly to readelf.
-                if symbol['st_name'] == 0:
-                    symsec = self.elffile.get_section(symbol['st_shndx'])
-                    symbol_name = symsec.name
-                    version = ''
                 else:
-                    symbol_name = symbol.name
-                    version = self._symbol_version(rel['r_info_sym'])
-                    version = (version['name']
-                               if version and version['name'] else '')
-                symbol_name = '%.22s' % symbol_name
-                if version:
-                    symbol_name += '@' + version
-
-                self._emit(' %s %s' % (
-                    self._format_hex(
-                        symbol['st_value'],
-                        fullhex=True, lead0x=False),
-                    symbol_name))
-                if section.is_RELA():
-                    self._emit(' %s %x' % (
-                        '+' if rel['r_addend'] >= 0 else '-',
-                        abs(rel['r_addend'])))
-                self._emitline()
+                    symbol = symtable.get_symbol(rel['r_info_sym'])
+                    # Some symbols have zero 'st_name', so instead what's used
+                    # is the name of the section they point at. Truncate symbol
+                    # names (excluding version info) to 22 chars, similarly to
+                    # readelf.
+                    if symbol['st_name'] == 0:
+                        symsec = self.elffile.get_section(symbol['st_shndx'])
+                        symbol_name = symsec.name
+                        version = ''
+                    else:
+                        symbol_name = symbol.name
+                        version = self._symbol_version(rel['r_info_sym'])
+                        version = (version['name']
+                                   if version and version['name'] else '')
+                    symbol_name = '%.22s' % symbol_name
+                    if version:
+                        symbol_name += '@' + version
+
+                    self._emit(' %s %s' % (
+                        self._format_hex(
+                            symbol['st_value'],
+                            fullhex=True, lead0x=False),
+                        symbol_name))
+                    if section.is_RELA():
+                        self._emit(' %s %x' % (
+                            '+' if rel['r_addend'] >= 0 else '-',
+                            abs(rel['r_addend'])))
+                    self._emitline()
+
+                # Emit the two additional relocation types for ELF64 MIPS
+                # binaries.
+                if (self.elffile.elfclass == 64 and
+                    self.elffile['e_machine'] == 'EM_MIPS'):
+                    for i in (2, 3):
+                        rtype = rel['r_info_type%s' % i]
+                        self._emit('                    Type%s: %s' % (
+                                   i,
+                                   describe_reloc_type(rtype, self.elffile)))
+                        self._emitline()
 
         if not has_relocation_sections:
             self._emitline('\nThere are no relocations in this file.')
diff --git a/test/testfiles_for_readelf/angr-eh_frame.elf b/test/testfiles_for_readelf/angr-eh_frame.elf
new file mode 100644 (file)
index 0000000..f6514ad
Binary files /dev/null and b/test/testfiles_for_readelf/angr-eh_frame.elf differ