dwarf.CallFrameInfo: Support parsing LSDA pointers from FDEs. (#308)
author Fish <ltfish@users.noreply.github.com>
Tue, 7 Jul 2020 13:07:12 +0000 (06:07 -0700)
committer GitHub <noreply@github.com>
Tue, 7 Jul 2020 13:07:12 +0000 (06:07 -0700)
* dwarf.CallFrameInfo: Support parsing LSDA pointers from FDEs.

* Add a test case.

* Make 0 explicit. More doc-string.

elftools/dwarf/callframe.py
test/test_callframe.py

index d47a0a1f748043765c072c6cdb1dcd54f879d4e3..0bb0b39ce3800ad27e7b50ca88a2226f3dc05fa0 100644 (file)
@@ -141,6 +141,14 @@ class CallFrameInfo(object):
         else:
             cie = self._parse_cie_for_fde(offset, header, entry_structs)
             aug_bytes = self._read_augmentation_data(entry_structs)
+            lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit'])
+            if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
+                # parse LSDA pointer
+                lsda_pointer = self._parse_lsda_pointer(entry_structs,
+                                                        self.stream.tell() - len(aug_bytes),
+                                                        lsda_encoding)
+            else:
+                lsda_pointer = None
 
         # For convenience, compute the end offset for this entry
         end_offset = (
@@ -163,8 +171,10 @@ class CallFrameInfo(object):
             cie = self._parse_cie_for_fde(offset, header, entry_structs)
             self._entry_cache[offset] = FDE(
                 header=header, instructions=instructions, offset=offset,
+                structs=entry_structs, cie=cie,
                 augmentation_bytes=aug_bytes,
-                structs=entry_structs, cie=cie)
+                lsda_pointer=lsda_pointer,
+            )
         return self._entry_cache[offset]
 
     def _parse_instructions(self, structs, offset, end_offset):
@@ -323,6 +333,37 @@ class CallFrameInfo(object):
             self.stream)['length']
         return self.stream.read(augmentation_data_length)
 
+    def _parse_lsda_pointer(self, structs, stream_offset, encoding):
+        """ Parse the LSDA pointer stored at stream_offset and return it.
+
+        The basic encoding (lower four bits of encoding) selects the field
+        format used to read the raw value. The modifier (upper four bits)
+        says how the raw value is adjusted afterwards: absptr leaves it
+        as-is, pcrel adds self.address + stream_offset. Any other modifier
+        raises NotImplementedError. encoding must not be DW_EH_PE_omit.
+
+        Ref: https://www.airs.com/blog/archives/460
+        """
+        assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
+        basic_encoding = encoding & 0x0f
+        modifier = encoding & 0xf0
+
+        # Raise instead of `assert False`: asserts are stripped under
+        # `python -O`, which would return the raw value unadjusted.
+        if modifier not in (DW_EH_encoding_flags['DW_EH_PE_absptr'],
+                            DW_EH_encoding_flags['DW_EH_PE_pcrel']):
+            raise NotImplementedError(
+                'Unsupported encoding modifier for LSDA pointer: {:#x}'.format(modifier))
+
+        formats = self._eh_encoding_to_field(structs)
+        ptr = struct_parse(
+            Struct('Augmentation_Data',
+                   formats[basic_encoding]('LSDA_pointer')),
+            self.stream, stream_pos=stream_offset)['LSDA_pointer']
+        if modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
+            ptr += self.address + stream_offset
+        return ptr
+
     def _parse_fde_header(self, entry_structs, offset):
         """ Compute a struct to parse the header of the current FDE.
         """
@@ -369,7 +410,8 @@ class CallFrameInfo(object):
 
         return result
 
-    def _eh_encoding_to_field(self, entry_structs):
+    @staticmethod
+    def _eh_encoding_to_field(entry_structs):
         """
         Return a mapping from basic encodings (DW_EH_encoding_flags) the
         corresponding field constructors (for instance
@@ -436,14 +478,14 @@ class CFIEntry(object):
             http://www.airs.com/blog/archives/460.
     """
     def __init__(self, header, structs, instructions, offset,
-            augmentation_dict={}, augmentation_bytes=b'', cie=None):
+            augmentation_dict=None, augmentation_bytes=b'', cie=None):
         self.header = header
         self.structs = structs
         self.instructions = instructions
         self.offset = offset
         self.cie = cie
         self._decoded_table = None
-        self.augmentation_dict = augmentation_dict
+        self.augmentation_dict = augmentation_dict if augmentation_dict else {}
         self.augmentation_bytes = augmentation_bytes
 
     def get_decoded(self):
@@ -593,7 +635,9 @@ class CIE(CFIEntry):
 
 
 class FDE(CFIEntry):
-    pass
+    def __init__(self, header, structs, instructions, offset, augmentation_bytes=None, cie=None, lsda_pointer=None):
+        super().__init__(header, structs, instructions, offset, augmentation_bytes=augmentation_bytes, cie=cie)
+        self.lsda_pointer = lsda_pointer
 
 
 class ZERO(object):
index 0fb9dde75967156d7399950e5905951c259da92c..fc434f9bbabe53548d80c0e0d93df254fd7c0328 100644 (file)
@@ -13,6 +13,7 @@ from elftools.dwarf.callframe import (
 from elftools.dwarf.structs import DWARFStructs
 from elftools.dwarf.descriptions import (describe_CFI_instructions,
     set_global_machine_arch)
+from elftools.dwarf.enums import DW_EH_encoding_flags
 from elftools.elf.elffile import ELFFile
 from os.path import join
 
@@ -85,6 +86,7 @@ class TestCallFrame(unittest.TestCase):
         self.assertEqual(entries[1]['length'], 40)
         self.assertEqual(entries[1]['CIE_pointer'], 0)
         self.assertEqual(entries[1]['address_range'], 84)
+        self.assertIsNone(entries[1].lsda_pointer)
         self.assertIs(entries[1].cie, entries[0])
         self.assertEqual(len(entries[1].instructions), 21)
         self.assertInstruction(entries[1].instructions[0],
@@ -167,5 +169,55 @@ class TestCallFrame(unittest.TestCase):
             self.assertEqual(oracle_decoded.table[0]['cfa'].offset,
                 decoded.table[0]['cfa'].offset)
 
+    def test_ehframe_fde_with_lsda_pointer(self):
+        # CIE and FDE dumped from exceptions_0, offset 0xcc0
+        # binary is at https://github.com/angr/binaries/blob/master/tests/x86_64/exceptions_0
+        data = (b'' +
+            # CIE
+            b'\x1c\x00\x00\x00' +       # length
+            b'\x00\x00\x00\x00' +       # ID
+            b'\x01' +                   # version
+            b'\x7a\x50\x4c\x52\x00' +   # augmentation string
+            b'\x01' +                   # code alignment
+            b'\x78' +                   # data alignment
+            b'\x10' +                   # return address register
+            b'\x07' +                   # augmentation data length
+            b'\x9b' +                   # personality function pointer encoding
+            b'\x3d\x13\x20\x00' +       # personality function pointer
+            b'\x1b' +                   # LSDA pointer encoding
+            b'\x1b' +                   # FDE encoding
+            b'\x0c\x07\x08\x90' +       # initial instructions
+            b'\x01\x00\x00' +
+            # FDE
+            b'\x24\x00\x00\x00' +       # length
+            b'\x24\x00\x00\x00' +       # CIE reference pointer
+            b'\x62\xfd\xff\xff' +       # pc begin
+            b'\x89\x00\x00\x00' +       # pc range
+            b'\x04' +                   # augmentation data length
+            b'\xb7\x00\x00\x00' +       # LSDA pointer
+            b'\x41\x0e\x10\x86' +       # call frame instructions
+            b'\x02\x43\x0d\x06' +
+            b'\x45\x83\x03\x02' +
+            b'\x7f\x0c\x07\x08' +
+            b'\x00\x00\x00'
+            )
+        s = BytesIO(data)
+
+        structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=8)
+        cfi = CallFrameInfo(s, len(data), 0, structs, for_eh_frame=True)
+        entries = cfi.get_entries()
+
+        self.assertEqual(len(entries), 2)
+        self.assertIsInstance(entries[0], CIE)
+        self.assertIn('LSDA_encoding', entries[0].augmentation_dict)
+        # check LSDA encoding
+        lsda_encoding = entries[0].augmentation_dict['LSDA_encoding']
+        basic_encoding = lsda_encoding & 0x0f
+        modifier = lsda_encoding & 0xf0
+        self.assertEqual(basic_encoding, DW_EH_encoding_flags['DW_EH_PE_sdata4'])
+        self.assertEqual(modifier, DW_EH_encoding_flags['DW_EH_PE_pcrel'])
+        self.assertIsInstance(entries[1], FDE)
+        self.assertEqual(entries[1].lsda_pointer, 232)  # pcrel: 0xb7 + 0x31 (offset of the LSDA field in data), assuming address 0
+
 if __name__ == '__main__':
     unittest.main()