From ff8331af0dc511c0186797ee7b62b7db78ac45d3 Mon Sep 17 00:00:00 2001
From: Fish
Date: Tue, 7 Jul 2020 06:07:12 -0700
Subject: [PATCH] dwarf.CallFrameInfo: Support parsing LSDA pointers from FDEs.
 (#308)

* dwarf.CallFrameInfo: Support parsing LSDA pointers from FDEs.

* Add a test case.

* Make 0 explicit. More doc-string.
---
Reviewer notes (ignored by `git am`, not part of the commit message):
- FDE.__init__ now calls super(FDE, self).__init__(...); the two-argument
  form works on both Python 2 and Python 3, the zero-argument form does not.
- FDE.__init__ now defaults augmentation_bytes to b'' so that it agrees with
  the default declared by CFIEntry.__init__ instead of overriding it with None.
- Whitespace in this patch was re-flowed from a copy whose newlines had been
  collapsed; verify context lines against the target tree before applying.

 elftools/dwarf/callframe.py | 54 +++++++++++++++++++++++++++++++++----
 test/test_callframe.py      | 52 +++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py
index d47a0a1..0bb0b39 100644
--- a/elftools/dwarf/callframe.py
+++ b/elftools/dwarf/callframe.py
@@ -141,6 +141,14 @@ class CallFrameInfo(object):
         else:
             cie = self._parse_cie_for_fde(offset, header, entry_structs)
             aug_bytes = self._read_augmentation_data(entry_structs)
+            lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit'])
+            if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
+                # parse LSDA pointer
+                lsda_pointer = self._parse_lsda_pointer(entry_structs,
+                                                        self.stream.tell() - len(aug_bytes),
+                                                        lsda_encoding)
+            else:
+                lsda_pointer = None
 
         # For convenience, compute the end offset for this entry
         end_offset = (
@@ -163,8 +171,10 @@ class CallFrameInfo(object):
             cie = self._parse_cie_for_fde(offset, header, entry_structs)
             self._entry_cache[offset] = FDE(
                 header=header, instructions=instructions, offset=offset,
+                structs=entry_structs, cie=cie,
                 augmentation_bytes=aug_bytes,
-                structs=entry_structs, cie=cie)
+                lsda_pointer=lsda_pointer,
+            )
         return self._entry_cache[offset]
 
     def _parse_instructions(self, structs, offset, end_offset):
@@ -323,6 +333,37 @@ class CallFrameInfo(object):
             self.stream)['length']
         return self.stream.read(augmentation_data_length)
 
+    def _parse_lsda_pointer(self, structs, stream_offset, encoding):
+        """ Parse bytes to get an LSDA pointer.
+
+        The basic encoding (lower four bits of the encoding) describes how the values are encoded in a CIE or an FDE.
+        The modifier (upper four bits of the encoding) describes how the raw values, after decoded using a basic
+        encoding, should be modified before using.
+
+        Ref: https://www.airs.com/blog/archives/460
+        """
+        assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
+        basic_encoding = encoding & 0x0f
+        modifier = encoding & 0xf0
+
+        formats = self._eh_encoding_to_field(structs)
+
+        ptr = struct_parse(
+            Struct('Augmentation_Data',
+                   formats[basic_encoding]('LSDA_pointer')),
+            self.stream, stream_pos=stream_offset)['LSDA_pointer']
+
+        if modifier == DW_EH_encoding_flags['DW_EH_PE_absptr']:
+            pass
+
+        elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
+            ptr += self.address + stream_offset
+
+        else:
+            assert False, 'Unsupported encoding modifier for LSDA pointer: {:#x}'.format(modifier)
+
+        return ptr
+
     def _parse_fde_header(self, entry_structs, offset):
         """ Compute a struct to parse the header of the current FDE.
         """
@@ -369,7 +410,8 @@ class CallFrameInfo(object):
 
         return result
 
-    def _eh_encoding_to_field(self, entry_structs):
+    @staticmethod
+    def _eh_encoding_to_field(entry_structs):
         """
         Return a mapping from basic encodings (DW_EH_encoding_flags) the
         corresponding field constructors (for instance
@@ -436,14 +478,14 @@ class CFIEntry(object):
         http://www.airs.com/blog/archives/460.
     """
     def __init__(self, header, structs, instructions, offset,
-            augmentation_dict={}, augmentation_bytes=b'', cie=None):
+            augmentation_dict=None, augmentation_bytes=b'', cie=None):
         self.header = header
         self.structs = structs
         self.instructions = instructions
         self.offset = offset
         self.cie = cie
         self._decoded_table = None
-        self.augmentation_dict = augmentation_dict
+        self.augmentation_dict = augmentation_dict if augmentation_dict else {}
         self.augmentation_bytes = augmentation_bytes
 
     def get_decoded(self):
@@ -593,7 +635,9 @@ class CIE(CFIEntry):
 
 
 class FDE(CFIEntry):
-    pass
+    def __init__(self, header, structs, instructions, offset, augmentation_bytes=b'', cie=None, lsda_pointer=None):
+        super(FDE, self).__init__(header, structs, instructions, offset, augmentation_bytes=augmentation_bytes, cie=cie)
+        self.lsda_pointer = lsda_pointer
 
 
 class ZERO(object):
diff --git a/test/test_callframe.py b/test/test_callframe.py
index 0fb9dde..fc434f9 100644
--- a/test/test_callframe.py
+++ b/test/test_callframe.py
@@ -13,6 +13,7 @@ from elftools.dwarf.callframe import (
 from elftools.dwarf.structs import DWARFStructs
 from elftools.dwarf.descriptions import (describe_CFI_instructions,
     set_global_machine_arch)
+from elftools.dwarf.enums import DW_EH_encoding_flags
 from elftools.elf.elffile import ELFFile
 from os.path import join
 
@@ -85,6 +86,7 @@ class TestCallFrame(unittest.TestCase):
         self.assertEqual(entries[1]['length'], 40)
         self.assertEqual(entries[1]['CIE_pointer'], 0)
         self.assertEqual(entries[1]['address_range'], 84)
+        self.assertIsNone(entries[1].lsda_pointer)
         self.assertIs(entries[1].cie, entries[0])
         self.assertEqual(len(entries[1].instructions), 21)
         self.assertInstruction(entries[1].instructions[0],
@@ -167,5 +169,55 @@ class TestCallFrame(unittest.TestCase):
         self.assertEqual(oracle_decoded.table[0]['cfa'].offset,
                          decoded.table[0]['cfa'].offset)
 
+    def test_ehframe_fde_with_lsda_pointer(self):
+        # CIE and FDE dumped from exceptions_0, offset 0xcc0
+        # binary is at https://github.com/angr/binaries/blob/master/tests/x86_64/exceptions_0
+        data = (b''
+            # CIE
+            b'\x1c\x00\x00\x00' + # length
+            b'\x00\x00\x00\x00' + # ID
+            b'\x01' + # version
+            b'\x7a\x50\x4c\x52\x00' + # augmentation string
+            b'\x01' + # code alignment
+            b'\x78' + # data alignment
+            b'\x10' + # return address register
+            b'\x07' + # augmentation data length
+            b'\x9b' + # personality function pointer encoding
+            b'\x3d\x13\x20\x00' + # personality function pointer
+            b'\x1b' + # LSDA pointer encoding
+            b'\x1b' + # FDE encoding
+            b'\x0c\x07\x08\x90' + # initial instructions
+            b'\x01\x00\x00'
+            # FDE
+            b'\x24\x00\x00\x00' + # length
+            b'\x24\x00\x00\x00' + # CIE reference pointer
+            b'\x62\xfd\xff\xff' + # pc begin
+            b'\x89\x00\x00\x00' + # pc range
+            b'\x04' + # augmentation data length
+            b'\xb7\x00\x00\x00' + # LSDA pointer
+            b'\x41\x0e\x10\x86' + # initial instructions
+            b'\x02\x43\x0d\x06' +
+            b'\x45\x83\x03\x02' +
+            b'\x7f\x0c\x07\x08' +
+            b'\x00\x00\x00'
+        )
+        s = BytesIO(data)
+
+        structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=8)
+        cfi = CallFrameInfo(s, len(data), 0, structs, for_eh_frame=True)
+        entries = cfi.get_entries()
+
+        self.assertEqual(len(entries), 2)
+        self.assertIsInstance(entries[0], CIE)
+        self.assertIn('LSDA_encoding', entries[0].augmentation_dict)
+        # check LSDA encoding
+        lsda_encoding = entries[0].augmentation_dict['LSDA_encoding']
+        basic_encoding = lsda_encoding & 0x0f
+        modifier = lsda_encoding & 0xf0
+        self.assertEqual(basic_encoding, DW_EH_encoding_flags['DW_EH_PE_sdata4'])
+        self.assertEqual(modifier, DW_EH_encoding_flags['DW_EH_PE_pcrel'])
+        self.assertIsInstance(entries[1], FDE)
+        self.assertEqual(entries[1].lsda_pointer, 232)
+
 if __name__ == '__main__':
     unittest.main()
-- 
2.30.2