from collections import namedtuple


class EHABIBytecodeDecoder(object):
    """ Decoder of a sequence of ARM exception handler abi bytecode.

        Reference:
        https://github.com/llvm/llvm-project/blob/master/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
        https://developer.arm.com/documentation/ihi0038/b/

        Accessible attributes:

            mnemonic_array:
                MnemonicItem array.

        Parameters:

            bytecode_array:
                Integer array, raw data of bytecode.

        Raises:

            IndexError if the last instruction is missing one of its
            operand bytes (the array ends in the middle of an instruction).
    """

    def __init__(self, bytecode_array):
        self._bytecode_array = bytecode_array
        self._index = None
        self.mnemonic_array = None
        self._decode()

    def _decode(self):
        """ Decode bytecode array, put result into mnemonic_array.
        """
        self._index = 0
        self.mnemonic_array = []
        while self._index < len(self._bytecode_array):
            # The first recipe whose (mask, value) matches wins, so the order
            # of `ring` matters: exact opcodes come before wider patterns.
            for mask, value, handler in self.ring:
                if (self._bytecode_array[self._index] & mask) == value:
                    start_idx = self._index
                    # Handlers are plain functions stored in the class body,
                    # hence the explicit `self`. Each one advances _index
                    # past the bytes it consumed.
                    mnemonic = handler(self)
                    end_idx = self._index
                    self.mnemonic_array.append(
                        MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic))
                    break

    def _read_byte(self):
        """ Return the byte at the cursor and advance the cursor by one.
        """
        byte = self._bytecode_array[self._index]
        self._index += 1
        return byte

    def _decode_00xxxxxx(self):
        # vsp = vsp + (xxxxxx << 2) + 4
        opcode = self._read_byte()
        return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4)

    def _decode_01xxxxxx(self):
        # vsp = vsp - (xxxxxx << 2) - 4
        opcode = self._read_byte()
        return 'vsp = vsp - %u' % (((opcode & 0x3f) << 2) + 4)

    gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
                          "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc")

    def _calculate_range(self, start, count):
        """ Bit mask with (count + 1) consecutive bits set, lowest at `start`.
        """
        return ((1 << (count + 1)) - 1) << start

    def _printGPR(self, gpr_mask):
        """ Format the core registers selected by gpr_mask as '{r0, r1, ...}'.
        """
        # Walk the 16 known names instead of 32 bit positions, so a stray
        # high bit cannot index past the end of the name table.
        hits = [name for bit, name in enumerate(self.gpr_register_names)
                if gpr_mask & (1 << bit)]
        return '{%s}' % ', '.join(hits)

    def _print_registers(self, vfp_mask, prefix):
        """ Format the numbered registers selected by vfp_mask, e.g. '{d8, d9}'.
        """
        hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0]
        return '{%s}' % ', '.join(hits)

    def _decode_1000iiii_iiiiiiii(self):
        # The 12-bit mask selects r4..r15; an all-zero mask means
        # "refuse to unwind".
        op0 = self._read_byte()
        op1 = self._read_byte()
        gpr_mask = (op1 << 4) | ((op0 & 0x0f) << 12)
        if gpr_mask == 0:
            return 'refuse to unwind'
        return 'pop %s' % self._printGPR(gpr_mask)

    def _decode_10011101(self):
        self._index += 1
        return 'reserved (ARM MOVrr)'

    def _decode_10011111(self):
        self._index += 1
        return 'reserved (WiMMX MOVrr)'

    def _decode_1001nnnn(self):
        # vsp = r[nnnn]
        opcode = self._read_byte()
        return 'vsp = r%u' % (opcode & 0x0f)

    def _decode_10100nnn(self):
        # pop r4 .. r[4 + nnn]
        opcode = self._read_byte()
        return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07))

    def _decode_10101nnn(self):
        # pop r4 .. r[4 + nnn] and lr (bit 14)
        opcode = self._read_byte()
        return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14))

    def _decode_10110000(self):
        self._index += 1
        return 'finish'

    def _decode_10110001_0000iiii(self):
        # pop the r0..r3 subset given by iiii; a zero mask or any bit in
        # the high nibble is a spare encoding.
        self._index += 1  # skip constant byte
        op1 = self._read_byte()
        if (op1 & 0xf0) != 0 or op1 == 0x00:
            return 'spare'
        return 'pop %s' % self._printGPR((op1 & 0x0f))

    def _decode_10110010_uleb128(self):
        # vsp = vsp + 0x204 + (uleb128 << 2)
        self._index += 1  # skip constant byte
        value = 0
        shift = 0
        while True:
            byte = self._read_byte()
            # ULEB128 is little-endian base 128: each byte contributes its
            # low 7 bits, and bit 7 of the byte JUST READ says whether
            # another byte follows.
            value |= (byte & 0x7f) << shift
            shift += 7
            if byte & 0x80 == 0:
                break
        return 'vsp = vsp + %u' % (0x204 + (value << 2))

    def _decode_10110011_sssscccc(self):
        # these two decoders are equal
        return self._decode_11001001_sssscccc()

    def _decode_101101nn(self):
        return self._spare()

    def _decode_10111nnn(self):
        # pop d8 .. d[8 + nnn]
        opcode = self._read_byte()
        return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d")

    def _decode_11000110_sssscccc(self):
        # pop wR[ssss] .. wR[ssss + cccc]
        self._index += 1  # skip constant byte
        op1 = self._read_byte()
        start = ((op1 & 0xf0) >> 4)
        count = ((op1 & 0x0f) >> 0)
        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "wR")

    def _decode_11000111_0000iiii(self):
        # pop the wCGR0..wCGR3 subset given by iiii; zero mask or high
        # nibble set is spare.
        self._index += 1  # skip constant byte
        op1 = self._read_byte()
        if (op1 & 0xf0) != 0 or op1 == 0x00:
            return 'spare'
        return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR")

    def _decode_11001000_sssscccc(self):
        # pop d[16 + ssss] .. d[16 + ssss + cccc]
        self._index += 1  # skip constant byte
        op1 = self._read_byte()
        start = 16 + ((op1 & 0xf0) >> 4)
        count = ((op1 & 0x0f) >> 0)
        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")

    def _decode_11001001_sssscccc(self):
        # pop d[ssss] .. d[ssss + cccc]
        self._index += 1  # skip constant byte
        op1 = self._read_byte()
        start = ((op1 & 0xf0) >> 4)
        count = ((op1 & 0x0f) >> 0)
        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")

    def _decode_11001yyy(self):
        return self._spare()

    def _decode_11000nnn(self):
        # pop wR10 .. wR[10 + nnn]
        opcode = self._read_byte()
        return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR")

    def _decode_11010nnn(self):
        # these two decoders are equal
        return self._decode_10111nnn()

    def _decode_11xxxyyy(self):
        return self._spare()

    def _spare(self):
        self._index += 1
        return 'spare'

    _DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler')

    # Matching table scanned top to bottom by _decode(); the final entries
    # are catch-alls, so every byte value matches some recipe.
    ring = (
        _DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx),
        _DECODE_RECIPE_TYPE(mask=0xc0, value=0x40, handler=_decode_01xxxxxx),
        _DECODE_RECIPE_TYPE(mask=0xf0, value=0x80, handler=_decode_1000iiii_iiiiiiii),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0x9d, handler=_decode_10011101),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0x9f, handler=_decode_10011111),
        _DECODE_RECIPE_TYPE(mask=0xf0, value=0x90, handler=_decode_1001nnnn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa0, handler=_decode_10100nnn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa8, handler=_decode_10101nnn),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb0, handler=_decode_10110000),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb1, handler=_decode_10110001_0000iiii),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb2, handler=_decode_10110010_uleb128),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb3, handler=_decode_10110011_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xfc, value=0xb4, handler=_decode_101101nn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xb8, handler=_decode_10111nnn),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc6, handler=_decode_11000110_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc7, handler=_decode_11000111_0000iiii),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc8, handler=_decode_11001000_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc9, handler=_decode_11001001_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xc8, value=0xc8, handler=_decode_11001yyy),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xc0, handler=_decode_11000nnn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xd0, handler=_decode_11010nnn),
        _DECODE_RECIPE_TYPE(mask=0xc0, value=0xc0, handler=_decode_11xxxyyy),
    )


class MnemonicItem(object):
    """ Single mnemonic item.

        Accessible attributes:

            bytecode:
                Slice of the bytecode array holding this instruction.

            mnemonic:
                String, decoded text of the instruction.
    """

    def __init__(self, bytecode, mnemonic):
        self.bytecode = bytecode
        self.mnemonic = mnemonic

    def __repr__(self):
        return '%s ; %s' % (' '.join(['0x%02x' % x for x in self.bytecode]), self.mnemonic)
class EHABIInfo(object):
    """ ARM exception handler abi information class.

        Parameters:

            arm_idx_section:
                elf.sections.Section object, section which type is SHT_ARM_EXIDX.

            little_endian:
                bool, endianness of elf file.
    """

    def __init__(self, arm_idx_section, little_endian):
        self._arm_idx_section = arm_idx_section
        self._struct = EHABIStructs(little_endian)
        self._num_entry = None

    def section_name(self):
        return self._arm_idx_section.name

    def section_offset(self):
        return self._arm_idx_section['sh_offset']

    def num_entry(self):
        """ Number of exception handler entry in the section.
        """
        if self._num_entry is None:
            self._num_entry = self._arm_idx_section['sh_size'] // EHABI_INDEX_ENTRY_SIZE
        return self._num_entry

    def get_entry(self, n):
        """ Get the exception handler entry at index #n. (EHABIEntry object or a subclass)

            Raises IndexError if n is negative or not smaller than num_entry().
            Malformed table data is reported by returning a CorruptEHABIEntry
            rather than by raising.
        """
        # A negative n would otherwise parse bytes located before the section.
        if n < 0 or n >= self.num_entry():
            raise IndexError('Invalid entry %d/%d' % (n, self._num_entry))
        stream = self._arm_idx_section.stream
        eh_index_entry_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE
        eh_index_data = struct_parse(self._struct.EH_index_struct, stream, eh_index_entry_offset)
        word0, word1 = eh_index_data['word0'], eh_index_data['word1']

        # The first word is a prel31 offset, its top bit must be clear.
        if word0 & 0x80000000 != 0:
            return CorruptEHABIEntry('Corrupt ARM exception handler table entry: %x' % n)

        function_offset = arm_expand_prel31(word0, eh_index_entry_offset)

        if word1 == 1:
            # 0x1 means cannot unwind
            return CannotUnwindEHABIEntry(function_offset)
        elif word1 & 0x80000000 == 0:
            # highest bit is zero, point to .ARM.extab data
            # (the prel31 is relative to the second word of the index entry)
            eh_table_offset = arm_expand_prel31(word1, eh_index_entry_offset + 4)
            eh_index_data = struct_parse(self._struct.EH_table_struct, stream, eh_table_offset)
            word0 = eh_index_data['word0']
            if word0 & 0x80000000 == 0:
                # highest bit is zero, generic model: the word is a prel31
                # offset to the personality routine
                return GenericEHABIEntry(function_offset, arm_expand_prel31(word0, eh_table_offset))
            else:
                # highest bit is one, arm compact model
                # highest half must be 0b1000 for compact model
                if word0 & 0x70000000 != 0:
                    return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
                per_index = (word0 >> 24) & 0x7f
                if per_index == 0:
                    # arm compact model 0
                    opcode = [(word0 & 0xFF0000) >> 16, (word0 & 0xFF00) >> 8, word0 & 0xFF]
                    return EHABIEntry(function_offset, per_index, opcode)
                elif per_index == 1 or per_index == 2:
                    # arm compact model 1/2: bits 16-23 count the extra words
                    # that follow, the low two bytes are the first opcodes
                    more_word = (word0 >> 16) & 0xff
                    opcode = [(word0 >> 8) & 0xff, (word0 >> 0) & 0xff]
                    stream.seek(eh_table_offset + 4)
                    for i in range(more_word):
                        r = struct_parse(self._struct.EH_table_struct, stream)['word0']
                        opcode.append((r >> 24) & 0xFF)
                        opcode.append((r >> 16) & 0xFF)
                        opcode.append((r >> 8) & 0xFF)
                        opcode.append((r >> 0) & 0xFF)
                    return EHABIEntry(function_offset, per_index, opcode, eh_table_offset=eh_table_offset)
                else:
                    return CorruptEHABIEntry('Unknown ARM compact model %d at table entry: %x' % (per_index, n))
        else:
            # highest bit is one, compact model must be 0
            if word1 & 0x7f000000 != 0:
                return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
            opcode = [(word1 & 0xFF0000) >> 16, (word1 & 0xFF00) >> 8, word1 & 0xFF]
            return EHABIEntry(function_offset, 0, opcode)


class EHABIEntry(object):
    """ Exception handler abi entry.

        Accessible attributes:

            function_offset:
                Integer.
                None if corrupt. (Reference: CorruptEHABIEntry)

            personality:
                Integer.
                None if corrupt or unwindable. (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry)
                0/1/2 for ARM personality compact format.
                Others for generic personality.

            bytecode_array:
                Integer array.
                None if corrupt or unwindable or generic personality.
                (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry)

            eh_table_offset:
                Integer.
                Only entries who point to .ARM.extab contains this field, otherwise return None.

            unwindable:
                bool. Whether this function is unwindable.

            corrupt:
                bool. Whether this entry is corrupt.

    """

    def __init__(self,
                 function_offset,
                 personality,
                 bytecode_array,
                 eh_table_offset=None,
                 unwindable=True,
                 corrupt=False):
        self.function_offset = function_offset
        self.personality = personality
        self.bytecode_array = bytecode_array
        self.eh_table_offset = eh_table_offset
        self.unwindable = unwindable
        self.corrupt = corrupt

    def mnmemonic_array(self):
        """ Decoded MnemonicItem list, or None when there is no bytecode.
        """
        if self.bytecode_array:
            return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array
        else:
            return None

    def __repr__(self):
        return "<EHABIEntry function_offset=0x%x, personality=%d, %sbytecode=%s>" % (
            self.function_offset,
            self.personality,
            "eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "",
            self.bytecode_array)


class CorruptEHABIEntry(EHABIEntry):
    """ This entry is corrupt. Attribute #corrupt will be True.
    """

    def __init__(self, reason):
        super(CorruptEHABIEntry, self).__init__(function_offset=None, personality=None, bytecode_array=None,
                                                corrupt=True)
        self.reason = reason

    def __repr__(self):
        return "<CorruptEHABIEntry reason=%s>" % self.reason


class CannotUnwindEHABIEntry(EHABIEntry):
    """ This function cannot be unwind. Attribute #unwindable will be False.
    """

    def __init__(self, function_offset):
        super(CannotUnwindEHABIEntry, self).__init__(function_offset, personality=None, bytecode_array=None,
                                                     unwindable=False)

    def __repr__(self):
        return "<CannotUnwindEHABIEntry function_offset=0x%x>" % self.function_offset


class GenericEHABIEntry(EHABIEntry):
    """ This entry is generic model rather than ARM compact model. Attribute #bytecode_array will be None.
    """

    def __init__(self, function_offset, personality):
        super(GenericEHABIEntry, self).__init__(function_offset, personality, bytecode_array=None)

    def __repr__(self):
        return "<GenericEHABIEntry function_offset=0x%x, personality=0x%x>" % (self.function_offset, self.personality)


def arm_expand_prel31(address, place):
    """ Resolve a prel31 value: add its signed 31-bit offset to `place`.

       address: uint32, bit 31 is ignored
       place: uint32, location the offset is relative to
       return: uint64
    """
    location = address & 0x7fffffff
    # Bit 30 is the sign bit of the 31-bit field; sign-extend it to 64 bits.
    if location & 0x40000000:
        location |= 0xffffffff80000000
    return (location + place) & 0xffffffffffffffff
class EHABIStructs(object):
    """ Construct structs used to parse EHABI data, built for one endianness.

        Accessible attributes:

            EH_index_struct:
                Struct of item in section .ARM.exidx.

            EH_table_struct:
                Struct of item in section .ARM.extab.
    """

    def __init__(self, little_endian):
        self._little_endian = little_endian
        self._create_structs()

    def _create_structs(self):
        # Select the 32-bit field constructor once; both structs reuse it.
        self.EHABI_uint32 = ULInt32 if self._little_endian else UBInt32
        self._create_exception_handler_index()
        self._create_exception_handler_table()

    def _create_exception_handler_index(self):
        # An index entry is two consecutive 32-bit words.
        uint32 = self.EHABI_uint32
        first_word = uint32('word0')
        second_word = uint32('word1')
        self.EH_index_struct = Struct('EH_index', first_word, second_word)

    def _create_exception_handler_table(self):
        # Table data is read one 32-bit word at a time.
        only_word = self.EHABI_uint32('word0')
        self.EH_table_struct = Struct('EH_table', only_word)
def has_ehabi_info(self):
    """ Check whether this file appears to have arm exception handler index table.
    """
    return any(s['sh_type'] == 'SHT_ARM_EXIDX' for s in self.iter_sections())

def get_ehabi_infos(self):
    """ Generally, shared library and executable contain 1 .ARM.exidx section.
        Object file contains many .ARM.exidx sections.
        So we must traverse every section and filter sections whose type is SHT_ARM_EXIDX.

        Returns a list of EHABIInfo objects, or None if the file has no
        SHT_ARM_EXIDX section.
        Raises AssertionError for relocatable (ET_REL) files.
    """
    if self['e_type'] == 'ET_REL':
        # TODO: support relocatable file
        # Raised explicitly: an `assert` statement is removed under
        # `python -O`, which would let relocatable files slip through.
        raise AssertionError("Current version of pyelftools doesn't support relocatable file.")
    infos = [EHABIInfo(section, self.little_endian)
             for section in self.iter_sections()
             if section['sh_type'] == 'SHT_ARM_EXIDX']
    return infos if infos else None
def display_arm_unwind(self):
    """ Display the ARM exception handler index tables of the file
        (one block of output per SHT_ARM_EXIDX section).
    """
    if not self.elffile.has_ehabi_info():
        self._emitline('There are no .ARM.exidx sections in this file.')
        return
    for ehabi_info in self.elffile.get_ehabi_infos():
        # Unwind section '.ARM.exidx' at offset 0x203e8 contains 1009 entries:
        self._emitline("\nUnwind section '%s' at offset 0x%x contains %d entries" % (
            ehabi_info.section_name(),
            ehabi_info.section_offset(),
            ehabi_info.num_entry()
        ))

        for i in range(ehabi_info.num_entry()):
            entry = ehabi_info.get_entry(i)
            self._emitline()
            self._emitline("Entry %d:" % i)
            if isinstance(entry, CorruptEHABIEntry):
                # Corrupt entries carry no function offset, only a reason.
                self._emitline("    [corrupt] %s" % entry.reason)
                continue
            self._emit("  Function offset 0x%x: " % entry.function_offset)
            if isinstance(entry, CannotUnwindEHABIEntry):
                self._emitline("[cantunwind]")
                continue
            elif entry.eh_table_offset:
                self._emitline("@0x%x" % entry.eh_table_offset)
            else:
                self._emitline("Compact (inline)")
            if isinstance(entry, GenericEHABIEntry):
                self._emitline("    Personality: 0x%x" % entry.personality)
            else:
                self._emitline("    Compact model index: %d" % entry.personality)
                # mnmemonic_array() returns None when the entry has no
                # bytecode; iterate an empty tuple in that case.
                for mnemonic_item in entry.mnmemonic_array() or ():
                    self._emit('      ')
                    self._emitline(mnemonic_item)
class TestEHABIDecoder(unittest.TestCase):
    """ Tests for the EHABI decoder.
    """

    def _check_mnemonics(self, bytecode, expected_mnemonics):
        """ Decode bytecode and compare each mnemonic, in order, with the
            expected text.
        """
        decoded = EHABIBytecodeDecoder(bytecode).mnemonic_array
        for position, expected in enumerate(expected_mnemonics):
            self.assertEqual(decoded[position].mnemonic, expected)

    def testLLVM(self):
        # Reference: https://github.com/llvm/llvm-project/blob/master/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s
        self._check_mnemonics(
            [0xb1, 0x0f, 0xa7, 0x3f, 0xb0, 0xb0],
            [
                "pop {r0, r1, r2, r3}",
                "pop {r4, r5, r6, r7, r8, r9, r10, fp}",
                "vsp = vsp + 256",
                "finish",
                "finish",
            ])

        self._check_mnemonics(
            [0xc9, 0x84, 0xb0],
            [
                "pop {d8, d9, d10, d11, d12}",
                "finish",
            ])

        self._check_mnemonics(
            [0xD7, 0xC9, 0x02, 0xC8, 0x02, 0xC7, 0x03, 0xC6,
             0x02, 0xC2, 0xBA, 0xB3, 0x12, 0xB2, 0x80, 0x04,
             0xB1, 0x01, 0xB0, 0xA9, 0xA1, 0x91, 0x84, 0xC0,
             0x80, 0xC0, 0x80, 0x01, 0x81, 0x00, 0x80, 0x00,
             0x42, 0x02, ],
            [
                "pop {d8, d9, d10, d11, d12, d13, d14, d15}",
                "pop {d0, d1, d2}",
                "pop {d16, d17, d18}",
                "pop {wCGR0, wCGR1}",
                "pop {wR0, wR1, wR2}",
                "pop {wR10, wR11, wR12}",
                "pop {d8, d9, d10}",
                "pop {d1, d2, d3}",
                "vsp = vsp + 2564",
                "pop {r0}",
                "finish",
                "pop {r4, r5, lr}",
                "pop {r4, r5}",
                "vsp = r1",
                "pop {r10, fp, lr}",
                "pop {r10, fp}",
                "pop {r4}",
                "pop {ip}",
                "refuse to unwind",
                "vsp = vsp - 12",
                "vsp = vsp + 12",
            ])

        self._check_mnemonics(
            [0xD8, 0xD0, 0xCA, 0xC9, 0x00, 0xC8, 0x00, 0xC7,
             0x10, 0xC7, 0x01, 0xC7, 0x00, 0xC6, 0x00, 0xC0,
             0xB8, 0xB4, 0xB3, 0x00, 0xB2, 0x00, 0xB1, 0x10,
             0xB1, 0x01, 0xB1, 0x00, 0xB0, 0xA8, 0xA0, 0x9F,
             0x9D, 0x91, 0x88, 0x00, 0x80, 0x00, 0x40, 0x00,
             ],
            [
                "spare",
                "pop {d8}",
                "spare",
                "pop {d0}",
                "pop {d16}",
                "spare",
                "pop {wCGR0}",
                "spare",
                "pop {wR0}",
                "pop {wR10}",
                "pop {d8}",
                "spare",
                "pop {d0}",
                "vsp = vsp + 516",
                "spare",
                "pop {r0}",
                "spare",
                "finish",
                "pop {r4, lr}",
                "pop {r4}",
                "reserved (WiMMX MOVrr)",
                "reserved (ARM MOVrr)",
                "vsp = r1",
                "pop {pc}",
                "refuse to unwind",
                "vsp = vsp - 4",
                "vsp = vsp + 4",
            ])
class TestEHABIELF(unittest.TestCase):
    """ Parse ELF and visit ARM exception handler index table entry.
    """

    def test_parse_object_file(self):
        # FIXME: `.ARM.exidx.text.XXX` need relocation, it's too complex for current unittest.
        path = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.o')
        with open(path, 'rb') as stream:
            elf = ELFFile(stream)
            # Relocatable files are rejected with an AssertionError.
            with self.assertRaises(AssertionError) as caught:
                elf.get_ehabi_infos()
            self.assertEqual(str(caught.exception),
                             "Current version of pyelftools doesn't support relocatable file.")

    def test_parse_shared_library(self):
        path = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.so')
        with open(path, 'rb') as stream:
            elf = ELFFile(stream)
            self.assertTrue(elf.has_ehabi_info())
            infos = elf.get_ehabi_infos()
            self.assertEqual(1, len(infos))
            info = infos[0]

            # Entry 0: compact model whose bytecode lives in .ARM.extab.
            first = info.get_entry(0)
            self.assertIsInstance(first, EHABIEntry)
            self.assertEqual(first.function_offset, 0x34610)
            self.assertEqual(first.eh_table_offset, 0x69544)
            self.assertEqual(first.bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0])

            cannot_unwind = info.get_entry(7)
            self.assertIsInstance(cannot_unwind, CannotUnwindEHABIEntry)
            self.assertEqual(cannot_unwind.function_offset, 0x346f8)

            # Entry 8: inline compact model 0.
            inline = info.get_entry(8)
            self.assertIsInstance(inline, EHABIEntry)
            self.assertEqual(inline.personality, 0)
            self.assertEqual(inline.function_offset, 0x3473c)
            self.assertEqual(inline.bytecode_array, [0x97, 0x84, 0x08])

            generic = info.get_entry(9)
            self.assertIsInstance(generic, GenericEHABIEntry)
            self.assertEqual(generic.function_offset, 0x3477c)
            self.assertEqual(generic.personality, 0x31a30)

            for index in range(info.num_entry()):
                self.assertNotIsInstance(info.get_entry(index), CorruptEHABIEntry)

    def test_parse_executable(self):
        path = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.elf')
        with open(path, 'rb') as stream:
            elf = ELFFile(stream)
            self.assertTrue(elf.has_ehabi_info())
            infos = elf.get_ehabi_infos()
            self.assertEqual(1, len(infos))
            info = infos[0]

            # Entry 0: compact model whose bytecode lives in .ARM.extab.
            first = info.get_entry(0)
            self.assertIsInstance(first, EHABIEntry)
            self.assertEqual(first.function_offset, 0x4f50)
            self.assertEqual(first.eh_table_offset, 0x22864)
            self.assertEqual(first.bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0])

            cannot_unwind = info.get_entry(7)
            self.assertIsInstance(cannot_unwind, CannotUnwindEHABIEntry)
            self.assertEqual(cannot_unwind.function_offset, 0x5040)

            generic = info.get_entry(8)
            self.assertIsInstance(generic, GenericEHABIEntry)
            self.assertEqual(generic.personality, 0x15d21)

            # Entry 9: inline compact model 0.
            inline = info.get_entry(9)
            self.assertIsInstance(inline, EHABIEntry)
            self.assertEqual(inline.function_offset, 0x5144)
            self.assertEqual(inline.personality, 0)
            self.assertEqual(inline.bytecode_array, [0x97, 0x84, 0x08])

            for index in range(info.num_entry()):
                self.assertNotIsInstance(info.get_entry(index), CorruptEHABIEntry)
/dev/null and b/test/testfiles_for_unittests/arm_exidx_test.so differ