From: Seva Alekseyev Date: Sun, 22 Mar 2020 13:35:19 +0000 (-0400) Subject: GNU expressions (#303) X-Git-Tag: v0.27~37 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=98f0cdb22129786584b55322dca208aed0ecda97;p=pyelftools.git GNU expressions (#303) --- diff --git a/elftools/common/utils.py b/elftools/common/utils.py index 4e80e18..d1fde2c 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -9,7 +9,7 @@ from contextlib import contextmanager from .exceptions import ELFParseError, ELFError, DWARFError from .py3compat import int2byte -from ..construct import ConstructError +from ..construct import ConstructError, ULInt8 def merge_dicts(*dicts): @@ -102,6 +102,11 @@ def roundup(num, bits): """ return (num - 1 | (1 << bits) - 1) + 1 +def read_blob(stream, length): + """Read length bytes from stream, return a list of ints + """ + return [struct_parse(ULInt8(''), stream) for i in range(length)] + #------------------------- PRIVATE ------------------------- def _assert_with_exception(cond, msg, exception_type): diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 101c2d6..5f48eb4 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -224,6 +224,8 @@ class CallFrameInfo(object): args = [ struct_parse(structs.Dwarf_uleb128(''), self.stream), struct_parse(structs.Dwarf_sleb128(''), self.stream)] + elif opcode == DW_CFA_GNU_args_size: + args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] else: dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode) @@ -488,7 +490,9 @@ class CFIEntry(object): line_stack = [] def _add_to_order(regnum): - if regnum not in cur_line: + # DW_CFA_restore and others remove registers from cur_line, + # but they stay in reg_order. Avoid duplicates. 
+ if regnum not in reg_order: reg_order.append(regnum) for instr in self.instructions: diff --git a/elftools/dwarf/constants.py b/elftools/dwarf/constants.py index 79db975..558e8c6 100644 --- a/elftools/dwarf/constants.py +++ b/elftools/dwarf/constants.py @@ -197,3 +197,4 @@ DW_CFA_def_cfa_offset_sf = 0x13 DW_CFA_val_offset = 0x14 DW_CFA_val_offset_sf = 0x15 DW_CFA_val_expression = 0x16 +DW_CFA_GNU_args_size = 0x2e diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 5567732..29f818f 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -99,7 +99,7 @@ def describe_CFI_instructions(entry): s += ' %s: %s ofs %s\n' % ( name, _full_reg_name(instr.args[0]), instr.args[1] * cie['data_alignment_factor']) - elif name == 'DW_CFA_def_cfa_offset': + elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'): s += ' %s: %s\n' % (name, instr.args[0]) elif name == 'DW_CFA_def_cfa_expression': expr_dumper = ExprDumper(entry.structs) @@ -132,7 +132,7 @@ def describe_CFI_CFA_rule(rule): return '%s%+d' % (describe_reg_name(rule.reg), rule.offset) -def describe_DWARF_expr(expr, structs): +def describe_DWARF_expr(expr, structs, cu_offset=None): """ Textual description of a DWARF expression encoded in 'expr'. structs should come from the entity encompassing the expression - it's needed to be able to parse it correctly. 
@@ -145,7 +145,7 @@ def describe_DWARF_expr(expr, structs): _DWARF_EXPR_DUMPER_CACHE[cache_key] = \ ExprDumper(structs) dwarf_expr_dumper = _DWARF_EXPR_DUMPER_CACHE[cache_key] - return '(' + dwarf_expr_dumper.dump_expr(expr) + ')' + return '(' + dwarf_expr_dumper.dump_expr(expr, cu_offset) + ')' def describe_reg_name(regnum, machine_arch=None, default=True): @@ -335,6 +335,7 @@ _DESCR_DW_ATE = { DW_ATE_edited: '(edited)', DW_ATE_signed_fixed: '(signed_fixed)', DW_ATE_unsigned_fixed: '(unsigned_fixed)', + DW_ATE_UTF: '(unicode string)', DW_ATE_HP_float80: '(HP_float80)', DW_ATE_HP_complex_float80: '(HP_complex_float80)', DW_ATE_HP_float128: '(HP_float128)', @@ -421,7 +422,7 @@ def _location_list_extra(attr, die, section_offset): if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'): return '(location list)' else: - return describe_DWARF_expr(attr.value, die.cu.structs) + return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) def _data_member_location_extra(attr, die, section_offset): @@ -434,7 +435,7 @@ def _data_member_location_extra(attr, die, section_offset): elif attr.form == 'DW_FORM_sdata': return str(attr.value) else: - return describe_DWARF_expr(attr.value, die.cu.structs) + return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) def _import_extra(attr, die, section_offset): @@ -539,16 +540,20 @@ class ExprDumper(object): self.expr_parser = DWARFExprParser(self.structs) self._init_lookups() - def dump_expr(self, expr): + def dump_expr(self, expr, cu_offset=None): """ Parse and dump a DWARF expression. expr should be a list of - (integer) byte values. + (integer) byte values. cu_offset is the cu_offset + value from the CU object where the expression resides. + Only affects a handful of GNU opcodes, if None is provided, + that's not a crash condition, only the expression dump will + not be consistent of that of readelf. Returns a string representing the expression. 
""" parsed = self.expr_parser.parse_expr(expr) s = [] for deo in parsed: - s.append(self._dump_to_string(deo.op, deo.op_name, deo.args)) + s.append(self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset)) return '; '.join(s) def _init_lookups(self): @@ -568,7 +573,14 @@ class ExprDumper(object): self._ops_with_hex_arg = set( ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) - def _dump_to_string(self, opcode, opcode_name, args): + def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None): + # Some GNU ops contain an offset from the current CU as an argument, + # but readelf emits those ops with offset from the info section + # so we need the base offset of the parent CU. + # If omitted, arguments on some GNU opcodes will be off. + if cu_offset is None: + cu_offset = 0 + if len(args) == 0: if opcode_name.startswith('DW_OP_reg'): regnum = int(opcode_name[9:]) @@ -596,5 +608,19 @@ class ExprDumper(object): return '%s: %x' % (opcode_name, args[0]) elif opcode_name in self._ops_with_two_decimal_args: return '%s: %s %s' % (opcode_name, args[0], args[1]) + elif opcode_name == 'DW_OP_GNU_entry_value': + return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]])) + elif opcode_name == 'DW_OP_implicit_value': + return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]])) + elif opcode_name == 'DW_OP_GNU_parameter_ref': + return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset) + elif opcode_name == 'DW_OP_GNU_implicit_pointer': + return "%s: <0x%x> %d" % (opcode_name, args[0], args[1]) + elif opcode_name == 'DW_OP_GNU_convert': + return "%s <0x%x>" % (opcode_name, args[0] + cu_offset) + elif opcode_name == 'DW_OP_GNU_deref_type': + return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset) + elif opcode_name == 'DW_OP_GNU_const_type': + return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" 
% b for b in args[1])) else: return '<unknown %s>' % opcode_name diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index 6de0b59..f791975 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -9,7 +9,7 @@ from collections import namedtuple from ..common.py3compat import BytesIO, iteritems -from ..common.utils import struct_parse, bytelist2string +from ..common.utils import struct_parse, bytelist2string, read_blob # DWARF expression opcodes. name -> opcode mapping @@ -83,6 +83,13 @@ DW_OP_name2opcode = dict( DW_OP_convert=0xa8, DW_OP_reinterpret=0xa9, DW_OP_lo_user=0xe0, + DW_OP_GNU_implicit_pointer=0xf2, + DW_OP_GNU_entry_value=0xf3, + DW_OP_GNU_const_type=0xf4, + DW_OP_GNU_regval_type=0xf5, + DW_OP_GNU_deref_type=0xf6, + DW_OP_GNU_convert=0xf7, + DW_OP_GNU_parameter_ref=0xfa, DW_OP_hi_user=0xff, ) @@ -171,9 +178,26 @@ def _init_dispatch_table(structs): def parse_arg_struct2(arg1_struct, arg2_struct): return lambda stream: [struct_parse(arg1_struct, stream), struct_parse(arg2_struct, stream)] + + # ULEB128, then an expression of that length + def parse_nestedexpr(): + def parse(stream): + size = struct_parse(structs.Dwarf_uleb128(''), stream) + nested_expr_blob = read_blob(stream, size) + return [DWARFExprParser(structs).parse_expr(nested_expr_blob)] + return parse + + # ULEB128, then a blob of that size + def parse_blob(): + return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))] + + # ULEB128 with datatype DIE offset, then byte, then a blob of that size + def parse_typedblob(): + return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] add('DW_OP_addr', parse_op_addr()) add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) + add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) add('DW_OP_const2s', 
parse_arg_struct(structs.Dwarf_int16(''))) add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32(''))) @@ -193,11 +217,11 @@ def _init_dispatch_table(structs): 'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef', 'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus', 'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not', - 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', 'DW_OP_shra', - 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', 'DW_OP_gt', - 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop', + 'DW_OP_or', 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', + 'DW_OP_shra', 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', + 'DW_OP_gt', 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop', 'DW_OP_push_object_address', 'DW_OP_form_tls_address', - 'DW_OP_call_frame_cfa']: + 'DW_OP_call_frame_cfa', 'DW_OP_stack_value']: add(opname, parse_noargs()) for n in range(0, 32): @@ -217,5 +241,16 @@ def _init_dispatch_table(structs): add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16(''))) add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32(''))) add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_implicit_value', parse_blob()) + add('DW_OP_GNU_entry_value', parse_nestedexpr()) + add('DW_OP_GNU_const_type', parse_typedblob()) + add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), + structs.Dwarf_sleb128(''))) + add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) return table diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index bacefd7..86d15d6 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -178,9 +178,12 @@ def describe_note(x): n_desc = x['n_desc'] desc = '' if x['n_type'] == 'NT_GNU_ABI_TAG': - desc 
= '\n OS: %s, ABI: %d.%d.%d' % ( - _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown), - n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny']) + if x['n_name'] == 'Android': + desc = '\n description data: %s ' % ' '.join("%02x" % ord(b) for b in x['n_descdata']) + else: + desc = '\n OS: %s, ABI: %d.%d.%d' % ( + _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown), + n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny']) elif x['n_type'] == 'NT_GNU_BUILD_ID': desc = '\n Build ID: %s' % (n_desc) elif x['n_type'] == 'NT_GNU_GOLD_VERSION': @@ -190,11 +193,15 @@ def describe_note(x): '{:02x}'.format(ord(byte)) for byte in n_desc )) - note_type = (x['n_type'] if isinstance(x['n_type'], str) - else 'Unknown note type:') - note_type_desc = ('0x%.8x' % x['n_type'] - if isinstance(x['n_type'], int) else - _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown)) + if x['n_type'] == 'NT_GNU_ABI_TAG' and x['n_name'] == 'Android': + note_type = 'NT_VERSION' + note_type_desc = 'version' + else: + note_type = (x['n_type'] if isinstance(x['n_type'], str) + else 'Unknown note type:') + note_type_desc = ('0x%.8x' % x['n_type'] + if isinstance(x['n_type'], int) else + _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown)) return '%s (%s)%s' % (note_type, note_type_desc, desc) diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index 67c0e94..94713e0 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -784,6 +784,7 @@ ENUM_RELOC_TYPE_x64 = dict( R_X86_64_TLSDESC_CALL=35, R_X86_64_TLSDESC=36, R_X86_64_IRELATIVE=37, + R_X86_64_REX_GOTPCRELX=42, R_X86_64_GNU_VTINHERIT=250, R_X86_64_GNU_VTENTRY=251, _default_=Pass, diff --git a/elftools/elf/notes.py b/elftools/elf/notes.py index d69007f..3e46b3a 100644 --- a/elftools/elf/notes.py +++ b/elftools/elf/notes.py @@ -30,6 +30,7 @@ def iter_notes(elffile, offset, size): offset += disk_namesz desc_data = bytes2str(elffile.stream.read(note['n_descsz'])) + note['n_descdata'] = desc_data if note['n_type'] == 'NT_GNU_ABI_TAG': 
note['n_desc'] = struct_parse(elffile.structs.Elf_abi, elffile.stream, diff --git a/examples/dwarf_location_info.py b/examples/dwarf_location_info.py index 5258e49..0ec9933 100644 --- a/examples/dwarf_location_info.py +++ b/examples/dwarf_location_info.py @@ -85,13 +85,13 @@ def process_file(filename): if isinstance(loc, LocationExpr): print(' %s' % ( describe_DWARF_expr(loc.loc_expr, - dwarfinfo.structs))) + dwarfinfo.structs, CU.cu_offset))) elif isinstance(loc, list): print(show_loclist(loc, dwarfinfo, - indent=' ')) + ' ', CU.cu_offset)) -def show_loclist(loclist, dwarfinfo, indent): +def show_loclist(loclist, dwarfinfo, indent, cu_offset): """ Display a location list nicely, decoding the DWARF expressions contained within. """ @@ -100,7 +100,7 @@ def show_loclist(loclist, dwarfinfo, indent): if isinstance(loc_entity, LocationEntry): d.append('%s <<%s>>' % ( loc_entity, - describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs))) + describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs, cu_offset))) else: d.append(str(loc_entity)) return '\n'.join(indent + s for s in d) diff --git a/scripts/readelf.py b/scripts/readelf.py index 46955db..c596109 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -400,11 +400,11 @@ class ReadElf(object): version_info = '@@%(name)s' % version # symbol names are truncated to 25 chars, similarly to readelf - self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % ( + self._emitline('%6d: %s %s %-7s %-6s %-7s %4s %.25s%s' % ( nsym, self._format_hex( symbol['st_value'], fullhex=True, lead0x=False), - symbol['st_size'], + "%5d" % symbol['st_size'] if symbol['st_size'] < 100000 else hex(symbol['st_size']), describe_symbol_type(symbol['st_info']['type']), describe_symbol_bind(symbol['st_info']['bind']), describe_symbol_visibility(symbol['st_other']['visibility']), @@ -489,7 +489,7 @@ class ReadElf(object): continue has_relocation_sections = True - self._emitline("\nRelocation section '%s' at offset %s contains %s 
entries:" % ( + self._emitline("\nRelocation section '%.128s' at offset %s contains %s entries:" % ( section.name, self._format_hex(section['sh_offset']), section.num_relocations())) @@ -986,7 +986,10 @@ class ReadElf(object): # correctly reflect the nesting depth # die_depth = 0 + current_function = None for die in cu.iter_DIEs(): + if die.tag == 'DW_TAG_subprogram': + current_function = die self._emitline(' <%s><%x>: Abbrev Number: %s%s' % ( die_depth, die.offset, @@ -1001,11 +1004,19 @@ class ReadElf(object): # Unknown attribute values are passed-through as integers if isinstance(name, int): name = 'Unknown AT value: %x' % name - self._emitline(' <%x> %-18s: %s' % ( + + attr_desc = describe_attr_value(attr, die, section_offset) + + if 'DW_OP_fbreg' in attr_desc and current_function and not 'DW_AT_frame_base' in current_function.attributes: + postfix = ' [without dw_at_frame_base]' + else: + postfix = '' + + self._emitline(' <%x> %-18s: %s%s' % ( attr.offset, name, - describe_attr_value( - attr, die, section_offset))) + attr_desc, + postfix)) if die.has_children: die_depth += 1 diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 76ecd72..5f9a8ae 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -48,19 +48,23 @@ def discover_testfiles(rootdir): yield os.path.join(rootdir, filename) -def run_test_on_file(filename, verbose=False): +def run_test_on_file(filename, verbose, opt): """ Runs a test on the given input filename. Return True if all test runs succeeded. + If opt is specified, rather that going over the whole + set of supported readelf options, the test will only + run for one option. 
""" success = True testlog.info("Test file '%s'" % filename) - for option in [ + options = [opt] if opt else [ '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V', '--debug-dump=info', '--debug-dump=decodedline', '--debug-dump=frames', '--debug-dump=frames-interp', '--debug-dump=aranges', '--debug-dump=pubtypes', '--debug-dump=pubnames' - ]: + ] + for option in options: if verbose: testlog.info("..option='%s'" % option) # TODO(zlobober): this is a dirty hack to make tests work for ELF core @@ -83,7 +87,7 @@ def run_test_on_file(filename, verbose=False): rc, stdout = run_exe(exe_path, args) if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,)) if rc != 0: - testlog.error("@@ aborting - '%s' returned '%s'" % (exe_path, rc)) + testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc)) return False stdouts.append(stdout) if verbose: testlog.info('....comparing output...') @@ -201,6 +205,9 @@ def main(): '-k', '--keep-going', action='store_true', dest='keep_going', help="Run all tests, don't stop at the first failure") + argparser.add_argument('--opt', + action='store', dest='opt', metavar='', + help= 'Limit the test one one readelf option.') args = argparser.parse_args() if args.parallel: @@ -223,13 +230,13 @@ def main(): if len(filenames) > 1 and args.parallel: pool = Pool() results = pool.map( - run_test_on_file, + lambda filename: run_test_on_file(filename, False, args.opt), filenames) failures = results.count(False) else: failures = 0 for filename in filenames: - if not run_test_on_file(filename, verbose=args.verbose): + if not run_test_on_file(filename, args.verbose, args.opt): failures += 1 if not args.keep_going: break diff --git a/test/testfiles_for_readelf/dwarf_gnuops2.o.elf b/test/testfiles_for_readelf/dwarf_gnuops2.o.elf new file mode 100644 index 0000000..0d3bb20 Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_gnuops2.o.elf differ diff --git a/test/testfiles_for_readelf/dwarf_gnuops3.o.elf 
b/test/testfiles_for_readelf/dwarf_gnuops3.o.elf new file mode 100644 index 0000000..9ae256d Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_gnuops3.o.elf differ diff --git a/test/testfiles_for_readelf/dwarf_gnuops4.so.elf b/test/testfiles_for_readelf/dwarf_gnuops4.so.elf new file mode 100644 index 0000000..d9ffe9b Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_gnuops4.so.elf differ