From: Marco Bonelli Date: Fri, 10 Dec 2021 14:36:18 +0000 (+0100) Subject: Update readelf to v2.37, adapt readelf.py output and tests (#387) X-Git-Tag: v0.28~6 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=662e4ab30d314d5d00e901bebc3187636d5adc59;p=pyelftools.git Update readelf to v2.37, adapt readelf.py output and tests (#387) Changes to conform the output of readelf.py to binutils readelf v2.37: - Use singular "entry" when needed instead of "entries". - Output the last entry for the .debug_line output table when DW_LNE_end_sequence is encountered, as the DWARF standard dictates. It looks like this was a readelf bug which was fixed in commit ba8826a82a29a19b78c18ce4f44fe313de279af7 of the GNU binutils-gdb repo. - Add additional "Stmt" field in the .debug_line output table, and ignore the new "View" field. The "Stmt" field has been implemented in readelf.py. The "View" field is not something that the DWARF standard defines, it's an internal register added to the line number information state machine by binutils to perform assembler checks (see commit ba8826a82a29a19b78c18ce4f44fe313de279af7 of GNU binutils-gdb repo for more info, in particular gas/doc/as.texinfo). "View" is unimplemented in pyelftools for now and a special case has been added in the readelf test suite to ignore it. - Add support for printing section names when dumping .symtab entries of st_type STT_SECTION as readelf v2.37 does (see commit 23356397449a8aa65afead0a895a20be53b3c6b0 of GNU binutils-gdb repo). - Add support for recognizing SOs specifically tagged as PIE (DT_FLAGS_1 dynamic tag with DF_1_PIE set). In such a case, describe the file as "Position-Independent Executable file" instead of "Shared object file", as readelf v2.37 does. - Add leading "0x" for version section addresses when dumping version information (-V) as readelf does. - Ignore "D (mbind)" in section headers flags legend (pyelftools does not output this flag). 
Special cases ADDED for run_readelf_tests.py: - Ignore "View" column for --debug-dump=decodedline in readelf's output. - Ignore ellipsis ("[...]") for long names/symbols/paths in readelf's output. Special cases REMOVED for run_readelf_tests.py: - Detection of additional '@' after symbol names (flag_after_symtable) seems to no longer be needed as all tests pass without this exception. - Special case for DW_AT_apple_xxx seems to no longer be needed, readelf now recognizes those. - Special case for PT_GNU_PROPERTY no longer needed, readelf now recognizes it. Other changes: - Add missing import in elftools/dwarf/lineprogram.py. References: - GNU binutils-gdb repo: https://sourceware.org/git/?p=binutils-gdb.git --- diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index ce69d68..dbde7ba 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -10,7 +10,7 @@ import os import copy from collections import namedtuple -from ..common.utils import struct_parse +from ..common.utils import struct_parse, dwarf_assert from .constants import * @@ -178,6 +178,7 @@ class LineProgram(object): if ex_opcode == DW_LNE_end_sequence: state.end_sequence = True + state.is_stmt = 0 add_entry_new_state(ex_opcode, [], is_extended=True) # reset state state = LineState(self.header['default_is_stmt']) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index 27e23be..d15cbe1 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -35,7 +35,13 @@ def describe_ei_osabi(x): return _DESCR_EI_OSABI.get(x, _unknown) -def describe_e_type(x): +def describe_e_type(x, elffile=None): + if elffile is not None and x == 'ET_DYN': + # Detect whether this is a normal SO or a PIE executable + dynamic = elffile.get_section_by_name('.dynamic') + for t in dynamic.iter_tags('DT_FLAGS_1'): + if t.entry.d_val & ENUM_DT_FLAGS_1['DF_1_PIE']: + return 'DYN (Position-Independent Executable file)' return _DESCR_E_TYPE.get(x, 
_unknown) diff --git a/scripts/readelf.py b/scripts/readelf.py index 80d5650..2ddd02b 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -108,7 +108,7 @@ class ReadElf(object): self._emitline(' ABI Version: %d' % e_ident['EI_ABIVERSION']) self._emitline(' Type: %s' % - describe_e_type(header['e_type'])) + describe_e_type(header['e_type'], self.elffile)) self._emitline(' Machine: %s' % describe_e_machine(header['e_machine'])) self._emitline(' Version: %s' % @@ -230,7 +230,7 @@ class ReadElf(object): elfheader = self.elffile.header if show_heading: self._emitline('Elf file type is %s' % - describe_e_type(elfheader['e_type'])) + describe_e_type(elfheader['e_type'], self.elffile)) self._emitline('Entry point is %s' % self._format_hex(elfheader['e_entry'])) # readelf weirness - why isn't e_phoff printed as hex? (for section @@ -391,8 +391,10 @@ class ReadElf(object): section.name)) continue - self._emitline("\nSymbol table '%s' contains %s entries:" % ( - section.name, section.num_symbols())) + self._emitline("\nSymbol table '%s' contains %d %s:" % ( + section.name, + section.num_symbols(), + 'entry' if section.num_symbols() == 1 else 'entries')) if self.elffile.elfclass == 32: self._emitline(' Num: Value Size Type Bind Vis Ndx Name') @@ -418,6 +420,13 @@ class ReadElf(object): else: version_info = '@@%(name)s' % version + symbol_name = symbol.name + # Print section names for STT_SECTION symbols as readelf does + if (symbol['st_info']['type'] == 'STT_SECTION' + and symbol['st_shndx'] < self.elffile.num_sections() + and symbol['st_name'] == 0): + symbol_name = self.elffile.get_section(symbol['st_shndx']).name + # symbol names are truncated to 25 chars, similarly to readelf self._emitline('%6d: %s %s %-7s %-6s %-7s %4s %.25s%s' % ( nsym, @@ -430,7 +439,7 @@ class ReadElf(object): describe_symbol_shndx(self._get_symbol_shndx(symbol, nsym, section_index)), - symbol.name, + symbol_name, version_info)) def display_dynamic_tags(self): @@ -442,9 +451,10 @@ class 
ReadElf(object): continue has_dynamic_sections = True - self._emitline("\nDynamic section at offset %s contains %s entries:" % ( + self._emitline("\nDynamic section at offset %s contains %d %s:" % ( self._format_hex(section['sh_offset']), - section.num_tags())) + section.num_tags(), + 'entry' if section.num_tags() == 1 else 'entries')) self._emitline(" Tag Type Name/Value") padding = 20 + (8 if self.elffile.elfclass == 32 else 0) @@ -510,10 +520,11 @@ class ReadElf(object): continue has_relocation_sections = True - self._emitline("\nRelocation section '%.128s' at offset %s contains %s entries:" % ( + self._emitline("\nRelocation section '%.128s' at offset %s contains %d %s:" % ( section.name, self._format_hex(section['sh_offset']), - section.num_relocations())) + section.num_relocations(), + 'entry' if section.num_relocations() == 1 else 'entries')) if section.is_RELA(): self._emitline(" Offset Info Type Sym. Value Sym. Name + Addend") else: @@ -592,11 +603,11 @@ class ReadElf(object): return for ehabi_info in self.elffile.get_ehabi_infos(): # Unwind section '.ARM.exidx' at offset 0x203e8 contains 1009 entries: - self._emitline("\nUnwind section '%s' at offset 0x%x contains %d entries" % ( + self._emitline("\nUnwind section '%s' at offset 0x%x contains %d %s" % ( ehabi_info.section_name(), ehabi_info.section_offset(), - ehabi_info.num_entry() - )) + ehabi_info.num_entry(), + 'entry' if ehabi_info.num_entry() == 1 else 'entries')) for i in range(ehabi_info.num_entry()): entry = ehabi_info.get_entry(i) @@ -632,9 +643,7 @@ class ReadElf(object): for section in self.elffile.iter_sections(): if isinstance(section, GNUVerSymSection): - self._print_version_section_header( - section, 'Version symbols', lead0x=False) - + self._print_version_section_header(section, 'Version symbols') num_symbols = section.num_symbols() # Symbol version info are printed four by four entries @@ -903,8 +912,9 @@ class ReadElf(object): else: num_entries = version_section.num_symbols() - 
self._emitline("\n%s section '%s' contains %s entries:" % - (name, version_section.name, num_entries)) + self._emitline("\n%s section '%s' contains %d %s:" % ( + name, version_section.name, num_entries, + 'entry' if num_entries == 1 else 'entries')) self._emitline('%sAddr: %s Offset: %s Link: %i (%s)' % ( ' ' * indent, self._format_hex( @@ -1106,7 +1116,8 @@ class ReadElf(object): """ if not self._dwarfinfo.has_debug_info: return - self._emitline('Decoded dump of debug contents of section %s:\n' % self._dwarfinfo.debug_line_sec.name) + self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_line_sec.name) + self._emitline() for cu in self._dwarfinfo.iter_CUs(): lineprogram = self._dwarfinfo.line_program_for_CU(cu) @@ -1121,7 +1132,7 @@ class ReadElf(object): cu_filename = '%s/%s' % (bytes2str(dir), cu_filename) self._emitline('CU: %s:' % cu_filename) - self._emitline('File name Line number Starting address') + self._emitline('File name Line number Starting address Stmt') # Print each state's file, line and address information. For some # instructions other output is needed to be compatible with @@ -1143,23 +1154,19 @@ class ReadElf(object): elif entry.command == DW_LNE_define_file: self._emitline('%s:' % ( bytes2str(lineprogram['include_directory'][entry.args[0].dir_index]))) - elif not state.end_sequence: - # readelf doesn't print the state after end_sequence - # instructions. I think it's a bug but to be compatible - # I don't print them too. 
- if lineprogram['version'] < 4 or self.elffile['e_machine'] == 'EM_PPC64': - self._emitline('%-35s %11d %18s' % ( - bytes2str(lineprogram['file_entry'][state.file - 1].name), - state.line, - '0' if state.address == 0 else - self._format_hex(state.address))) - else: - self._emitline('%-35s %11d %18s[%d]' % ( - bytes2str(lineprogram['file_entry'][state.file - 1].name), - state.line, - '0' if state.address == 0 else - self._format_hex(state.address), - state.op_index)) + elif lineprogram['version'] < 4 or self.elffile['e_machine'] == 'EM_PPC64': + self._emitline('%-35s %11s %18s %s' % ( + bytes2str(lineprogram['file_entry'][state.file - 1].name), + state.line if not state.end_sequence else '-', + '0' if state.address == 0 else self._format_hex(state.address), + 'x' if state.is_stmt and not state.end_sequence else '')) + else: + self._emitline('%-35s %11d %18s[%d] %s' % ( + bytes2str(lineprogram['file_entry'][state.file - 1].name), + state.line if not state.end_sequence else '-', + '0' if state.address == 0 else self._format_hex(state.address), + state.op_index, + 'x' if state.is_stmt and not state.end_sequence else '')) if entry.command == DW_LNS_copy: # Another readelf oddity... 
self._emitline() diff --git a/test/external_tools/readelf b/test/external_tools/readelf index c467972..96f85d8 100755 Binary files a/test/external_tools/readelf and b/test/external_tools/readelf differ diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 96447b4..59a039c 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -130,15 +130,16 @@ def compare_output(s1, s2): lines1 = prepare_lines(s1) lines2 = prepare_lines(s2) - flag_after_symtable = False + flag_in_debug_line_section = False if len(lines1) != len(lines2): return False, 'Number of lines different: %s vs %s' % ( len(lines1), len(lines2)) for i in range(len(lines1)): - if 'symbol table' in lines1[i]: - flag_after_symtable = True + if lines1[i].endswith('debug_line section:'): + # .debug_line or .zdebug_line + flag_in_debug_line_section = True # readelf spelling error for GNU property notes lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type') @@ -162,12 +163,32 @@ def compare_output(s1, s2): sm = SequenceMatcher() sm.set_seqs(lines1[i], lines2[i]) changes = sm.get_opcodes() - if flag_after_symtable: - # Detect readelf's adding @ with lib and version after - # symbol name. - if ( len(changes) == 2 and changes[1][0] == 'delete' and - lines1[i][changes[1][1]] == '@'): + if flag_in_debug_line_section: + # readelf outputs an additional "View" column: ignore it + if len(lines1_parts) >= 2 and lines1_parts[-2] == 'view': ok = True + else: + # Fast check special-cased for the only ELF we have which + # has this information (dwarf_gnuops4.so.elf) + ok = ( lines1_parts[-2:] == ['1', 'x'] + and lines2_parts[-1] == 'x') + elif '[...]' in lines1[i]: + # Special case truncations with ellipsis like these: + # .note.gnu.bu[...] 
redelf + # .note.gnu.build-i pyelftools + # Or more complex for symbols with versions, like these: + # _unw[...]@gcc_3.0 readelf + # _unwind_resume@gcc_3.0 pyelftools + for p1, p2 in zip(lines1_parts, lines2_parts): + dots_start = p1.find('[...]') + if dots_start != -1: + break + ok = p1.endswith('[...]') and p1[:dots_start] == p2[:dots_start] + if not ok: + dots_end = dots_start + 5 + if len(p1) > dots_end and p1[dots_end] == '@': + ok = ( p1[:dots_start] == p2[:dots_start] + and p1[p1.rfind('@'):] == p2[p2.rfind('@'):]) elif 'at_const_value' in lines1[i]: # On 32-bit machines, readelf doesn't correctly represent # some boundary LEB128 numbers @@ -178,17 +199,11 @@ def compare_output(s1, s2): elif 'os/abi' in lines1[i]: if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]: ok = True - elif ( 'unknown at value' in lines1[i] and - 'dw_at_apple' in lines2[i]): - ok = True - elif 'loos+0x474e553' in lines1[i]: - # readelf v2.29 does not know about PT_GNU_PROPERTY apparently - ok = lines2_parts[0] == 'gnu_property' elif len(lines1_parts) == 3 and lines1_parts[2] == 'nt_gnu_property_type_0': # readelf does not seem to print a readable description for this ok = lines1_parts == lines2_parts[:3] else: - for s in ('t (tls)', 'l (large)'): + for s in ('t (tls)', 'l (large)', 'd (mbind)'): if s in lines1[i] or s in lines2[i]: ok = True break