From: Seva Alekseyev Date: Tue, 22 Aug 2023 12:26:05 +0000 (-0400) Subject: readelf 2.41 with Ranges test excluded on 2 files (#489) X-Git-Tag: v0.30~6 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=268d1fb454d39ae826ea619fa5e6d6b3e260fa04;p=pyelftools.git readelf 2.41 with Ranges test excluded on 2 files (#489) * readelf 2.41 with Ranges test excluded * Ranges test reinstated except two binaries --------- Co-authored-by: Seva --- diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 7db7f3f..0b409c7 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -183,34 +183,37 @@ def describe_form_class(form): # _MACHINE_ARCH = None +# Implements the alternative format of readelf: lowercase hex, prefixed with 0x unless 0 +def _format_hex(n): + return '0x%x' % n if n != 0 else '0' def _describe_attr_ref(attr, die, section_offset): - return '<0x%x>' % (attr.value + die.cu.cu_offset) + return '<%s>' % _format_hex(attr.value + die.cu.cu_offset) def _describe_attr_ref_sig8(attr, die, section_offset): - return 'signature: 0x%x' % (attr.value) + return 'signature: %s' % _format_hex(attr.value) def _describe_attr_value_passthrough(attr, die, section_offset): return attr.value def _describe_attr_hex(attr, die, section_offset): - return '0x%x' % (attr.value) + return '%s' % _format_hex(attr.value) def _describe_attr_hex_addr(attr, die, section_offset): - return '<0x%x>' % (attr.value) + return '<%s>' % _format_hex(attr.value) def _describe_attr_split_64bit(attr, die, section_offset): low_word = attr.value & 0xFFFFFFFF high_word = (attr.value >> 32) & 0xFFFFFFFF - return '0x%x 0x%x' % (low_word, high_word) + return '%s %s' % (_format_hex(low_word), _format_hex(high_word)) def _describe_attr_strp(attr, die, section_offset): - return '(indirect string, offset: 0x%x): %s' % ( - attr.raw_value, bytes2str(attr.value)) + return '(indirect string, offset: %s): %s' % ( + _format_hex(attr.raw_value), bytes2str(attr.value)) 
def _describe_attr_line_strp(attr, die, section_offset): - return '(indirect line string, offset: 0x%x): %s' % ( - attr.raw_value, bytes2str(attr.value)) + return '(indirect line string, offset: %s): %s' % ( + _format_hex(attr.raw_value), bytes2str(attr.value)) def _describe_attr_string(attr, die, section_offset): return bytes2str(attr.value) diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 85fc593..43ab293 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -114,14 +114,21 @@ class RangeLists(object): return struct_parse(self.structs.Dwarf_rnglists_entries, self.stream, offset) def iter_range_lists(self): - """ Yield all range lists found in the section according to readelf rules. + """ Yields all range lists found in the section according to readelf rules. Scans the DIEs for rangelist offsets, then pulls those. + Returned rangelists are always translated into lists of BaseAddressEntry/RangeEntry objects. """ - # Calling parse until the stream ends is wrong, because ranges can overlap. - # Need to scan the DIEs to know all range locations + # Rangelists can overlap. That is, one DIE points at the rangelist beginning, and another + # points at the middle of the same. Therefore, enumerating them is not a well defined + # operation - do you count those as two different (but overlapping) ones, or as a single one? + # For debugging utility, you want two. That's what readelf does. For faithfully + # representing the section contents, you want one. + # That was the behaviour of pyelftools 0.28 and below - calling + # parse until the stream end. Leaving aside the question of correctness, + # that's incompatible with readelf. 
- # This maps list offset to CU ver5 = self.version >= 5 + # This maps list offset to CU cu_map = {die.attributes['DW_AT_ranges'].value : cu for cu in self._dwarfinfo.iter_CUs() for die in cu.iter_DIEs() @@ -147,8 +154,13 @@ class RangeLists(object): stream = self.stream stream.seek(cu.offset_table_offset + (64 if cu.is64 else 32) * cu.offset_count) while stream.tell() < cu.offset_after_length + cu.unit_length: - yield struct_parse(self.structs.Dwarf_rnglists_entries, stream); + yield struct_parse(self.structs.Dwarf_rnglists_entries, stream) + def translate_v5_entry(self, entry, cu): + """Translates entries in a DWARFv5 rangelist from raw parsed format to + a list of BaseAddressEntry/RangeEntry, using the CU + """ + return entry_translate[entry.entry_type](entry, cu) #------ PRIVATE ------# diff --git a/scripts/readelf.py b/scripts/readelf.py index 14e29ae..a58085b 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -949,7 +949,8 @@ class ReadElf(object): lead0x = False else: lead0x = True - fieldsize -= 2 + if fieldsize is not None: + fieldsize -= 2 s = '0x' if lead0x else '' if fullhex: @@ -980,7 +981,7 @@ class ReadElf(object): self._format_hex( version_section['sh_addr'], fieldsize=16, lead0x=lead0x), self._format_hex( - version_section['sh_offset'], fieldsize=6, lead0x=True), + version_section['sh_offset'], fieldsize=8, lead0x=True), version_section['sh_link'], self.elffile.get_section(version_section['sh_link']).name ) @@ -1117,7 +1118,7 @@ class ReadElf(object): for cu in self._dwarfinfo.iter_CUs(): self._emitline(' Compilation Unit @ offset %s:' % - self._format_hex(cu.cu_offset)) + self._format_hex(cu.cu_offset, alternate=True)) self._emitline(' Length: %s (%s)' % ( self._format_hex(cu['unit_length']), '%s-bit' % cu.dwarf_format())) @@ -1128,7 +1129,7 @@ class ReadElf(object): self._emitline(' Unit Type: %s (%d)' % ( unit_type, ENUM_DW_UT.get(cu.header.unit_type, 0))) self._emitline(' Abbrev Offset: %s' % ( - 
self._format_hex(cu['debug_abbrev_offset']))) + self._format_hex(cu['debug_abbrev_offset'], alternate=True))) self._emitline(' Pointer Size: %s' % cu['address_size']) if unit_type in ('DW_UT_skeleton', 'DW_UT_split_compile'): self._emitline(' Dwo id: %s' % cu['dwo_id']) @@ -1137,7 +1138,7 @@ class ReadElf(object): self._emitline(' Type Offset: 0x%x' % cu['type_offset']) else: self._emitline(' Abbrev Offset: %s' % ( - self._format_hex(cu['debug_abbrev_offset']))), + self._format_hex(cu['debug_abbrev_offset'], alternate=True))), self._emitline(' Pointer Size: %s' % cu['address_size']) # The nesting depth of each DIE within the tree of DIEs must be @@ -1627,33 +1628,54 @@ class ReadElf(object): self._dump_debug_rangesection(di, range_lists_sec) def _dump_debug_rangesection(self, di, range_lists_sec): - # In the master branch of binutils, the v5 dump format is way different by now. - + # Last amended to match readelf 2.41 ver5 = range_lists_sec.version >= 5 section_name = (di.debug_rnglists_sec if ver5 else di.debug_ranges_sec).name addr_size = di.config.default_address_size # In bytes, 4 or 8 addr_width = addr_size * 2 # In hex digits, 8 or 16 line_template = " %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width) - base_template = " %%08x %%0%dx (base address)" % (addr_width) - - range_lists = list(range_lists_sec.iter_range_lists()) - if len(range_lists) == 0: - # Present but empty locations section - readelf outputs a message - self._emitline("\nSection '%s' has no debugging data." % section_name) - return + base_template = " %%08x %%0%dx (base address)" % (addr_width) # In order to determine the base address of the range # We need to know the corresponding CU. 
cu_map = {die.attributes['DW_AT_ranges'].value : cu # Range list offset => CU for cu in di.iter_CUs() for die in cu.iter_DIEs() - if 'DW_AT_ranges' in die.attributes} - - self._emitline('Contents of the %s section:\n' % section_name) - self._emitline(' Offset Begin End') - - for range_list in range_lists: - self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template) + if 'DW_AT_ranges' in die.attributes} + + if ver5: # Dump by CUs - unsure at this point what does readelf do, ranges dump is buggy in 2.41 + self._emitline('Contents of the %s section:\n\n\n' % section_name) + for cu in range_lists_sec.iter_CUs(): + self._emitline(' Table at Offset: %s:' % self._format_hex(cu.cu_offset, alternate=True)) + self._emitline(' Length: %s' % self._format_hex(cu.unit_length, alternate=True)) + self._emitline(' DWARF version: %d' % cu.version) + self._emitline(' Address size: %d' % cu.address_size) + self._emitline(' Segment size: %d' % cu.segment_selector_size) + self._emitline(' Offset entries: %d\n' % cu.offset_count) + # Is the offset table dumped too? + for (i, range_list) in enumerate(range_lists_sec.iter_CU_range_lists_ex(cu)): + list_offset = range_list[0].entry_offset + range_list = list(range_lists_sec.translate_v5_entry(entry, cu_map[list_offset]) for entry in range_list) + self._emitline(' Offset: %s, Index: %d' % (self._format_hex(list_offset, alternate=True), i)) + self._emitline(' Offset Begin End') + self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template) + else: # Dump by DIE reference offset + range_lists = list(range_lists_sec.iter_range_lists()) + if len(range_lists) == 0: + # Present but empty ranges section - readelf outputs a message + self._emitline("\nSection '%s' has no debugging data." 
% section_name) + return + + self._emitline('Contents of the %s section:\n\n\n' % section_name) + self._emitline(' Offset Begin End') + + for range_list in range_lists: + if len(range_list) == 0: # working around a bogus behavior in readelf 2.41 + # No entries means no offset. Dirty hack: peek the stream position + range_list_offset = range_lists_sec.stream.tell() - self._dwarfinfo.config.default_address_size*2 + self._emitline(' %08x ' % (range_list_offset)) + else: + self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template) def _dump_rangelist(self, range_list, cu_map, ver5, line_template, base_template): # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset, diff --git a/test/external_tools/README.txt b/test/external_tools/README.txt index 968c677..87d0b17 100644 --- a/test/external_tools/README.txt +++ b/test/external_tools/README.txt @@ -4,7 +4,7 @@ readelf is built as follows: * From binutils Git: https://sourceware.org/git/binutils-gdb.git * git fetch --all --tags -* git co binutils--branch +* git checkout binutils--release * Run configure, then make * Built on a 64-bit Ubuntu machine diff --git a/test/external_tools/readelf b/test/external_tools/readelf index f7df456..e23d301 100755 Binary files a/test/external_tools/readelf and b/test/external_tools/readelf differ diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index c1fc48c..f94c42e 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -72,6 +72,11 @@ def run_test_on_file(filename, verbose=False, opt=None): else: options = [opt] + # TODO(sevaa): excluding two files from the --debug-dump=Ranges test until the maintainers + # of GNU binutils fix https://sourceware.org/bugzilla/show_bug.cgi?id=30781 + if filename.endswith('dwarf_test_versions_mix.elf') or filename.endswith('dwarf_v5ops.so.elf'): + options.remove('--debug-dump=Ranges') + for option in options: if verbose: testlog.info("..option='%s'" % option)