Fix formatting of some dynamic tag fields to match readelf
[pyelftools.git] / scripts / readelf.py
index a7296f52feee3bff039b83ed2a06da7b0ef9fdb8..c1ed3fc8d2146bd1610b7366b64d76436f7b3464 100755 (executable)
@@ -9,13 +9,10 @@
 #-------------------------------------------------------------------------------
 import os, sys
 from optparse import OptionParser
-from itertools import ifilter
 import string
 
-
 # If elftools is not installed, maybe we're running from the root or scripts
 # dir of the source distribution
-#
 try:
     import elftools
 except ImportError:
@@ -23,7 +20,11 @@ except ImportError:
 
 from elftools import __version__
 from elftools.common.exceptions import ELFError
+from elftools.common.py3compat import (
+        ifilter, byte2int, bytes2str, itervalues, str2bytes)
 from elftools.elf.elffile import ELFFile
+from elftools.elf.dynamic import DynamicSection, DynamicSegment
+from elftools.elf.enums import ENUM_D_TAG
 from elftools.elf.segments import InterpSegment
 from elftools.elf.sections import SymbolTableSection
 from elftools.elf.relocation import RelocationSection
@@ -33,7 +34,7 @@ from elftools.elf.descriptions import (
     describe_e_version_numeric, describe_p_type, describe_p_flags,
     describe_sh_type, describe_sh_flags,
     describe_symbol_type, describe_symbol_bind, describe_symbol_visibility,
-    describe_symbol_shndx, describe_reloc_type,
+    describe_symbol_shndx, describe_reloc_type, describe_dyn_tag,
     )
 from elftools.dwarf.dwarfinfo import DWARFInfo
 from elftools.dwarf.descriptions import (
@@ -50,15 +51,15 @@ class ReadElf(object):
     """ display_* methods are used to emit output into the output stream
     """
     def __init__(self, file, output):
-        """ file: 
+        """ file:
                 stream object with the ELF file to read
-            
+
             output:
                 output stream to write to
         """
         self.elffile = ELFFile(file)
         self.output = output
-        
+
         # Lazily initialized if a debug dump is requested
         self._dwarfinfo = None
 
@@ -67,35 +68,35 @@ class ReadElf(object):
         """
         self._emitline('ELF Header:')
         self._emit('  Magic:   ')
-        self._emitline(' '.join('%2.2x' % ord(b) 
+        self._emitline(' '.join('%2.2x' % byte2int(b)
                                     for b in self.elffile.e_ident_raw))
         header = self.elffile.header
         e_ident = header['e_ident']
-        self._emitline('  Class:                             %s' % 
+        self._emitline('  Class:                             %s' %
                 describe_ei_class(e_ident['EI_CLASS']))
-        self._emitline('  Data:                              %s' % 
+        self._emitline('  Data:                              %s' %
                 describe_ei_data(e_ident['EI_DATA']))
-        self._emitline('  Version:                           %s' % 
+        self._emitline('  Version:                           %s' %
                 describe_ei_version(e_ident['EI_VERSION']))
         self._emitline('  OS/ABI:                            %s' %
                 describe_ei_osabi(e_ident['EI_OSABI']))
-        self._emitline('  ABI Version:                       %d' % 
+        self._emitline('  ABI Version:                       %d' %
                 e_ident['EI_ABIVERSION'])
         self._emitline('  Type:                              %s' %
                 describe_e_type(header['e_type']))
-        self._emitline('  Machine:                           %s' % 
+        self._emitline('  Machine:                           %s' %
                 describe_e_machine(header['e_machine']))
         self._emitline('  Version:                           %s' %
                 describe_e_version_numeric(header['e_version']))
-        self._emitline('  Entry point address:               %s' % 
+        self._emitline('  Entry point address:               %s' %
                 self._format_hex(header['e_entry']))
-        self._emit('  Start of program headers:          %s' % 
+        self._emit('  Start of program headers:          %s' %
                 header['e_phoff'])
         self._emitline(' (bytes into file)')
-        self._emit('  Start of section headers:          %s' % 
+        self._emit('  Start of section headers:          %s' %
                 header['e_shoff'])
         self._emitline(' (bytes into file)')
-        self._emitline('  Flags:                             %s' % 
+        self._emitline('  Flags:                             %s' %
                 self._format_hex(header['e_flags']))
         self._emitline('  Size of this header:               %s (bytes)' %
                 header['e_ehsize'])
@@ -174,14 +175,14 @@ class ReadElf(object):
                     self._format_hex(segment['p_align'], lead0x=False)))
 
             if isinstance(segment, InterpSegment):
-                self._emitline('      [Requesting program interpreter: %s]' % 
-                    segment.get_interp_name())
+                self._emitline('      [Requesting program interpreter: %s]' %
+                    bytes2str(segment.get_interp_name()))
 
         # Sections to segments mapping
         #
         if self.elffile.num_sections() == 0:
             # No sections? We're done
-            return 
+            return
 
         self._emitline('\n Section to Segment mapping:')
         self._emitline('  Segment Sections...')
@@ -190,9 +191,9 @@ class ReadElf(object):
             self._emit('   %2.2d     ' % nseg)
 
             for section in self.elffile.iter_sections():
-                if (    not section.is_null() and 
+                if (    not section.is_null() and
                         segment.section_in_segment(section)):
-                    self._emit('%s ' % section.name)
+                    self._emit('%s ' % bytes2str(section.name))
 
             self._emitline('')
 
@@ -219,7 +220,7 @@ class ReadElf(object):
         #
         for nsec, section in enumerate(self.elffile.iter_sections()):
             self._emit('  [%2u] %-17.17s %-15.15s ' % (
-                nsec, section.name, describe_sh_type(section['sh_type'])))
+                nsec, bytes2str(section.name), describe_sh_type(section['sh_type'])))
 
             if self.elffile.elfclass == 32:
                 self._emitline('%s %s %s %s %3s %2s %3s %2s' % (
@@ -261,11 +262,11 @@ class ReadElf(object):
 
             if section['sh_entsize'] == 0:
                 self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % (
-                    section.name))
+                    bytes2str(section.name)))
                 continue
 
             self._emitline("\nSymbol table '%s' contains %s entries:" % (
-                section.name, section.num_symbols()))
+                bytes2str(section.name), section.num_symbols()))
 
             if self.elffile.elfclass == 32:
                 self._emitline('   Num:    Value  Size Type    Bind   Vis      Ndx Name')
@@ -282,8 +283,51 @@ class ReadElf(object):
                     describe_symbol_bind(symbol['st_info']['bind']),
                     describe_symbol_visibility(symbol['st_other']['visibility']),
                     describe_symbol_shndx(symbol['st_shndx']),
-                    symbol.name))
-        
+                    bytes2str(symbol.name)))
+
+    def display_dynamic_tags(self):
+        """ Display the dynamic tags of each DynamicSection in the file.
+            Numeric (unnamed) d_tag values print as '<unknown>: hex'.
+        """
+        def text(value):
+            # Tag strings may be bytes; decode so '%s' does not emit b'...'
+            return bytes2str(value) if isinstance(value, bytes) else value
+
+        padding = 20 + (8 if self.elffile.elfclass == 32 else 0)
+        for section in self.elffile.iter_sections():
+            if not isinstance(section, DynamicSection):
+                continue
+            self._emitline("\nDynamic section at offset %s contains %s entries:" % (
+                self._format_hex(section['sh_offset']), section.num_tags()))
+            self._emitline("  Tag        Type                         Name/Value")
+            for tag in section.iter_tags():
+                d_tag = tag.entry.d_tag
+                named = isinstance(d_tag, str)  # False for unknown (numeric) tags
+                if d_tag == 'DT_NEEDED':
+                    parsed = 'Shared library: [%s]' % text(tag.needed)
+                elif d_tag == 'DT_RPATH':
+                    parsed = 'Library rpath: [%s]' % text(tag.rpath)
+                elif d_tag == 'DT_RUNPATH':
+                    parsed = 'Library runpath: [%s]' % text(tag.runpath)
+                elif d_tag == 'DT_SONAME':
+                    parsed = 'Library soname: [%s]' % text(tag.soname)
+                elif named and d_tag.endswith(('SZ', 'ENT')):
+                    parsed = '%i (bytes)' % tag['d_val']
+                elif named and d_tag.endswith(('NUM', 'COUNT')):
+                    parsed = '%i' % tag['d_val']
+                elif d_tag == 'DT_PLTREL':
+                    s = describe_dyn_tag(tag.entry.d_val)
+                    parsed = s[3:] if s.startswith('DT_') else s
+                else:
+                    parsed = '%#x' % tag['d_val']
+
+                # Unnamed tags have no 'DT_' prefix to strip; print the number
+                name = d_tag[3:] if named else '<unknown>: %x' % d_tag
+                self._emitline(" %s %-*s %s" % (
+                    self._format_hex(ENUM_D_TAG.get(d_tag, d_tag),
+                        fullhex=True, lead0x=True),
+                    padding, '(%s)' % name, parsed))
+
     def display_relocations(self):
         """ Display the relocations contained in the file
         """
@@ -294,7 +338,7 @@ class ReadElf(object):
 
             has_relocation_sections = True
             self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
-                section.name,
+                bytes2str(section.name),
                 self._format_hex(section['sh_offset']),
                 section.num_relocations()))
             if section.is_RELA():
@@ -308,9 +352,9 @@ class ReadElf(object):
             for rel in section.iter_relocations():
                 hexwidth = 8 if self.elffile.elfclass == 32 else 12
                 self._emit('%s  %s %-17.17s' % (
-                    self._format_hex(rel['r_offset'], 
+                    self._format_hex(rel['r_offset'],
                         fieldsize=hexwidth, lead0x=False),
-                    self._format_hex(rel['r_info'], 
+                    self._format_hex(rel['r_info'],
                         fieldsize=hexwidth, lead0x=False),
                     describe_reloc_type(
                         rel['r_info_type'], self.elffile)))
@@ -332,7 +376,7 @@ class ReadElf(object):
                         symbol['st_value'],
                         fullhex=True, lead0x=False),
                     '  ' if self.elffile.elfclass == 32 else '',
-                    symbol_name))
+                    bytes2str(symbol_name)))
                 if section.is_RELA():
                     self._emit(' %s %x' % (
                         '+' if rel['r_addend'] >= 0 else '-',
@@ -341,7 +385,7 @@ class ReadElf(object):
 
         if not has_relocation_sections:
             self._emitline('\nThere are no relocations in this file.')
-        
+
     def display_hex_dump(self, section_spec):
         """ Display a hex dump of a section. section_spec is either a section
             number or a name.
@@ -352,7 +396,7 @@ class ReadElf(object):
                 section_spec))
             return
 
-        self._emitline("\nHex dump of section '%s':" % section.name)
+        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
         self._note_relocs_for_section(section)
         addr = section['sh_addr']
         data = section.data()
@@ -366,18 +410,18 @@ class ReadElf(object):
             self._emit('  %s ' % self._format_hex(addr, fieldsize=8))
             for i in range(16):
                 if i < linebytes:
-                    self._emit('%2.2x' % ord(data[dataptr + i]))
+                    self._emit('%2.2x' % byte2int(data[dataptr + i]))
                 else:
                     self._emit('  ')
                 if i % 4 == 3:
                     self._emit(' ')
 
             for i in range(linebytes):
-                c = data[dataptr + i]
-                if c >= ' ' and ord(c) < 0x7f:
-                    self._emit(c)
+                c = data[dataptr + i : dataptr + i + 1]
+                if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f:
+                    self._emit(bytes2str(c))
                 else:
-                    self._emit('.')
+                    self._emit(bytes2str(b'.'))
 
             self._emitline()
             addr += linebytes
@@ -395,27 +439,27 @@ class ReadElf(object):
                 section_spec))
             return
 
-        printables = set(string.printable)
-        self._emitline("\nString dump of section '%s':" % section.name)
+        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))
 
         found = False
         data = section.data()
         dataptr = 0
 
         while dataptr < len(data):
-            while dataptr < len(data) and data[dataptr] not in printables:
+            while ( dataptr < len(data) and
+                    not (32 <= byte2int(data[dataptr]) <= 127)):
                 dataptr += 1
 
             if dataptr >= len(data):
                 break
 
             endptr = dataptr
-            while endptr < len(data) and data[endptr] != '\x00':
+            while endptr < len(data) and byte2int(data[endptr]) != 0:
                 endptr += 1
 
             found = True
             self._emitline('  [%6x]  %s' % (
-                dataptr, data[dataptr:endptr]))
+                dataptr, bytes2str(data[dataptr:endptr])))
 
             dataptr = endptr
 
@@ -430,7 +474,7 @@ class ReadElf(object):
         self._init_dwarfinfo()
         if self._dwarfinfo is None:
             return
-        
+
         set_global_machine_arch(self.elffile.get_machine_arch())
 
         if dump_what == 'info':
@@ -454,7 +498,7 @@ class ReadElf(object):
                 If None, the minimal required field size will be used.
 
             fullhex:
-                If True, override fieldsize to set it to the maximal size 
+                If True, override fieldsize to set it to the maximal size
                 needed for the elfclass
 
             lead0x:
@@ -468,7 +512,7 @@ class ReadElf(object):
         else:
             field = '%' + '0%sx' % fieldsize
         return s + field % addr
-        
+
     def _section_from_spec(self, spec):
         """ Retrieve a section given a "spec" (either number or name).
             Return None if no such section exists in the file.
@@ -476,12 +520,12 @@ class ReadElf(object):
         try:
             num = int(spec)
             if num < self.elffile.num_sections():
-                return self.elffile.get_section(num) 
+                return self.elffile.get_section(num)
             else:
                 return None
         except ValueError:
             # Not a number. Must be a name then
-            return self.elffile.get_section_by_name(spec)
+            return self.elffile.get_section_by_name(str2bytes(spec))
 
     def _note_relocs_for_section(self, section):
         """ If there are relocation sections pointing to the givne section,
@@ -501,7 +545,7 @@ class ReadElf(object):
         """
         if self._dwarfinfo is not None:
             return
-        
+
         if self.elffile.has_dwarf_info():
             self._dwarfinfo = self.elffile.get_dwarf_info()
         else:
@@ -511,7 +555,7 @@ class ReadElf(object):
         """ Dump the debugging info section.
         """
         self._emitline('Contents of the .debug_info section:\n')
-        
+
         # Offset of the .debug_info section in the stream
         section_offset = self._dwarfinfo.debug_info_sec.global_offset
 
@@ -522,10 +566,11 @@ class ReadElf(object):
                 self._format_hex(cu['unit_length']),
                 '%s-bit' % cu.dwarf_format()))
             self._emitline('   Version:       %s' % cu['version']),
-            self._emitline('   Abbrev Offset: %s' % cu['debug_abbrev_offset']),
+            self._emitline('   Abbrev Offset: %s' % (
+                self._format_hex(cu['debug_abbrev_offset']))),
             self._emitline('   Pointer Size:  %s' % cu['address_size'])
-            
-            # The nesting depth of each DIE within the tree of DIEs must be 
+
+            # The nesting depth of each DIE within the tree of DIEs must be
             # displayed. To implement this, a counter is incremented each time
             # the current DIE has children, and decremented when a null die is
             # encountered. Due to the way the DIE tree is serialized, this will
@@ -533,25 +578,29 @@ class ReadElf(object):
             #
             die_depth = 0
             for die in cu.iter_DIEs():
-                if die.is_null():
-                    die_depth -= 1
-                    continue
-                self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % (
+                self._emitline(' <%s><%x>: Abbrev Number: %s%s' % (
                     die_depth,
                     die.offset,
                     die.abbrev_code,
-                    die.tag))
-                
-                for attr in die.attributes.itervalues():
+                    (' (%s)' % die.tag) if not die.is_null() else ''))
+                if die.is_null():
+                    die_depth -= 1
+                    continue
+
+                for attr in itervalues(die.attributes):
+                    name = attr.name
+                    # Unknown attribute values are passed-through as integers
+                    if isinstance(name, int):
+                        name = 'Unknown AT value: %x' % name
                     self._emitline('    <%2x>   %-18s: %s' % (
                         attr.offset,
-                        attr.name,
+                        name,
                         describe_attr_value(
                             attr, die, section_offset)))
-                
+
                 if die.has_children:
                     die_depth += 1
-                    
+
         self._emitline()
 
     def _dump_debug_line_programs(self):
@@ -563,13 +612,14 @@ class ReadElf(object):
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
 
-            cu_filename = ''
+            cu_filename = bytes2str(lineprogram['file_entry'][0].name)
             if len(lineprogram['include_directory']) > 0:
-                cu_filename = '%s/%s' % (
-                    lineprogram['include_directory'][0],
-                    lineprogram['file_entry'][0].name)
-            else:
-                cu_filename = lineprogram['file_entry'][0].name
+                dir_index = lineprogram['file_entry'][0].dir_index
+                if dir_index > 0:
+                    dir = lineprogram['include_directory'][dir_index - 1]
+                else:
+                    dir = '.'
+                cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
             self._emitline('File name                            Line number    Starting address')
@@ -586,22 +636,22 @@ class ReadElf(object):
                         if file_entry.dir_index == 0:
                             # current directory
                             self._emitline('\n./%s:[++]' % (
-                                file_entry.name))
+                                bytes2str(file_entry.name)))
                         else:
                             self._emitline('\n%s/%s:' % (
-                                lineprogram['include_directory'][file_entry.dir_index - 1],
-                                file_entry.name))
+                                bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]),
+                                bytes2str(file_entry.name)))
                     elif entry.command == DW_LNE_define_file:
                         self._emitline('%s:' % (
-                            lineprogram['include_directory'][entry.args[0].dir_index]))
+                            bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
                 elif not state.end_sequence:
                     # readelf doesn't print the state after end_sequence
                     # instructions. I think it's a bug but to be compatible
                     # I don't print them too.
                     self._emitline('%-35s  %11d  %18s' % (
-                        lineprogram['file_entry'][state.file - 1].name,
+                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
                         state.line,
-                        '0' if state.address == 0 else 
+                        '0' if state.address == 0 else
                                self._format_hex(state.address)))
                 if entry.command == DW_LNS_copy:
                     # Another readelf oddity...
@@ -619,7 +669,7 @@ class ReadElf(object):
                 self._emitline('\n%08x %08x %08x CIE' % (
                     entry.offset, entry['length'], entry['CIE_id']))
                 self._emitline('  Version:               %d' % entry['version'])
-                self._emitline('  Augmentation:          "%s"' % entry['augmentation'])
+                self._emitline('  Augmentation:          "%s"' % bytes2str(entry['augmentation']))
                 self._emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
                 self._emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
                 self._emitline('  Return address column: %d' % entry['return_address_register'])
@@ -650,7 +700,7 @@ class ReadElf(object):
                     entry.offset,
                     entry['length'],
                     entry['CIE_id'],
-                    entry['augmentation'],
+                    bytes2str(entry['augmentation']),
                     entry['code_alignment_factor'],
                     entry['data_alignment_factor'],
                     entry['return_address_register']))
@@ -676,14 +726,14 @@ class ReadElf(object):
             # ra_regnum is always listed last with a special heading.
             decoded_table = entry.get_decoded()
             reg_order = sorted(ifilter(
-                lambda r: r != ra_regnum, 
+                lambda r: r != ra_regnum,
                 decoded_table.reg_order))
 
             # Headings for the registers
             for regnum in reg_order:
                 self._emit('%-6s' % describe_reg_name(regnum))
             self._emitline('ra      ')
-            
+
             # Now include ra_regnum in reg_order to print its values similarly
             # to the other registers.
             reg_order.append(ra_regnum)
@@ -724,6 +774,9 @@ def main(stream=None):
             add_help_option=False, # -h is a real option of readelf
             prog='readelf.py',
             version=VERSION_STRING)
+    optparser.add_option('-d', '--dynamic',
+            action='store_true', dest='show_dynamic_tags',
+            help='Display the dynamic section')
     optparser.add_option('-H', '--help',
             action='store_true', dest='help',
             help='Display this information')
@@ -752,8 +805,10 @@ def main(stream=None):
             action='store', dest='show_string_dump', metavar='<number|name>',
             help='Dump the contents of section <number|name> as strings')
     optparser.add_option('--debug-dump',
-            action='store', dest='debug_dump_what', metavar='<section>',
-            help='Display the contents of DWARF debug sections')
+            action='store', dest='debug_dump_what', metavar='<what>',
+            help=(
+                'Display the contents of DWARF debug sections. <what> can ' +
+                'be one of {info,decodedline,frames,frames-interp}'))
 
     options, args = optparser.parse_args()
 
@@ -779,6 +834,8 @@ def main(stream=None):
             if do_program_header:
                 readelf.display_program_headers(
                         show_heading=not do_file_header)
+            if options.show_dynamic_tags:
+                readelf.display_dynamic_tags()
             if options.show_symbols:
                 readelf.display_symbol_tables()
             if options.show_relocs: