Fix Container's __pretty_str__ to use str instead of repr. This provides more consist...
[pyelftools.git] / scripts / readelf.py
index 4554ab0988300c5bf43f6dddef65d83299f381ec..6ecac7bd8a68f4db92cf5ac976ceeced8f70eda5 100755 (executable)
@@ -9,12 +9,11 @@
 #-------------------------------------------------------------------------------
 import os, sys
 from optparse import OptionParser
+from itertools import ifilter
 import string
 
-
 # If elftools is not installed, maybe we're running from the root or scripts
 # dir of the source distribution
-#
 try:
     import elftools
 except ImportError:
@@ -25,14 +24,24 @@ from elftools.common.exceptions import ELFError
 from elftools.elf.elffile import ELFFile
 from elftools.elf.segments import InterpSegment
 from elftools.elf.sections import SymbolTableSection
+from elftools.elf.relocation import RelocationSection
 from elftools.elf.descriptions import (
     describe_ei_class, describe_ei_data, describe_ei_version,
     describe_ei_osabi, describe_e_type, describe_e_machine,
     describe_e_version_numeric, describe_p_type, describe_p_flags,
     describe_sh_type, describe_sh_flags,
     describe_symbol_type, describe_symbol_bind, describe_symbol_visibility,
-    describe_symbol_shndx,
+    describe_symbol_shndx, describe_reloc_type,
+    )
+from elftools.dwarf.dwarfinfo import DWARFInfo
+from elftools.dwarf.descriptions import (
+    describe_reg_name, describe_attr_value, set_global_machine_arch,
+    describe_CFI_instructions, describe_CFI_register_rule,
+    describe_CFI_CFA_rule,
     )
+from elftools.dwarf.constants import (
+    DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
+from elftools.dwarf.callframe import CIE, FDE
 
 
 class ReadElf(object):
@@ -47,6 +56,9 @@ class ReadElf(object):
         """
         self.elffile = ELFFile(file)
         self.output = output
+        
+        # Lazily initialized if a debug dump is requested
+        self._dwarfinfo = None
 
     def display_file_header(self):
         """ Display the ELF file header
@@ -230,8 +242,12 @@ class ReadElf(object):
                     section['sh_addralign']))
 
         self._emitline('Key to Flags:')
-        self._emitline('  W (write), A (alloc), X (execute), M (merge), S (strings)')
-        self._emitline('  I (info), L (link order), G (group), x (unknown)')
+        self._emit('  W (write), A (alloc), X (execute), M (merge), S (strings)')
+        if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'):
+            self._emitline(', l (large)')
+        else:
+            self._emitline()
+        self._emitline('  I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
         self._emitline('  O (extra OS processing required) o (OS specific), p (processor specific)')
 
     def display_symbol_tables(self):
@@ -266,6 +282,64 @@ class ReadElf(object):
                     describe_symbol_shndx(symbol['st_shndx']),
                     symbol.name))
         
+    def display_relocations(self):
+        """ Display the relocations contained in the file
+        """
+        has_relocation_sections = False
+        for section in self.elffile.iter_sections():
+            if not isinstance(section, RelocationSection):
+                continue
+
+            has_relocation_sections = True
+            self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
+                section.name,
+                self._format_hex(section['sh_offset']),
+                section.num_relocations()))
+            if section.is_RELA():
+                self._emitline("  Offset          Info           Type           Sym. Value    Sym. Name + Addend")
+            else:
+                self._emitline(" Offset     Info    Type            Sym.Value  Sym. Name")
+
+            # The symbol table section pointed to in sh_link
+            symtable = self.elffile.get_section(section['sh_link'])
+
+            for rel in section.iter_relocations():
+                hexwidth = 8 if self.elffile.elfclass == 32 else 12
+                self._emit('%s  %s %-17.17s' % (
+                    self._format_hex(rel['r_offset'], 
+                        fieldsize=hexwidth, lead0x=False),
+                    self._format_hex(rel['r_info'], 
+                        fieldsize=hexwidth, lead0x=False),
+                    describe_reloc_type(
+                        rel['r_info_type'], self.elffile)))
+
+                if rel['r_info_sym'] == 0:
+                    self._emitline()
+                    continue
+
+                symbol = symtable.get_symbol(rel['r_info_sym'])
+                # Some symbols have zero 'st_name', so instead what's used is
+                # the name of the section they point at
+                if symbol['st_name'] == 0:
+                    symsec = self.elffile.get_section(symbol['st_shndx'])
+                    symbol_name = symsec.name
+                else:
+                    symbol_name = symbol.name
+                self._emit(' %s %s%22.22s' % (
+                    self._format_hex(
+                        symbol['st_value'],
+                        fullhex=True, lead0x=False),
+                    '  ' if self.elffile.elfclass == 32 else '',
+                    symbol_name))
+                if section.is_RELA():
+                    self._emit(' %s %x' % (
+                        '+' if rel['r_addend'] >= 0 else '-',
+                        abs(rel['r_addend'])))
+                self._emitline()
+
+        if not has_relocation_sections:
+            self._emitline('\nThere are no relocations in this file.')
+        
     def display_hex_dump(self, section_spec):
         """ Display a hex dump of a section. section_spec is either a section
             number or a name.
@@ -277,7 +351,7 @@ class ReadElf(object):
             return
 
         self._emitline("\nHex dump of section '%s':" % section.name)
-
+        self._note_relocs_for_section(section)
         addr = section['sh_addr']
         data = section.data()
         dataptr = 0
@@ -298,7 +372,7 @@ class ReadElf(object):
 
             for i in range(linebytes):
                 c = data[dataptr + i]
-                if c >= ' ' and ord(c) <= 0x7f:
+                if c >= ' ' and ord(c) < 0x7f:
                     self._emit(c)
                 else:
                     self._emit('.')
@@ -348,6 +422,26 @@ class ReadElf(object):
         else:
             self._emitline()
 
+    def display_debug_dump(self, dump_what):
+        """ Dump a DWARF section
+        """
+        self._init_dwarfinfo()
+        if self._dwarfinfo is None:
+            return
+        
+        set_global_machine_arch(self.elffile.get_machine_arch())
+
+        if dump_what == 'info':
+            self._dump_debug_info()
+        elif dump_what == 'decodedline':
+            self._dump_debug_line_programs()
+        elif dump_what == 'frames':
+            self._dump_debug_frames()
+        elif dump_what == 'frames-interp':
+            self._dump_debug_frames_interp()
+        else:
+            self._emitline('debug dump not yet supported for "%s"' % dump_what)
+
     def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True):
         """ Format an address into a hexadecimal string.
 
@@ -387,6 +481,228 @@ class ReadElf(object):
             # Not a number. Must be a name then
             return self.elffile.get_section_by_name(spec)
 
+    def _note_relocs_for_section(self, section):
+        """ If there are relocation sections pointing to the given section,
+            emit a note about it.
+        """
+        for relsec in self.elffile.iter_sections():
+            if isinstance(relsec, RelocationSection):
+                info_idx = relsec['sh_info']
+                if self.elffile.get_section(info_idx) == section:
+                    self._emitline('  Note: This section has relocations against it, but these have NOT been applied to this dump.')
+                    return
+
+    def _init_dwarfinfo(self):
+        """ Initialize the DWARF info contained in the file and assign it to
+            self._dwarfinfo.
+            Leave self._dwarfinfo at None if no DWARF info was found in the file
+        """
+        if self._dwarfinfo is not None:
+            return
+        
+        if self.elffile.has_dwarf_info():
+            self._dwarfinfo = self.elffile.get_dwarf_info()
+        else:
+            self._dwarfinfo = None
+
+    def _dump_debug_info(self):
+        """ Dump the debugging info section.
+        """
+        self._emitline('Contents of the .debug_info section:\n')
+        
+        # Offset of the .debug_info section in the stream
+        section_offset = self._dwarfinfo.debug_info_sec.global_offset
+
+        for cu in self._dwarfinfo.iter_CUs():
+            self._emitline('  Compilation Unit @ offset %s:' %
+                self._format_hex(cu.cu_offset))
+            self._emitline('   Length:        %s (%s)' % (
+                self._format_hex(cu['unit_length']),
+                '%s-bit' % cu.dwarf_format()))
+            self._emitline('   Version:       %s' % cu['version']),
+            self._emitline('   Abbrev Offset: %s' % cu['debug_abbrev_offset']),
+            self._emitline('   Pointer Size:  %s' % cu['address_size'])
+            
+            # The nesting depth of each DIE within the tree of DIEs must be 
+            # displayed. To implement this, a counter is incremented each time
+            # the current DIE has children, and decremented when a null die is
+            # encountered. Due to the way the DIE tree is serialized, this will
+            # correctly reflect the nesting depth
+            #
+            die_depth = 0
+            for die in cu.iter_DIEs():
+                if die.is_null():
+                    die_depth -= 1
+                    continue
+                self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % (
+                    die_depth,
+                    die.offset,
+                    die.abbrev_code,
+                    die.tag))
+                
+                for attr in die.attributes.itervalues():
+                    name = attr.name
+                    # Unknown attribute values are passed-through as integers
+                    if isinstance(name, int):
+                        name = 'Unknown AT value: %x' % name
+                    self._emitline('    <%2x>   %-18s: %s' % (
+                        attr.offset,
+                        name,
+                        describe_attr_value(
+                            attr, die, section_offset)))
+                
+                if die.has_children:
+                    die_depth += 1
+                    
+        self._emitline()
+
+    def _dump_debug_line_programs(self):
+        """ Dump the (decoded) line programs from .debug_line
+            The programs are dumped in the order of the CUs they belong to.
+        """
+        self._emitline('Decoded dump of debug contents of section .debug_line:\n')
+
+        for cu in self._dwarfinfo.iter_CUs():
+            lineprogram = self._dwarfinfo.line_program_for_CU(cu)
+
+            cu_filename = ''
+            if len(lineprogram['include_directory']) > 0:
+                cu_filename = '%s/%s' % (
+                    lineprogram['include_directory'][0],
+                    lineprogram['file_entry'][0].name)
+            else:
+                cu_filename = lineprogram['file_entry'][0].name
+
+            self._emitline('CU: %s:' % cu_filename)
+            self._emitline('File name                            Line number    Starting address')
+
+            # Print each state's file, line and address information. For some
+            # instructions other output is needed to be compatible with
+            # readelf.
+            for entry in lineprogram.get_entries():
+                state = entry.state
+                if state is None:
+                    # Special handling for commands that don't set a new state
+                    if entry.command == DW_LNS_set_file:
+                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
+                        if file_entry.dir_index == 0:
+                            # current directory
+                            self._emitline('\n./%s:[++]' % (
+                                file_entry.name))
+                        else:
+                            self._emitline('\n%s/%s:' % (
+                                lineprogram['include_directory'][file_entry.dir_index - 1],
+                                file_entry.name))
+                    elif entry.command == DW_LNE_define_file:
+                        self._emitline('%s:' % (
+                            lineprogram['include_directory'][entry.args[0].dir_index]))
+                elif not state.end_sequence:
+                    # readelf doesn't print the state after end_sequence
+                    # instructions. I think it's a bug but to be compatible
+                    # I don't print them either.
+                    self._emitline('%-35s  %11d  %18s' % (
+                        lineprogram['file_entry'][state.file - 1].name,
+                        state.line,
+                        '0' if state.address == 0 else 
+                               self._format_hex(state.address)))
+                if entry.command == DW_LNS_copy:
+                    # Another readelf oddity...
+                    self._emitline()
+
+    def _dump_debug_frames(self):
+        """ Dump the raw frame information from .debug_frame
+        """
+        if not self._dwarfinfo.has_CFI():
+            return
+        self._emitline('Contents of the .debug_frame section:')
+
+        for entry in self._dwarfinfo.CFI_entries():
+            if isinstance(entry, CIE):
+                self._emitline('\n%08x %08x %08x CIE' % (
+                    entry.offset, entry['length'], entry['CIE_id']))
+                self._emitline('  Version:               %d' % entry['version'])
+                self._emitline('  Augmentation:          "%s"' % entry['augmentation'])
+                self._emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
+                self._emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
+                self._emitline('  Return address column: %d' % entry['return_address_register'])
+                self._emitline()
+            else: # FDE
+                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
+                    entry.offset,
+                    entry['length'],
+                    entry['CIE_pointer'],
+                    entry.cie.offset,
+                    entry['initial_location'],
+                    entry['initial_location'] + entry['address_range']))
+
+            self._emit(describe_CFI_instructions(entry))
+        self._emitline()
+
+    def _dump_debug_frames_interp(self):
+        """ Dump the interpreted (decoded) frame information from .debug_frame
+        """
+        if not self._dwarfinfo.has_CFI():
+            return
+
+        self._emitline('Contents of the .debug_frame section:')
+
+        for entry in self._dwarfinfo.CFI_entries():
+            if isinstance(entry, CIE):
+                self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
+                    entry.offset,
+                    entry['length'],
+                    entry['CIE_id'],
+                    entry['augmentation'],
+                    entry['code_alignment_factor'],
+                    entry['data_alignment_factor'],
+                    entry['return_address_register']))
+                ra_regnum = entry['return_address_register']
+            else: # FDE
+                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
+                    entry.offset,
+                    entry['length'],
+                    entry['CIE_pointer'],
+                    entry.cie.offset,
+                    entry['initial_location'],
+                    entry['initial_location'] + entry['address_range']))
+                ra_regnum = entry.cie['return_address_register']
+
+            # Print the heading row for the decoded table
+            self._emit('   LOC')
+            self._emit('  ' if entry.structs.address_size == 4 else '          ')
+            self._emit(' CFA      ')
+
+            # Decode the table and look at the registers it describes.
+            # We build reg_order here to match readelf's order. In particular,
+            # registers are sorted by their number, and the register matching
+            # ra_regnum is always listed last with a special heading.
+            decoded_table = entry.get_decoded()
+            reg_order = sorted(ifilter(
+                lambda r: r != ra_regnum, 
+                decoded_table.reg_order))
+
+            # Headings for the registers
+            for regnum in reg_order:
+                self._emit('%-6s' % describe_reg_name(regnum))
+            self._emitline('ra      ')
+            
+            # Now include ra_regnum in reg_order to print its values similarly
+            # to the other registers.
+            reg_order.append(ra_regnum)
+            for line in decoded_table.table:
+                self._emit(self._format_hex(
+                    line['pc'], fullhex=True, lead0x=False))
+                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))
+
+                for regnum in reg_order:
+                    if regnum in line:
+                        s = describe_CFI_register_rule(line[regnum])
+                    else:
+                        s = 'u'
+                    self._emit('%-6s' % s)
+                self._emitline()
+        self._emitline()
+
     def _emit(self, s=''):
         """ Emit an object to output
         """
@@ -402,7 +718,7 @@ SCRIPT_DESCRIPTION = 'Display information about the contents of ELF format files
 VERSION_STRING = '%%prog: based on pyelftools %s' % __version__
 
 
-def main():
+def main(stream=None):
     # parse the command-line arguments and invoke ReadElf
     optparser = OptionParser(
             usage='usage: %prog [options] <elf-file>',
@@ -428,12 +744,20 @@ def main():
     optparser.add_option('-s', '--symbols', '--syms',
             action='store_true', dest='show_symbols',
             help='Display the symbol table')
+    optparser.add_option('-r', '--relocs',
+            action='store_true', dest='show_relocs',
+            help='Display the relocations (if present)')
     optparser.add_option('-x', '--hex-dump',
             action='store', dest='show_hex_dump', metavar='<number|name>',
             help='Dump the contents of section <number|name> as bytes')
     optparser.add_option('-p', '--string-dump',
             action='store', dest='show_string_dump', metavar='<number|name>',
             help='Dump the contents of section <number|name> as strings')
+    optparser.add_option('--debug-dump',
+            action='store', dest='debug_dump_what', metavar='<what>',
+            help=(
+                'Display the contents of DWARF debug sections. <what> can ' +
+                'one of {info,decodedline,frames,frames-interp}'))
 
     options, args = optparser.parse_args()
 
@@ -450,7 +774,7 @@ def main():
 
     with open(args[0], 'rb') as file:
         try:
-            readelf = ReadElf(file, sys.stdout)
+            readelf = ReadElf(file, stream or sys.stdout)
             if do_file_header:
                 readelf.display_file_header()
             if do_section_header:
@@ -461,16 +785,35 @@ def main():
                         show_heading=not do_file_header)
             if options.show_symbols:
                 readelf.display_symbol_tables()
+            if options.show_relocs:
+                readelf.display_relocations()
             if options.show_hex_dump:
                 readelf.display_hex_dump(options.show_hex_dump)
             if options.show_string_dump:
                 readelf.display_string_dump(options.show_string_dump)
+            if options.debug_dump_what:
+                readelf.display_debug_dump(options.debug_dump_what)
         except ELFError as ex:
             sys.stderr.write('ELF error: %s\n' % ex)
             sys.exit(1)
 
 
+def profile_main():
+    # Run 'main' redirecting its output to readelfout.txt
+    # Saves profiling information in readelf.profile
+    PROFFILE = 'readelf.profile'
+    import cProfile
+    cProfile.run('main(open("readelfout.txt", "w"))', PROFFILE)
+
+    # Dig in some profiling stats
+    import pstats
+    p = pstats.Stats(PROFFILE)
+    p.sort_stats('cumulative').print_stats(25)
+
+
 #-------------------------------------------------------------------------------
 if __name__ == '__main__':
     main()
+    #profile_main()
+