From: Eli Bendersky <eliben@gmail.com>
Date: Tue, 6 Dec 2011 04:48:10 +0000 (+0200)
Subject: changed line program entries to contain commands and arguments, AND state. this allows more faithful decoding in readelf
X-Git-Tag: v0.10~54
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bf83b1b560deb5c292fe0bc11d57ec91f5b64aab;p=pyelftools.git

Changed line program entries to contain commands and arguments, AND state. This allows more faithful decoding in readelf.
---

diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py
index 2dc4b3f..5db7204 100644
--- a/elftools/dwarf/lineprogram.py
+++ b/elftools/dwarf/lineprogram.py
@@ -7,11 +7,38 @@
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 import copy
+from collections import namedtuple
 
 from ..common.utils import struct_parse
 from .constants import *
 
 
+# LineProgramEntry - an entry in the line program.
+# A line program is a sequence of encoded entries. Some of these entries add a
+# new LineState (mapping between line and address), and some don't.
+#
+# command:
+#   The command/opcode - always numeric. For standard commands - it's the opcode
+#   that can be matched with one of the DW_LNS_* constants. For extended commands
+#   it's the extended opcode that can be matched with one of the DW_LNE_*
+#   constants. For special commands, it's the opcode itself.
+#
+# args:
+#   A list of decoded arguments of the command.
+#
+# is_extended:
+#   Since extended commands are encoded by a zero followed by an extended
+#   opcode, and these extended opcodes overlap with other opcodes, this
+#   flag is needed to mark that the command has an extended opcode.
+#
+# state:
+#   For commands that add a new state, it's the relevant LineState object.
+#   For commands that don't add a new state, it's None.
+#
+LineProgramEntry = namedtuple(
+    'LineProgramEntry', 'command is_extended args state')
+
+
 class LineState(object):
     """ Represents a line program state (or a "row" in the matrix
         describing debug location information for addresses).
@@ -70,15 +97,15 @@ class LineProgram(object):
         self.structs = structs
         self.program_start_offset = program_start_offset
         self.program_end_offset = program_end_offset
+        self._decoded_entries = None
 
-        self._line_table = None
-
-    def get_line_table(self):
-        """ Get the decoded line table for this line program
+    def get_entries(self):
+        """ Get the decoded entries for this line program. Return a list of
+            LineProgramEntry objects.
         """
-        if self._line_table is None:
-            self._line_table = self._decode_line_program()
-        return self._line_table
+        if self._decoded_entries is None:
+            self._decoded_entries = self._decode_line_program()
+        return self._decoded_entries
 
     #------ PRIVATE ------#
     
@@ -88,18 +115,22 @@ class LineProgram(object):
         return self.header[name]
 
     def _decode_line_program(self):
-        linetable = []
+        entries = []
         state = LineState(self.header['default_is_stmt'])
 
-        def add_state_to_table():
-            # Used by instructions that have to add the current state to the
-            # line table. After adding, some state registers have to be
-            # cleared.
-            linetable.append(copy.copy(state))
+        def add_entry_new_state(cmd, args, is_extended=False):
+            # Add an entry that sets a new state.
+            # After adding, clear some state registers.
+            entries.append(LineProgramEntry(
+                cmd, is_extended, args, copy.copy(state)))
             state.basic_block = False
             state.prologue_end = False
             state.epilogue_begin = False
 
+        def add_entry_old_state(cmd, args, is_extended=False):
+            # Add an entry that doesn't visibly set a new state
+            entries.append(LineProgramEntry(cmd, is_extended, args, None))
+
         offset = self.program_start_offset
         while offset < self.program_end_offset:
             opcode = struct_parse(
@@ -115,11 +146,13 @@ class LineProgram(object):
             if opcode >= self.header['opcode_base']:
                 # Special opcode (follow the recipe in 6.2.5.1)
                 adjusted_opcode = opcode - self['opcode_base']
-                state.address += ((adjusted_opcode / self['line_range']) *
+                address_addend = ((adjusted_opcode / self['line_range']) *
                                   self['minimum_instruction_length'])
-                state.line += (self['line_base'] + 
-                              adjusted_opcode % self['line_range'])
-                add_state_to_table()
+                state.address += address_addend
+                line_addend = (self['line_base'] + 
+                               adjusted_opcode % self['line_range'])
+                state.line += line_addend
+                add_entry_new_state(opcode, [line_addend, address_addend])
             elif opcode == 0:
                 # Extended opcode: start with a zero byte, followed by
                 # instruction size and the instruction itself.
@@ -130,26 +163,30 @@ class LineProgram(object):
 
                 if ex_opcode == DW_LNE_end_sequence:
                     state.end_sequence = True
-                    add_state_to_table()
+                    add_entry_new_state(ex_opcode, [], is_extended=True)
                     # reset state
                     state = LineState(self.header['default_is_stmt']) 
                 elif ex_opcode == DW_LNE_set_address:
                     operand = struct_parse(self.structs.Dwarf_target_addr(''),
                                            self.stream)
                     state.address = operand
+                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
                 elif ex_opcode == DW_LNE_define_file:
                     operand = struct_parse(
                         self.structs.Dwarf_lineprog_file_entry, self.stream)
                     self['file_entry'].append(operand)
+                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
             else: # 0 < opcode < opcode_base
                 # Standard opcode
                 if opcode == DW_LNS_copy:
-                    add_state_to_table()
+                    add_entry_new_state(opcode, [])
                 elif opcode == DW_LNS_advance_pc:
                     operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                            self.stream)
-                    state.address += (
+                    address_addend = (
                         operand * self.header['minimum_instruction_length'])
+                    state.address += address_addend
+                    add_entry_old_state(opcode, [address_addend])
                 elif opcode == DW_LNS_advance_line:
                     operand = struct_parse(self.structs.Dwarf_sleb128(''),
                                            self.stream)
@@ -158,33 +195,43 @@ class LineProgram(object):
                     operand = struct_parse(self.structs.Dwarf_sleb128(''),
                                            self.stream)
                     state.file = operand
+                    add_entry_old_state(opcode, [operand])
                 elif opcode == DW_LNS_set_column:
                     operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                            self.stream)
                     state.column = operand
+                    add_entry_old_state(opcode, [operand])
                 elif opcode == DW_LNS_negate_stmt:
                     state.is_stmt = not state.is_stmt
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_set_basic_block:
                     state.basic_block = True
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_const_add_pc:
                     adjusted_opcode = 255 - self['opcode_base']
-                    state.address += ((adjusted_opcode / self['line_range']) *
+                    address_addend = ((adjusted_opcode / self['line_range']) *
                                       self['minimum_instruction_length'])
+                    state.address += address_addend
+                    add_entry_old_state(opcode, [address_addend])
                 elif opcode == DW_LNS_fixed_advance_pc:
                     operand = struct_parse(self.structs.Dwarf_uint16(''),
                                            self.stream)
                     state.address += operand
+                    add_entry_old_state(opcode, [operand])
                 elif opcode == DW_LNS_set_prologue_end:
                     state.prologue_end = True
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_set_epilogue_begin:
                     state.epilogue_begin = True
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_set_isa:
                     operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                            self.stream)
                     state.isa = operand
+                    add_entry_old_state(opcode, [operand])
                 else:
                     dwarf_assert(False, 'Invalid standard line program opcode: %s' % (
                         opcode,))
             offset = self.stream.tell()
-        return linetable
+        return entries
 
diff --git a/scripts/readelf.py b/scripts/readelf.py
index e95dac3..c11a197 100755
--- a/scripts/readelf.py
+++ b/scripts/readelf.py
@@ -36,6 +36,8 @@ from elftools.elf.descriptions import (
     )
 from elftools.dwarf.dwarfinfo import DWARFInfo
 from elftools.dwarf.descriptions import describe_attr_value
+from elftools.dwarf.constants import (
+    DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
 
 
 class ReadElf(object):
@@ -544,7 +546,7 @@ class ReadElf(object):
         """ Dump the (decoded) line programs from .debug_line
             The programs are dumped in the order of the CUs they belong to.
         """
-        self._emitline('Decoded dump of debug contents of section .debug_line:')
+        self._emitline('Decoded dump of debug contents of section .debug_line:\n')
 
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
@@ -560,16 +562,38 @@ class ReadElf(object):
             self._emitline('CU: %s:' % cu_filename)
             self._emitline('File name                            Line number    Starting address')
 
-            # readelf doesn't print the state after end_sequence instructions. 
-            # I think it's a bug but to be compatible I don't print them too.
-            for state in lineprogram.get_line_table():
-                if not state.end_sequence:
+            # Print each state's file, line and address information. For some
+            # instructions other output is needed to be compatible with
+            # readelf.
+            for entry in lineprogram.get_entries():
+                state = entry.state
+                if state is None:
+                    # Special handling for commands that don't set a new state
+                    if entry.command == DW_LNS_set_file:
+                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
+                        if file_entry.dir_index == 0:
+                            # current directory
+                            self._emitline('\n./%s:[++]' % (
+                                file_entry.name))
+                        else:
+                            self._emitline('\n%s/%s:' % (
+                                lineprogram['include_directory'][file_entry.dir_index - 1],
+                                file_entry.name))
+                    elif entry.command == DW_LNE_define_file:
+                        self._emitline('%s:' % (
+                            lineprogram['include_directory'][entry.args[0].dir_index]))
+                elif not state.end_sequence:
+                    # readelf doesn't print the state after end_sequence
+                    # instructions. I think it's a bug, but to be compatible
+                    # I don't print them either.
                     self._emitline('%-35s  %11d  %18s' % (
                         lineprogram['file_entry'][state.file - 1].name,
                         state.line,
                         '0' if state.address == 0 else 
                                self._format_hex(state.address)))
-            self._emitline()
+                if entry.command == DW_LNS_copy:
+                    # Another readelf oddity...
+                    self._emitline()
 
     def _emit(self, s=''):
         """ Emit an object to output
diff --git a/tests/test_dwarf_lineprogram.py b/tests/test_dwarf_lineprogram.py
index 50d8af3..56d96c3 100644
--- a/tests/test_dwarf_lineprogram.py
+++ b/tests/test_dwarf_lineprogram.py
@@ -2,8 +2,9 @@ import sys, unittest
 from cStringIO import StringIO
 
 sys.path.extend(['.', '..'])
-from elftools.dwarf.lineprogram import LineProgram, LineState
+from elftools.dwarf.lineprogram import LineProgram, LineState, LineProgramEntry
 from elftools.dwarf.structs import DWARFStructs
+from elftools.dwarf.constants import *
 
 
 class TestLineProgram(unittest.TestCase):
@@ -50,13 +51,21 @@ class TestLineProgram(unittest.TestCase):
             '\x00\x01\x01')
 
         lp = self._make_program_in_stream(s)
-        linetable = lp.get_line_table()
+        linetable = lp.get_entries()
 
-        self.assertLineState(linetable[0], address=0x239, line=3)
-        self.assertLineState(linetable[1], address=0x23c, line=5)
-        self.assertLineState(linetable[2], address=0x244, line=6)
-        self.assertLineState(linetable[3], address=0x24b, line=7, end_sequence=False)
-        self.assertLineState(linetable[4], address=0x24d, line=7, end_sequence=True)
+        self.assertEqual(len(linetable), 7)
+        self.assertIs(linetable[0].state, None)  # doesn't modify state
+        self.assertEqual(linetable[0].command, DW_LNS_advance_pc)
+        self.assertEqual(linetable[0].args, [0x239])
+        self.assertLineState(linetable[1].state, address=0x239, line=3)
+        self.assertEqual(linetable[1].command, 0xb)
+        self.assertEqual(linetable[1].args, [2, 0])
+        self.assertLineState(linetable[2].state, address=0x23c, line=5)
+        self.assertLineState(linetable[3].state, address=0x244, line=6)
+        self.assertLineState(linetable[4].state, address=0x24b, line=7, end_sequence=False)
+        self.assertEqual(linetable[5].command, DW_LNS_advance_pc)
+        self.assertEqual(linetable[5].args, [2])
+        self.assertLineState(linetable[6].state, address=0x24d, line=7, end_sequence=True)
 
     def test_spec_sample_60(self):
         # Sample in figure 60 of DWARFv3
@@ -74,13 +83,17 @@ class TestLineProgram(unittest.TestCase):
             '\x00\x01\x01')
 
         lp = self._make_program_in_stream(s)
-        linetable = lp.get_line_table()
+        linetable = lp.get_entries()
 
-        self.assertLineState(linetable[0], address=0x239, line=3)
-        self.assertLineState(linetable[1], address=0x23c, line=5)
-        self.assertLineState(linetable[2], address=0x244, line=6)
-        self.assertLineState(linetable[3], address=0x24b, line=7, end_sequence=False)
-        self.assertLineState(linetable[4], address=0x24d, line=7, end_sequence=True)
+        self.assertEqual(len(linetable), 10)
+        self.assertIs(linetable[0].state, None)  # doesn't modify state
+        self.assertEqual(linetable[0].command, DW_LNS_fixed_advance_pc)
+        self.assertEqual(linetable[0].args, [0x239])
+        self.assertLineState(linetable[1].state, address=0x239, line=3)
+        self.assertLineState(linetable[3].state, address=0x23c, line=5)
+        self.assertLineState(linetable[5].state, address=0x244, line=6)
+        self.assertLineState(linetable[7].state, address=0x24b, line=7, end_sequence=False)
+        self.assertLineState(linetable[9].state, address=0x24d, line=7, end_sequence=True)
 
 
 if __name__ == '__main__':