From bf83b1b560deb5c292fe0bc11d57ec91f5b64aab Mon Sep 17 00:00:00 2001
From: Eli Bendersky
Date: Tue, 6 Dec 2011 06:48:10 +0200
Subject: [PATCH] changed line program entries to contain commands and arguments, AND state. this allows more faithful decoding in readelf

---
Review note: DW_LNS_negate_stmt and DW_LNS_set_basic_block take no operands,
so their entries are recorded with an empty args list (like
DW_LNS_set_prologue_end / DW_LNS_set_epilogue_begin) instead of whatever value
an earlier opcode happened to leave in the local 'operand'.

 elftools/dwarf/lineprogram.py   | 91 +++++++++++++++++++++++++--------
 scripts/readelf.py              | 36 ++++++++++---
 tests/test_dwarf_lineprogram.py | 39 +++++++++-----
 3 files changed, 125 insertions(+), 41 deletions(-)

diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py
index 2dc4b3f..5db7204 100644
--- a/elftools/dwarf/lineprogram.py
+++ b/elftools/dwarf/lineprogram.py
@@ -7,11 +7,38 @@
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 import copy
+from collections import namedtuple
 
 from ..common.utils import struct_parse
 from .constants import *
 
 
+# LineProgramEntry - an entry in the line program.
+# A line program is a sequence of encoded entries. Some of these entries add a
+# new LineState (mapping between line and address), and some don't.
+#
+# command:
+#   The command/opcode - always numeric. For standard commands - it's the opcode
+#   that can be matched with one of the DW_LNS_* constants. For extended commands
+#   it's the extended opcode that can be matched with one of the DW_LNE_*
+#   constants. For special commands, it's the opcode itself.
+#
+# args:
+#   A list of decoded arguments of the command.
+#
+# is_extended:
+#   Since extended commands are encoded by a zero followed by an extended
+#   opcode, and these extended opcodes overlap with other opcodes, this
+#   flag is needed to mark that the command has an extended opcode.
+#
+# state:
+#   For commands that add a new state, it's the relevant LineState object.
+#   For commands that don't add a new state, it's None.
+#
+LineProgramEntry = namedtuple(
+    'LineProgramEntry', 'command is_extended args state')
+
+
 class LineState(object):
     """ Represents a line program state (or a "row" in the matrix
         describing debug location information for addresses).
@@ -70,15 +97,15 @@ class LineProgram(object):
         self.structs = structs
         self.program_start_offset = program_start_offset
         self.program_end_offset = program_end_offset
+        self._decoded_entries = None
 
-        self._line_table = None
-
-    def get_line_table(self):
-        """ Get the decoded line table for this line program
+    def get_entries(self):
+        """ Get the decoded entries for this line program. Return a list of
+            LineProgramEntry objects.
         """
-        if self._line_table is None:
-            self._line_table = self._decode_line_program()
-        return self._line_table
+        if self._decoded_entries is None:
+            self._decoded_entries = self._decode_line_program()
+        return self._decoded_entries
 
     #------ PRIVATE ------#
 
@@ -88,18 +115,22 @@ class LineProgram(object):
         return self.header[name]
 
     def _decode_line_program(self):
-        linetable = []
+        entries = []
         state = LineState(self.header['default_is_stmt'])
 
-        def add_state_to_table():
-            # Used by instructions that have to add the current state to the
-            # line table. After adding, some state registers have to be
-            # cleared.
-            linetable.append(copy.copy(state))
+        def add_entry_new_state(cmd, args, is_extended=False):
+            # Add an entry that sets a new state.
+            # After adding, clear some state registers.
+            entries.append(LineProgramEntry(
+                cmd, is_extended, args, copy.copy(state)))
             state.basic_block = False
             state.prologue_end = False
             state.epilogue_begin = False
 
+        def add_entry_old_state(cmd, args, is_extended=False):
+            # Add an entry that doesn't visibly set a new state
+            entries.append(LineProgramEntry(cmd, is_extended, args, None))
+
         offset = self.program_start_offset
         while offset < self.program_end_offset:
             opcode = struct_parse(
@@ -115,11 +146,13 @@ class LineProgram(object):
             if opcode >= self.header['opcode_base']:
                 # Special opcode (follow the recipe in 6.2.5.1)
                 adjusted_opcode = opcode - self['opcode_base']
-                state.address += ((adjusted_opcode / self['line_range']) *
+                address_addend = ((adjusted_opcode / self['line_range']) *
                                   self['minimum_instruction_length'])
-                state.line += (self['line_base'] +
-                               adjusted_opcode % self['line_range'])
-                add_state_to_table()
+                state.address += address_addend
+                line_addend = (self['line_base'] +
+                               adjusted_opcode % self['line_range'])
+                state.line += line_addend
+                add_entry_new_state(opcode, [line_addend, address_addend])
             elif opcode == 0:
                 # Extended opcode: start with a zero byte, followed by
                 # instruction size and the instruction itself.
@@ -130,26 +163,30 @@ class LineProgram(object):
 
                 if ex_opcode == DW_LNE_end_sequence:
                     state.end_sequence = True
-                    add_state_to_table()
+                    add_entry_new_state(ex_opcode, [], is_extended=True)
                     # reset state
                     state = LineState(self.header['default_is_stmt'])
                 elif ex_opcode == DW_LNE_set_address:
                     operand = struct_parse(self.structs.Dwarf_target_addr(''),
                                            self.stream)
                     state.address = operand
+                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
                 elif ex_opcode == DW_LNE_define_file:
                     operand = struct_parse(
                         self.structs.Dwarf_lineprog_file_entry, self.stream)
                     self['file_entry'].append(operand)
+                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
             else: # 0 < opcode < opcode_base
                 # Standard opcode
                 if opcode == DW_LNS_copy:
-                    add_state_to_table()
+                    add_entry_new_state(opcode, [])
                 elif opcode == DW_LNS_advance_pc:
                     operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                            self.stream)
-                    state.address += (
+                    address_addend = (
                         operand * self.header['minimum_instruction_length'])
+                    state.address += address_addend
+                    add_entry_old_state(opcode, [address_addend])
                 elif opcode == DW_LNS_advance_line:
                     operand = struct_parse(self.structs.Dwarf_sleb128(''),
                                            self.stream)
@@ -158,33 +195,43 @@ class LineProgram(object):
                     operand = struct_parse(self.structs.Dwarf_sleb128(''),
                                            self.stream)
                     state.file = operand
+                    add_entry_old_state(opcode, [operand])
                 elif opcode == DW_LNS_set_column:
                     operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                            self.stream)
                     state.column = operand
+                    add_entry_old_state(opcode, [operand])
                 elif opcode == DW_LNS_negate_stmt:
                     state.is_stmt = not state.is_stmt
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_set_basic_block:
                     state.basic_block = True
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_const_add_pc:
                     adjusted_opcode = 255 - self['opcode_base']
-                    state.address += ((adjusted_opcode / self['line_range']) *
+                    address_addend = ((adjusted_opcode / self['line_range']) *
                                       self['minimum_instruction_length'])
+                    state.address += address_addend
+                    add_entry_old_state(opcode, [address_addend])
                 elif opcode == DW_LNS_fixed_advance_pc:
                     operand = struct_parse(self.structs.Dwarf_uint16(''),
                                            self.stream)
                     state.address += operand
+                    add_entry_old_state(opcode, [operand])
                 elif opcode == DW_LNS_set_prologue_end:
                     state.prologue_end = True
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_set_epilogue_begin:
                     state.epilogue_begin = True
+                    add_entry_old_state(opcode, [])
                 elif opcode == DW_LNS_set_isa:
                     operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                            self.stream)
                     state.isa = operand
+                    add_entry_old_state(opcode, [operand])
                 else:
                     dwarf_assert(False, 'Invalid standard line program opcode: %s' % (
                         opcode,))
             offset = self.stream.tell()
-        return linetable
+        return entries
 
diff --git a/scripts/readelf.py b/scripts/readelf.py
index e95dac3..c11a197 100755
--- a/scripts/readelf.py
+++ b/scripts/readelf.py
@@ -36,6 +36,8 @@ from elftools.elf.descriptions import (
     )
 from elftools.dwarf.dwarfinfo import DWARFInfo
 from elftools.dwarf.descriptions import describe_attr_value
+from elftools.dwarf.constants import (
+    DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
 
 
 class ReadElf(object):
@@ -544,7 +546,7 @@ class ReadElf(object):
         """ Dump the (decoded) line programs from .debug_line
             The programs are dumped in the order of the CUs they belong to.
         """
-        self._emitline('Decoded dump of debug contents of section .debug_line:')
+        self._emitline('Decoded dump of debug contents of section .debug_line:\n')
 
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
@@ -560,16 +562,38 @@ class ReadElf(object):
             self._emitline('CU: %s:' % cu_filename)
             self._emitline('File name Line number Starting address')
 
-            # readelf doesn't print the state after end_sequence instructions.
-            # I think it's a bug but to be compatible I don't print them too.
-            for state in lineprogram.get_line_table():
-                if not state.end_sequence:
+            # Print each state's file, line and address information. For some
+            # instructions other output is needed to be compatible with
+            # readelf.
+            for entry in lineprogram.get_entries():
+                state = entry.state
+                if state is None:
+                    # Special handling for commands that don't set a new state
+                    if entry.command == DW_LNS_set_file:
+                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
+                        if file_entry.dir_index == 0:
+                            # current directory
+                            self._emitline('\n./%s:[++]' % (
+                                file_entry.name))
+                        else:
+                            self._emitline('\n%s/%s:' % (
+                                lineprogram['include_directory'][file_entry.dir_index - 1],
+                                file_entry.name))
+                    elif entry.command == DW_LNE_define_file:
+                        self._emitline('%s:' % (
+                            lineprogram['include_directory'][entry.args[0].dir_index]))
+                elif not state.end_sequence:
+                    # readelf doesn't print the state after end_sequence
+                    # instructions. I think it's a bug but to be compatible
+                    # I don't print them too.
                     self._emitline('%-35s %11d %18s' % (
                         lineprogram['file_entry'][state.file - 1].name,
                         state.line,
                         '0' if state.address == 0 else
                                self._format_hex(state.address)))
-            self._emitline()
+                if entry.command == DW_LNS_copy:
+                    # Another readelf oddity...
+                    self._emitline()
 
     def _emit(self, s=''):
         """ Emit an object to output
diff --git a/tests/test_dwarf_lineprogram.py b/tests/test_dwarf_lineprogram.py
index 50d8af3..56d96c3 100644
--- a/tests/test_dwarf_lineprogram.py
+++ b/tests/test_dwarf_lineprogram.py
@@ -2,8 +2,9 @@ import sys, unittest
 from cStringIO import StringIO
 
 sys.path.extend(['.', '..'])
-from elftools.dwarf.lineprogram import LineProgram, LineState
+from elftools.dwarf.lineprogram import LineProgram, LineState, LineProgramEntry
 from elftools.dwarf.structs import DWARFStructs
+from elftools.dwarf.constants import *
 
 
 class TestLineProgram(unittest.TestCase):
@@ -50,13 +51,21 @@ class TestLineProgram(unittest.TestCase):
             '\x00\x01\x01')
 
         lp = self._make_program_in_stream(s)
-        linetable = lp.get_line_table()
+        linetable = lp.get_entries()
 
-        self.assertLineState(linetable[0], address=0x239, line=3)
-        self.assertLineState(linetable[1], address=0x23c, line=5)
-        self.assertLineState(linetable[2], address=0x244, line=6)
-        self.assertLineState(linetable[3], address=0x24b, line=7, end_sequence=False)
-        self.assertLineState(linetable[4], address=0x24d, line=7, end_sequence=True)
+        self.assertEqual(len(linetable), 7)
+        self.assertIs(linetable[0].state, None)  # doesn't modify state
+        self.assertEqual(linetable[0].command, DW_LNS_advance_pc)
+        self.assertEqual(linetable[0].args, [0x239])
+        self.assertLineState(linetable[1].state, address=0x239, line=3)
+        self.assertEqual(linetable[1].command, 0xb)
+        self.assertEqual(linetable[1].args, [2, 0])
+        self.assertLineState(linetable[2].state, address=0x23c, line=5)
+        self.assertLineState(linetable[3].state, address=0x244, line=6)
+        self.assertLineState(linetable[4].state, address=0x24b, line=7, end_sequence=False)
+        self.assertEqual(linetable[5].command, DW_LNS_advance_pc)
+        self.assertEqual(linetable[5].args, [2])
+        self.assertLineState(linetable[6].state, address=0x24d, line=7, end_sequence=True)
 
     def test_spec_sample_60(self):
         # Sample in figure 60 of DWARFv3
@@ -74,13 +83,17 @@ class TestLineProgram(unittest.TestCase):
             '\x00\x01\x01')
 
         lp = self._make_program_in_stream(s)
-        linetable = lp.get_line_table()
+        linetable = lp.get_entries()
 
-        self.assertLineState(linetable[0], address=0x239, line=3)
-        self.assertLineState(linetable[1], address=0x23c, line=5)
-        self.assertLineState(linetable[2], address=0x244, line=6)
-        self.assertLineState(linetable[3], address=0x24b, line=7, end_sequence=False)
-        self.assertLineState(linetable[4], address=0x24d, line=7, end_sequence=True)
+        self.assertEqual(len(linetable), 10)
+        self.assertIs(linetable[0].state, None)  # doesn't modify state
+        self.assertEqual(linetable[0].command, DW_LNS_fixed_advance_pc)
+        self.assertEqual(linetable[0].args, [0x239])
+        self.assertLineState(linetable[1].state, address=0x239, line=3)
+        self.assertLineState(linetable[3].state, address=0x23c, line=5)
+        self.assertLineState(linetable[5].state, address=0x244, line=6)
+        self.assertLineState(linetable[7].state, address=0x24b, line=7, end_sequence=False)
+        self.assertLineState(linetable[9].state, address=0x24d, line=7, end_sequence=True)
 
 
 if __name__ == '__main__':
-- 
2.30.2