Update version to 0.25 to prepare for release
[pyelftools.git] / elftools / dwarf / lineprogram.py
#-------------------------------------------------------------------------------
# elftools: dwarf/lineprogram.py
#
# DWARF line number program
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
import copy
from collections import namedtuple

from ..common.utils import struct_parse, dwarf_assert
from .constants import *
15
16
# LineProgramEntry - one decoded command of a line program.
# A line program is a sequence of encoded commands. Some of them emit a new
# LineState (a mapping between a line and an address) and some only modify
# the registers of the state machine.
#
# Fields:
#
#   command:
#       The numeric command/opcode. For standard commands it is the opcode
#       that can be compared with the DW_LNS_* constants. For extended
#       commands it is the extended opcode that can be compared with the
#       DW_LNE_* constants. For special commands it is the opcode itself.
#
#   is_extended:
#       An extended command is encoded as a zero byte followed by an
#       extended opcode, and extended opcodes overlap with the other
#       opcodes. This flag tells that 'command' holds an extended opcode.
#
#   args:
#       A list of the decoded arguments of the command.
#
#   state:
#       The relevant LineState object for commands that add a new state,
#       None for commands that don't.
#
LineProgramEntry = namedtuple(
    'LineProgramEntry',
    ['command', 'is_extended', 'args', 'state'])
41
42
class LineState(object):
    """ Represents a line program state (or a "row" in the matrix
        describing debug location information for addresses).
        The instance variables of this class are the "state machine registers"
        described in section 6.2.2 of DWARFv3 (plus op_index, which this
        module uses when decoding special opcodes).
    """
    def __init__(self, default_is_stmt):
        """ default_is_stmt:
                Initial value of the is_stmt register. All the other
                registers start from fixed values.
        """
        self.address = 0
        self.file = 1
        self.line = 1
        self.column = 0
        self.op_index = 0
        self.is_stmt = default_is_stmt
        self.basic_block = False
        self.end_sequence = False
        self.prologue_end = False
        self.epilogue_begin = False
        self.isa = 0

    def __repr__(self):
        """ Multi-line dump of all the registers, one per line. The address
            is shown in hex, everything else with its default formatting.
        """
        a = ['<LineState %x:' % id(self)]
        a.append(' address = 0x%x' % self.address)
        # op_index is listed too, so that the dump shows every register set
        # in __init__ (in the same order).
        for attr in ('file', 'line', 'column', 'op_index', 'is_stmt',
                     'basic_block', 'end_sequence', 'prologue_end',
                     'epilogue_begin', 'isa'):
            a.append(' %s = %s' % (attr, getattr(self, attr)))
        return '\n'.join(a) + '>\n'
69
70
class LineProgram(object):
    """ Decodes a DWARF line number program (section 6.2 of DWARFv3) into a
        list of LineProgramEntry objects. The entries that carry a LineState
        make up the "line table" - the matrix that maps addresses to source
        locations.
    """
    def __init__(self, header, stream, structs,
                 program_start_offset, program_end_offset):
        """
            header:
                The header of this line program. Note: LineProgram may modify
                its header by appending file entries if DW_LNE_define_file
                instructions are encountered.

            stream:
                The stream this program can be read from.

            structs:
                A DWARFStructs instance suitable for this line program

            program_{start|end}_offset:
                Offset in the debug_line section stream where this program
                starts (the actual program, after the header), and where it
                ends.
                The actual range includes start but not end: [start, end - 1]
        """
        self.stream = stream
        self.header = header
        self.structs = structs
        self.program_start_offset = program_start_offset
        self.program_end_offset = program_end_offset
        # Filled lazily by get_entries
        self._decoded_entries = None

    def get_entries(self):
        """ Get the decoded entries for this line program. Return a list of
            LineProgramEntry objects.
            Note that this contains more information than absolutely required
            for the line table. The line table can be easily extracted from
            the list of entries by looking only at entries with non-None
            state. The extra information is mainly for the purposes of display
            with readelf and debugging.

            The program is decoded on the first call only; later calls return
            the same list object.
        """
        if self._decoded_entries is None:
            self._decoded_entries = self._decode_line_program()
        return self._decoded_entries

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_line_program(self):
        """ Run the line program from program_start_offset up to (but not
            including) program_end_offset and return the list of
            LineProgramEntry objects it produces.
        """
        entries = []
        state = LineState(self.header['default_is_stmt'])

        def add_entry_new_state(cmd, args, is_extended=False):
            # Add an entry that sets a new state.
            # After adding, clear some state registers.
            entries.append(LineProgramEntry(
                cmd, is_extended, args, copy.copy(state)))
            state.basic_block = False
            state.prologue_end = False
            state.epilogue_begin = False

        def add_entry_old_state(cmd, args, is_extended=False):
            # Add an entry that doesn't visibly set a new state
            entries.append(LineProgramEntry(cmd, is_extended, args, None))

        def advance_address(operation_advance):
            # Advance the address and op_index registers by the given
            # operation advance (the recipe in 6.2.5.1) and return the value
            # that was added to the address.
            # A header value of 0 is treated as 1 to avoid dividing by zero.
            # With 1 operation per instruction op_index always stays 0 and
            # the addend is simply
            # minimum_instruction_length * operation_advance.
            max_ops = self['maximum_operations_per_instruction'] or 1
            total_ops = state.op_index + operation_advance
            address_addend = (
                self['minimum_instruction_length'] * (total_ops // max_ops))
            state.address += address_addend
            state.op_index = total_ops % max_ops
            return address_addend

        offset = self.program_start_offset
        while offset < self.program_end_offset:
            # Only the opcode is parsed at an explicit offset. The operands
            # are read from wherever the stream was left by the previous
            # parse.
            opcode = struct_parse(
                self.structs.Dwarf_uint8(''),
                self.stream,
                offset)

            # As an exercise in avoiding premature optimization, if...elif
            # chains are used here for standard and extended opcodes instead
            # of dispatch tables. This keeps the code much cleaner. Besides,
            # the majority of instructions in a typical program are special
            # opcodes anyway.
            if opcode >= self.header['opcode_base']:
                # Special opcode (follow the recipe in 6.2.5.1)
                adjusted_opcode = opcode - self['opcode_base']
                address_addend = advance_address(
                    adjusted_opcode // self['line_range'])
                line_addend = (self['line_base'] +
                               (adjusted_opcode % self['line_range']))
                state.line += line_addend
                add_entry_new_state(
                    opcode, [line_addend, address_addend, state.op_index])
            elif opcode == 0:
                # Extended opcode: start with a zero byte, followed by
                # instruction size and the instruction itself.
                inst_len = struct_parse(self.structs.Dwarf_uleb128(''),
                                        self.stream)
                ex_opcode = struct_parse(self.structs.Dwarf_uint8(''),
                                         self.stream)

                if ex_opcode == DW_LNE_end_sequence:
                    state.end_sequence = True
                    add_entry_new_state(ex_opcode, [], is_extended=True)
                    # reset state
                    state = LineState(self.header['default_is_stmt'])
                elif ex_opcode == DW_LNE_set_address:
                    operand = struct_parse(self.structs.Dwarf_target_addr(''),
                                           self.stream)
                    state.address = operand
                    # Setting an absolute address also restarts the
                    # operation index (DWARFv4, 6.2.5.3).
                    state.op_index = 0
                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
                elif ex_opcode == DW_LNE_define_file:
                    operand = struct_parse(
                        self.structs.Dwarf_lineprog_file_entry, self.stream)
                    self['file_entry'].append(operand)
                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
                else:
                    # Unknown, but need to roll forward the stream because the
                    # length is specified. Seek forward inst_len - 1 because
                    # we've already read the extended opcode, which takes part
                    # in the length.
                    # No entry is recorded for such instructions.
                    self.stream.seek(inst_len - 1, os.SEEK_CUR)
            else: # 0 < opcode < opcode_base
                # Standard opcode
                if opcode == DW_LNS_copy:
                    add_entry_new_state(opcode, [])
                elif opcode == DW_LNS_advance_pc:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    # The operand is an operation advance, applied the same
                    # way as the one of a special opcode.
                    address_addend = advance_address(operand)
                    add_entry_old_state(opcode, [address_addend])
                elif opcode == DW_LNS_advance_line:
                    operand = struct_parse(self.structs.Dwarf_sleb128(''),
                                           self.stream)
                    state.line += operand
                    # Recorded like every other standard opcode, so that
                    # the list of entries covers the whole program.
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_set_file:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.file = operand
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_set_column:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.column = operand
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_negate_stmt:
                    state.is_stmt = not state.is_stmt
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_set_basic_block:
                    state.basic_block = True
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_const_add_pc:
                    # Advance by the same amount as special opcode 255
                    # would, without touching the line register.
                    adjusted_opcode = 255 - self['opcode_base']
                    address_addend = advance_address(
                        adjusted_opcode // self['line_range'])
                    add_entry_old_state(opcode, [address_addend])
                elif opcode == DW_LNS_fixed_advance_pc:
                    operand = struct_parse(self.structs.Dwarf_uint16(''),
                                           self.stream)
                    # The operand is added as is (it's not multiplied by
                    # minimum_instruction_length), and the operation index
                    # is restarted (DWARFv4, 6.2.5.2).
                    state.address += operand
                    state.op_index = 0
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_set_prologue_end:
                    state.prologue_end = True
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_set_epilogue_begin:
                    state.epilogue_begin = True
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_set_isa:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.isa = operand
                    add_entry_old_state(opcode, [operand])
                else:
                    dwarf_assert(
                        False,
                        'Invalid standard line program opcode: %s' % (
                            opcode,))
            # The next opcode starts where the operands of this one ended
            offset = self.stream.tell()
        return entries
255