cleanups of trailing whitespace
[pyelftools.git] / elftools / dwarf / callframe.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/callframe.py
3 #
4 # DWARF call frame information
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 import copy
10 from collections import namedtuple
11 from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
12 from ..common.py3compat import iterkeys
13 from .structs import DWARFStructs
14 from .constants import *
15
16
class CallFrameInfo(object):
    """ DWARF CFI (Call Frame Info)

        stream, size:
            A stream holding the .debug_frame section, and the size of the
            section in it.

        base_structs:
            The structs to be used as the base for parsing this section.
            Eventually, each entry gets its own structs based on the initial
            length field it starts with. The address_size, however, is taken
            from base_structs. This appears to be a limitation of the DWARFv3
            standard, fixed in v4 (where an address_size field exists for each
            CFI). I had a discussion about this on dwarf-discuss that confirms
            this.
            Currently for base_structs I simply use the elfclass of the
            containing file, but more sophisticated methods are used by
            libdwarf and others, such as guessing which CU contains which FDEs
            (based on their address ranges) and taking the address_size from
            those CUs.
    """
    def __init__(self, stream, size, base_structs):
        self.stream = stream
        self.size = size
        self.base_structs = base_structs
        # Lazily filled by get_entries()
        self.entries = None

        # Map between an offset in the stream and the entry object found at this
        # offset. Useful for assigning CIE to FDEs according to the CIE_pointer
        # header field which contains a stream offset.
        self._entry_cache = {}

    def get_entries(self):
        """ Get a list of entries that constitute this CFI. The list consists
            of CIE or FDE objects, in the order of their appearance in the
            section. The section is parsed on the first call only; later
            calls return the same list.
        """
        if self.entries is None:
            self.entries = self._parse_entries()
        return self.entries

    #-------------------------

    def _parse_entries(self):
        """ Walk the section from offset 0 up to self.size and return the
            list of entries found, in order.
        """
        entries = []
        offset = 0
        while offset < self.size:
            entry = self._parse_entry_at(offset)
            entries.append(entry)
            # Advance by the extent recorded in the entry itself rather than
            # by self.stream.tell(): when the entry comes from the cache (for
            # example a CIE that was already parsed through the CIE_pointer
            # of an earlier FDE) the stream is not moved, and relying on its
            # position would revisit the same offset forever.
            offset = self._entry_end_offset(entry)
        return entries

    @staticmethod
    def _entry_end_offset(entry):
        """ Return the section offset right after the given entry: its start
            offset plus its initial length field plus the length it declares.
        """
        return (
            entry.offset + entry['length'] +
            entry.structs.initial_length_field_size())

    def _parse_entry_at(self, offset):
        """ Parse an entry from self.stream starting with the given offset.
            Return the entry object. After an actual parse self.stream is
            left where instruction parsing stopped; when the entry is served
            from the cache the stream is not touched at all.
        """
        if offset in self._entry_cache:
            return self._entry_cache[offset]

        # The first 4 bytes tell 32-bit DWARF (a plain length) from 64-bit
        # DWARF (the 0xFFFFFFFF escape value)
        entry_length = struct_parse(
            self.base_structs.Dwarf_uint32(''), self.stream, offset)
        dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32

        entry_structs = DWARFStructs(
            little_endian=self.base_structs.little_endian,
            dwarf_format=dwarf_format,
            address_size=self.base_structs.address_size)

        # Read the next field to see whether this is a CIE or FDE. Seek
        # explicitly past the whole initial length field: it is wider than
        # the 4 bytes consumed above when the format is 64-bit.
        CIE_id = struct_parse(
            entry_structs.Dwarf_offset(''), self.stream,
            offset + entry_structs.initial_length_field_size())

        is_CIE = (
            (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
            CIE_id == 0xFFFFFFFFFFFFFFFF)

        if is_CIE:
            header_struct = entry_structs.Dwarf_CIE_header
        else:
            header_struct = entry_structs.Dwarf_FDE_header

        # Parse the header, which goes up to and including the
        # return_address_register field
        header = struct_parse(
            header_struct, self.stream, offset)

        # For convenience, compute the end offset for this entry
        end_offset = (
            offset + header.length +
            entry_structs.initial_length_field_size())

        # At this point self.stream is at the start of the instruction list
        # for this entry
        instructions = self._parse_instructions(
            entry_structs, self.stream.tell(), end_offset)

        if is_CIE:
            self._entry_cache[offset] = CIE(
                header=header, instructions=instructions, offset=offset,
                structs=entry_structs)
        else: # FDE
            # Resolving the CIE may seek elsewhere in the stream, so the
            # current position is saved and restored around it
            with preserve_stream_pos(self.stream):
                cie = self._parse_entry_at(header['CIE_pointer'])
            self._entry_cache[offset] = FDE(
                header=header, instructions=instructions, offset=offset,
                structs=entry_structs, cie=cie)
        return self._entry_cache[offset]

    def _parse_instructions(self, structs, offset, end_offset):
        """ Parse a list of CFI instructions from self.stream, starting with
            the offset and until (not including) end_offset.
            Return a list of CallFrameInstruction objects.
            An opcode that is not recognized is reported through
            dwarf_assert.
        """
        instructions = []
        while offset < end_offset:
            opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
            args = []

            # The high 2 bits select a "primary" opcode whose argument is
            # embedded in the low 6 bits
            primary = opcode & _PRIMARY_MASK
            primary_arg = opcode & _PRIMARY_ARG_MASK
            if primary == DW_CFA_advance_loc:
                args = [primary_arg]
            elif primary == DW_CFA_offset:
                args = [
                    primary_arg,
                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif primary == DW_CFA_restore:
                args = [primary_arg]
            # primary == 0 and real opcode is extended
            elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
                            DW_CFA_restore_state):
                args = []
            elif opcode == DW_CFA_set_loc:
                args = [
                    struct_parse(structs.Dwarf_target_addr(''), self.stream)]
            elif opcode == DW_CFA_advance_loc1:
                args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
            elif opcode == DW_CFA_advance_loc2:
                args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
            elif opcode == DW_CFA_advance_loc4:
                args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
            elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
                            DW_CFA_def_cfa, DW_CFA_val_offset):
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
                            DW_CFA_same_value, DW_CFA_def_cfa_register,
                            DW_CFA_def_cfa_offset):
                args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif opcode == DW_CFA_def_cfa_offset_sf:
                args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
            elif opcode == DW_CFA_def_cfa_expression:
                args = [struct_parse(
                    structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
            elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(
                        structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
            elif opcode in (DW_CFA_offset_extended_sf,
                            DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(structs.Dwarf_sleb128(''), self.stream)]
            else:
                dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode)

            instructions.append(CallFrameInstruction(opcode=opcode, args=args))
            offset = self.stream.tell()
        return instructions
188
189
def instruction_name(opcode):
    """ Return the DW_CFA_* name that corresponds to a numeric opcode.
        Raises KeyError if the opcode has no entry in the name map.
    """
    high_bits = opcode & _PRIMARY_MASK
    # When any of the masked high bits is set they alone identify the
    # instruction; otherwise the complete opcode value does.
    key = high_bits if high_bits != 0 else opcode
    return _OPCODE_NAME_MAP[key]
198
199
class CallFrameInstruction(object):
    """ A single decoded instruction of a CFI entry.

        opcode:
            The numeric opcode, exactly as it appears in the section.

        args:
            A list of the arguments decoded from the stream. For instructions
            that embed an argument in the low bits of the opcode, that
            argument is part of the list as well.
    """
    def __init__(self, opcode, args):
        self.args = args
        self.opcode = opcode

    def __repr__(self):
        mnemonic = instruction_name(self.opcode)
        return '%s (0x%x): %s' % (mnemonic, self.opcode, self.args)
213
214
class CFIEntry(object):
    """ A common base class for CFI entries.
        Contains a header and a list of instructions (CallFrameInstruction).

        header:
            The parsed entry header; its fields are also reachable through
            dict-like access on the entry itself.

        structs:
            The structs object that was used to parse this entry.

        instructions:
            The list of CallFrameInstruction objects of this entry.

        offset:
            The offset of this entry from the beginning of the section.

        cie:
            For FDEs, a CIE pointer is required.
    """
    def __init__(self, header, structs, instructions, offset, cie=None):
        self.header = header
        self.structs = structs
        self.instructions = instructions
        self.offset = offset
        self.cie = cie
        # Filled lazily by get_decoded()
        self._decoded_table = None

    def get_decoded(self):
        """ Decode the CFI contained in this entry and return a
            DecodedCallFrameTable object representing it. See the documentation
            of that class to understand how to interpret the decoded table.
            The table is computed once and cached.
        """
        if self._decoded_table is None:
            self._decoded_table = self._decode_CFI_table()
        return self._decoded_table

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_CFI_table(self):
        """ Decode the instructions contained in the given CFI entry and return
            a DecodedCallFrameTable.
        """
        if isinstance(self, CIE):
            # For a CIE, initialize cur_line to an "empty" line
            cie = self
            cur_line = dict(pc=0, cfa=None)
            reg_order = []
        else: # FDE
            # For a FDE, we need to decode the attached CIE first, because its
            # decoded table is needed. Its "initial instructions" describe a
            # line that serves as the base (first) line in the FDE's table.
            cie = self.cie
            cie_decoded_table = cie.get_decoded()
            last_line_in_CIE = copy.copy(cie_decoded_table.table[-1])
            # cur_line must be a separate dict: it is modified while decoding,
            # whereas last_line_in_CIE has to keep the initial rules so that
            # DW_CFA_restore can bring them back.
            cur_line = copy.copy(last_line_in_CIE)
            cur_line['pc'] = self['initial_location']
            reg_order = copy.copy(cie_decoded_table.reg_order)

        table = []

        # Keeps a stack for the use of DW_CFA_{remember|restore}_state
        # instructions.
        line_stack = []

        def _add_to_order(regnum):
            # Record a register the first time a rule is assigned to it
            if regnum not in cur_line:
                reg_order.append(regnum)

        for instr in self.instructions:
            # Throughout this loop, cur_line is the current line. Some
            # instructions add it to the table, but most instructions just
            # update it without adding it to the table.

            name = instruction_name(instr.opcode)

            if name == 'DW_CFA_set_loc':
                table.append(copy.copy(cur_line))
                cur_line['pc'] = instr.args[0]
            elif name in (  'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                            'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
                table.append(copy.copy(cur_line))
                cur_line['pc'] += instr.args[0] * cie['code_alignment_factor']
            elif name == 'DW_CFA_def_cfa':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1])
            elif name == 'DW_CFA_def_cfa_sf':
                # The signed offset is factored by the data alignment factor
                # (the code alignment factor only applies to location deltas)
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_register':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=cur_line['cfa'].offset)
            elif name == 'DW_CFA_def_cfa_offset':
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0])
            elif name == 'DW_CFA_def_cfa_offset_sf':
                # Same as DW_CFA_def_cfa_offset, with a signed factored offset
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_expression':
                cur_line['cfa'] = CFARule(expr=instr.args[0])
            elif name == 'DW_CFA_undefined':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.UNDEFINED)
            elif name == 'DW_CFA_same_value':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.SAME_VALUE)
            elif name in (  'DW_CFA_offset', 'DW_CFA_offset_extended',
                            'DW_CFA_offset_extended_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name in ('DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_register':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.REGISTER,
                    instr.args[1])
            elif name == 'DW_CFA_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.EXPRESSION,
                    instr.args[1])
            elif name == 'DW_CFA_val_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_EXPRESSION,
                    instr.args[1])
            elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended'):
                # Checked first: last_line_in_CIE only exists for a FDE
                dwarf_assert(
                    isinstance(self, FDE),
                    '%s instruction must be in a FDE' % name)
                dwarf_assert(
                    instr.args[0] in last_line_in_CIE,
                    '%s: can not find register in CIE' % name)
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = last_line_in_CIE[instr.args[0]]
            elif name == 'DW_CFA_remember_state':
                # Push a snapshot; pushing cur_line itself would let later
                # rule changes leak into the remembered state.
                line_stack.append(copy.copy(cur_line))
            elif name == 'DW_CFA_restore_state':
                # Only the rules are restored; the location of the current
                # line stays where the preceding instructions advanced it.
                pc = cur_line['pc']
                cur_line = line_stack.pop()
                cur_line['pc'] = pc

        # The current line is appended to the table after all instructions
        # have ended, in any case (even if there were no instructions).
        table.append(cur_line)
        return DecodedCallFrameTable(table=table, reg_order=reg_order)
355
356
# A CIE and a FDE have exactly the same functionality, except that a FDE has
# a pointer to its CIE. The functionality was wholly encapsulated in CFIEntry,
# so the CIE and FDE classes exist separately for identification (instead
# of having an explicit "entry_type" field in CFIEntry).
#
class CIE(CFIEntry):
    """ Marker subclass of CFIEntry identifying a CIE; adds no behavior.
    """
364
365
class FDE(CFIEntry):
    """ Marker subclass of CFIEntry identifying a FDE; adds no behavior.
    """
368
369
class RegisterRule(object):
    """ A rule that tells how to find a register in a call frame. A rule is
        made of a type (one of the class-level constants below, an
        enumeration following DWARFv3 section 6.4.1) and an optional argument
        that augments the type.
    """
    UNDEFINED = 'UNDEFINED'
    SAME_VALUE = 'SAME_VALUE'
    OFFSET = 'OFFSET'
    VAL_OFFSET = 'VAL_OFFSET'
    REGISTER = 'REGISTER'
    EXPRESSION = 'EXPRESSION'
    VAL_EXPRESSION = 'VAL_EXPRESSION'
    ARCHITECTURAL = 'ARCHITECTURAL'

    def __init__(self, type, arg=None):
        # 'type' shadows the builtin, but it is part of the public signature
        self.arg = arg
        self.type = type

    def __repr__(self):
        fields = (self.type, self.arg)
        return 'RegisterRule(%s, %s)' % fields
390
391
class CFARule(object):
    """ The rule used to compute the CFA for a location: either a register
        together with an offset, or a DWARF expression.
    """
    def __init__(self, reg=None, offset=None, expr=None):
        self.expr = expr
        self.offset = offset
        self.reg = reg

    def __repr__(self):
        fields = (self.reg, self.offset, self.expr)
        return 'CFARule(reg=%s, offset=%s, expr=%s)' % fields
404
405
# The decoded CFI of an entry: one large table, as described in DWARFv3
# section 6.4.1.
#
# DecodedCallFrameTable is a plain named tuple that bundles the table with
# the order in which registers appear in it.
#
# table:
#
#   A list of dicts, one per "line" of the decoded table. Two keys are
#   special in every line: 'pc' holds the location/program counter (LOC), and
#   'cfa' holds the CFARule that locates the CFA on that line.
#   All remaining keys are register numbers, mapped to the RegisterRule that
#   applies to that register.
#
# reg_order:
#
#   The register numbers described in the table, listed in the order in which
#   they first appear.
#
DecodedCallFrameTable = namedtuple(
    'DecodedCallFrameTable', ['table', 'reg_order'])
427
428
429 #---------------- PRIVATE ----------------#
430
# High 2 bits of an opcode byte, and the remaining low 6 bits
_PRIMARY_MASK = 0xC0
_PRIMARY_ARG_MASK = 0x3F

# Maps the value of every DW_CFA_* constant to its name. The constants were
# pulled into this module's namespace by the `import *` from constants, so
# they are collected by scanning globals(). The key list is snapshotted
# before the loop because the loop variable is itself a module global.
_OPCODE_NAME_MAP = {}
for name in list(globals()):
    if not name.startswith('DW_CFA'):
        continue
    _OPCODE_NAME_MAP[globals()[name]] = name
441
442
443
444