Indirect encoding support (#430)
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging.config import valid_ident
11 from ..construct import (
12 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
14 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
15 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
16 Switch, Value
17 )
18 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
19 StreamOffset)
20 from .enums import *
21
22
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

            EH_CIE_header (+):
                The CIE header layout without the address_size/segment_size
                fields, kept regardless of dwarf_version

            Dwarf_aranges_header (+), Dwarf_nameLUT_header (+),
            Dwarf_string_offsets_table_header (+),
            Dwarf_address_table_header (+):
                Headers of the corresponding lookup/offset tables

            Dwarf_loclists_CU_header (+), Dwarf_loclists_entries (+),
            Dwarf_loclists_counted_location_description (+),
            Dwarf_locview_pair (+):
                Parsers for location lists and location view pairs

            Dwarf_rnglists_CU_header (+), Dwarf_rnglists_entries (+):
                Parsers for range lists

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field.
        """
        # 64-bit DWARF: 4-byte escape word followed by an 8-byte length.
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the endianness/size dependent primitives, then build every
            composite struct on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # Order matters: later builders use attributes set by earlier ones
        # (e.g. the line program header looks up self.Dwarf_dw_form).
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length, a factory taking the field name.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the LEB128 constructs under Dwarf_* names.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header; the field order depends on the parsed
            'version' field.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specs up to (excluding) the null/null terminator.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only implicit_const carries its value in the abbrev.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form, the mapping from form name to the construct
            that parses a value of that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            # The strx<n> forms mirror addrx<n>: an n-byte index, with the
            # unsuffixed form being ULEB128.
            DW_FORM_strx=self.Dwarf_uleb128(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized in DWARFv2 only; offset-sized in other versions.
            DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''),

            # Parses only the ULEB128 form code; the value that follows is
            # not parsed by this entry.
            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present = StaticField('', 0),
            DW_FORM_sec_offset = self.Dwarf_offset(''),
            DW_FORM_exprloc = self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8 = self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): an attribute name used as a key in the form
            # table - kept as-is, confirm whether anything looks it up.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

            # New forms in DWARFv5
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128('')
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'), # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
            header handles both the version >= 5 layout (format-described
            directory/file tables) and the older NUL-terminated lists.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    # First entry: build the parser from the format
                    # descriptors and cache it in the context for the rest.
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Field exists from version 4 on; otherwise the value is 1.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry)) # array name is file_entry
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # EH_CIE_header always keeps the layout above, even when
        # Dwarf_CIE_header is replaced below.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        # NOTE(review): the test is an exact match, so dwarf_version == 5
        # gets the older layout - confirm this is intended by the callers.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                Construct factory (called with a name) that parses the
                length prefix of the block
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create a struct for debug_loclists CU header, DWARFv5, 7.29
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # is64 is written into the context by the initial length adapter.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                # The payload layout is selected by the entry type just read.
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
                    'DW_LLE_default_location' : Struct('default_location', cld),
                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create Dwarf_rnglists_CU_header and Dwarf_rnglists_entries, the
            range list counterparts of the loclists parsers.
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # is64 is written into the context by the initial length adapter.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                # The payload layout is selected by the entry type just read.
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list'      : Struct('end_of_list'),
                    'DW_RLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
475
476
class _InitialLengthAdapter(Adapter):
    """ Construct adapter collapsing the (first, second) pair parsed for an
        initial length field into a single length value.
    """
    def _decode(self, obj, context):
        """ Return the decoded length and record the format in
            context['is64'].

            Raises ConstructError when the first word is 0xFFFFFF00 or above
            but is not the 0xFFFFFFFF escape value.
        """
        first_word = obj.first

        # Escape value: the real length is in the second (64-bit) word.
        if first_word == 0xFFFFFFFF:
            context['is64'] = True
            return obj.second

        # Everything else from 0xFFFFFF00 upwards is rejected.
        if first_word >= 0xFFFFFF00:
            raise ConstructError(
                "Failed decoding initial length for %X" % first_word)

        context['is64'] = False
        return first_word