Support for DW_FORM_data16, unit test (#437)
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging.config import valid_ident
11 from ..construct import (
12 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
14 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
15 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
16 String, Switch, Value
17 )
18 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
19 StreamOffset)
20 from .enums import *
21
22
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field: 4 bytes when dwarf_format is 32,
            12 bytes otherwise.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the fixed-size integer constructs that match the requested
            endianness, DWARF format and address size, then build every
            derived struct on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # The builders below rely on the integer constructs bound above.
        # Initial length and LEB128 come first because the later structs
        # use self.Dwarf_initial_length / self.Dwarf_uleb128.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

        self._create_debugsup()
        self._create_gnu_debugaltlink()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory that, given a name, returns
            a construct parsing an "initial length" field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the ULEB128 / SLEB128 constructs under Dwarf_* names.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header; the field layout after 'version' depends
            on the parsed version value.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specs, which stops at (and excludes) the
            DW_AT_null / DW_FORM_null pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only DW_FORM_implicit_const carries its value inside
                    # the abbreviation declaration itself.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_debugsup(self):
        """ Create Dwarf_debugsup: version, is_supplementary flag and the
            supplementary file name.
        """
        # We don't care about checksums, for now.
        self.Dwarf_debugsup = Struct('Elf_debugsup',
            self.Dwarf_int16('version'),
            self.Dwarf_uint8('is_supplementary'),
            CString('sup_filename'))

    def _create_gnu_debugaltlink(self):
        """ Create Dwarf_debugaltlink: a file name followed by a 20-byte
            checksum string.
        """
        self.Dwarf_debugaltlink = Struct('Elf_debugaltlink',
            CString("sup_filename"),
            String("sup_checksum", length=20))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dictionary mapping form names to the
            (already dummy-named) constructs that parse their values.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            # Used for hashes and such, not for integers
            DW_FORM_data16=Array(16, self.Dwarf_uint8('')),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strp_sup=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # strx4 is a 4-byte index (same width as addrx4 above); reading
            # it as a 64-bit word would consume 4 bytes too many and
            # desynchronize parsing of the attributes that follow.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref_sup4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_sup8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized for DWARF version 2, offset-sized otherwise.
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

            # New forms in DWARFv5
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128('')
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
        )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
        )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
        )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
        )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
            header selects between the version 5+ (format-described) and the
            legacy directory/file tables based on the parsed version.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Field is parsed only for version >= 4; otherwise its value is 1.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            # Name deliberately doesn't match the legacy object, since the
            # format can't be made compatible
            If(ver5,
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry))  # array name is file_entry
        )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # Bound before the version-4 override below, so EH_CIE_header always
        # keeps the layout without address_size / segment_size.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                Construct factory (called with a dummy name) that parses the
                length prefix preceding the block's bytes.
        """
        return PrefixedArray(
            subcon=self.Dwarf_uint8('elem'),
            length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create the structs for debug_loclists: CU header, list entries
            and location view pairs (DWARFv5, section 7.29)
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # Copies the is64 value found in the parsing context into the
            # resulting header.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        # A ULEB128 byte count followed by that many expression bytes.
        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(
            self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list': Struct('end_of_list'),
                    'DW_LLE_base_addressx': Struct('base_addressx',
                        self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx': Struct('startx_endx',
                        self.Dwarf_uleb128('start_index'),
                        self.Dwarf_uleb128('end_index'),
                        cld),
                    'DW_LLE_startx_length': Struct('startx_endx',
                        self.Dwarf_uleb128('start_index'),
                        self.Dwarf_uleb128('length'),
                        cld),
                    'DW_LLE_offset_pair': Struct('startx_endx',
                        self.Dwarf_uleb128('start_offset'),
                        self.Dwarf_uleb128('end_offset'),
                        cld),
                    'DW_LLE_default_location': Struct('default_location', cld),
                    'DW_LLE_base_address': Struct('base_address',
                        self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end': Struct('start_end',
                        self.Dwarf_target_addr('start_address'),
                        self.Dwarf_target_addr('end_address'),
                        cld),
                    'DW_LLE_start_length': Struct('start_length',
                        self.Dwarf_target_addr('start_address'),
                        self.Dwarf_uleb128('length'),
                        cld),
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length',
                      lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'),
            self.Dwarf_uleb128('begin'),
            self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create the structs for debug_rnglists: CU header and list
            entries.
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # Copies the is64 value found in the parsing context into the
            # resulting header.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list': Struct('end_of_list'),
                    'DW_RLE_base_addressx': Struct('base_addressx',
                        self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx': Struct('startx_endx',
                        self.Dwarf_uleb128('start_index'),
                        self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length': Struct('startx_endx',
                        self.Dwarf_uleb128('start_index'),
                        self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair': Struct('startx_endx',
                        self.Dwarf_uleb128('start_offset'),
                        self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address': Struct('base_address',
                        self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end': Struct('start_end',
                        self.Dwarf_target_addr('start_address'),
                        self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length': Struct('start_length',
                        self.Dwarf_target_addr('start_address'),
                        self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length',
                      lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
494
495
class _InitialLengthAdapter(Adapter):
    """ Construct adapter collapsing a parsed (first, second) initial-length
        struct into a single length value. As a side effect, records in the
        parsing context (key 'is64') which of the two words supplied it.
    """
    def _decode(self, obj, context):
        first_word = obj.first

        # Plain value: the first word itself is the length.
        if first_word < 0xFFFFFF00:
            context['is64'] = False
            return first_word

        # Of the remaining values only the continuation marker is accepted.
        if first_word != 0xFFFFFFFF:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))

        # Continuation marker: the length is carried by the second word.
        context['is64'] = True
        return obj.second