1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging
.config
import valid_ident
11 from ..construct
import (
12 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
13 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
14 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
15 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
,
18 from ..common
.construct_utils
import (RepeatUntilExcluding
, ULEB128
, SLEB128
,
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy (empty) names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+), EH_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)

            Raises AssertionError if dwarf_format or address_size holds an
            unsupported value. All the structs are created eagerly.
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field, in bytes: 4 when dwarf_format is
            32, 12 otherwise.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the endianness-dependent primitive types, then build every
            composite struct on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # Order matters: later creators use attributes set by earlier ones
        # (e.g. Dwarf_initial_length and Dwarf_uleb128 are used by the headers,
        # Dwarf_dw_form is looked up by the line program header).
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

        self._create_debugsup()
        self._create_gnu_debugaltlink()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory that, given a name, returns
            the parser of an initial length field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the LEB128 variable-length integer parsers.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header, the compilation unit header struct.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specs, read up to (and excluding) the
            DW_AT_null/DW_FORM_null terminator pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only DW_FORM_implicit_const carries an inline value.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_debugsup(self):
        """ Create Dwarf_debugsup: version, is_supplementary flag and the
            supplementary file name.
        """
        # We don't care about checksums, for now.
        self.Dwarf_debugsup = Struct('Elf_debugsup',
            self.Dwarf_int16('version'),
            self.Dwarf_uint8('is_supplementary'),
            CString('sup_filename'))

    def _create_gnu_debugaltlink(self):
        """ Create Dwarf_debugaltlink: a file name followed by a 20-byte
            checksum string.
        """
        self.Dwarf_debugaltlink = Struct('Elf_debugaltlink',
            CString("sup_filename"),
            String("sup_checksum", length=20))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form, a dictionary mapping form names to the
            (empty-named) construct that parses an attribute of that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            # Used for hashes and such, not for integers
            DW_FORM_data16=Array(16, self.Dwarf_uint8('')),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strp_sup=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            # String index forms: strx is ULEB128-encoded, strx<n> is an
            # n-byte unsigned index (mirrors the addrx family above).
            DW_FORM_strx=self.Dwarf_uleb128(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref_sup4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_sup8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized in DWARF version 2, offset-sized in later versions.
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

            # New forms in DWARFv5
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128('')
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header, the address ranges table header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
        )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header, the name lookup table header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
        )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            # NOTE: the struct name is a runtime string, kept as-is.
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
        )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
        )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
            header covers both the legacy (version < 5) directory/file lists
            and the DWARFv5 self-describing entry formats.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                # so the parser is built once per header and reused per entry.
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # The field exists from version 4 on; defaults to 1 before that.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5,  # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry))  # array name is file_entry
        )

    def _create_callframe_entry_headers(self):
        """ Create the call-frame headers: Dwarf_CIE_header, EH_CIE_header
            and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # Bound before the override below, so EH_CIE_header always keeps
        # this layout regardless of dwarf_version.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of bytes
            prefixed by its length.

            length_field:
                One of the (still unnamed) integer parsers, used to read
                the length prefix.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create a struct for debug_loclists CU header, DWARFv5, 7.29,
            together with the location list entry and locview pair parsers.
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # Re-read from the context as a regular field of this struct.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        # Counted location description: ULEB128 length, then that many bytes.
        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(
            self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
                    'DW_LLE_default_location' : Struct('default_location', cld),
                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create the debug_rnglists CU header struct and the range list
            entry parser.
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # Re-read from the context as a regular field of this struct.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list'      : Struct('end_of_list'),
                    'DW_RLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
496 class _InitialLengthAdapter(Adapter
):
497 """ A standard Construct adapter that expects a sub-construct
498 as a struct with one or two values (first, second).
500 def _decode(self
, obj
, context
):
501 if obj
.first
< 0xFFFFFF00:
502 context
['is64'] = False
505 if obj
.first
== 0xFFFFFFFF:
506 context
['is64'] = True
509 raise ConstructError("Failed decoding initial length for %X" % (