1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging
.config
import valid_ident
11 from ..construct
import (
12 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
13 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
14 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
15 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
,
18 from ..common
.construct_utils
import (RepeatUntilExcluding
, ULEB128
, SLEB128
,
23 class DWARFStructs(object):
24 """ Exposes Construct structs suitable for parsing information from DWARF
25 sections. Each compile unit in DWARF info can have its own structs
26 object. Keep in mind that these structs have to be given a name (by
27 calling them with a name) before being used for parsing (like other
28 Construct structs). Those that should be used without a name are marked
31 Accessible attributes (mostly as described in chapter 7 of the DWARF
            Dwarf_[u]int{8,16,32,64}:
35 Data chunks of the common sizes
38 32-bit or 64-bit word, depending on dwarf_format
41 32-bit or 64-bit word, depending on dwarf_format
44 32-bit or 64-bit word, depending on address size
47 "Initial length field" encoding
51 ULEB128 and SLEB128 variable-length encoding
54 Compilation unit header
56 Dwarf_abbrev_declaration (+):
57 Abbreviation table declaration - doesn't include the initial
58 code, only the contents.
61 A dictionary mapping 'DW_FORM_*' keys into construct Structs
62 that parse such forms. These Structs have already been given
65 Dwarf_lineprog_header (+):
68 Dwarf_lineprog_file_entry (+):
69 A single file entry in a line program header or instruction
77 See also the documentation of public methods.
80 little_endian
, dwarf_format
, address_size
, dwarf_version
=2):
85 True if the file is little endian, False if big
88 DWARF Format: 32 or 64-bit (see spec section 7.4)
91 Target machine address size, in bytes (4 or 8). (See spec
94 assert dwarf_format
== 32 or dwarf_format
== 64
95 assert address_size
== 8 or address_size
== 4, str(address_size
)
96 self
.little_endian
= little_endian
97 self
.dwarf_format
= dwarf_format
98 self
.address_size
= address_size
99 self
.dwarf_version
= dwarf_version
100 self
._create
_structs
()
102 def initial_length_field_size(self
):
103 """ Size of an initial length field.
105 return 4 if self
.dwarf_format
== 32 else 12
def _create_structs(self):
    """ Bind the endianness-specific integer constructs to the Dwarf_*
        attributes, then build every composite struct on top of them.
    """
    # Pick the whole family of integer constructs in one go.
    if self.little_endian:
        u8, u16, u32, u64 = ULInt8, ULInt16, ULInt32, ULInt64
        s8, s16, s32, s64 = SLInt8, SLInt16, SLInt32, SLInt64
    else:
        u8, u16, u32, u64 = UBInt8, UBInt16, UBInt32, UBInt64
        s8, s16, s32, s64 = SBInt8, SBInt16, SBInt32, SBInt64

    self.Dwarf_uint8 = u8
    self.Dwarf_uint16 = u16
    self.Dwarf_uint32 = u32
    self.Dwarf_uint64 = u64
    self.Dwarf_int8 = s8
    self.Dwarf_int16 = s16
    self.Dwarf_int32 = s32
    self.Dwarf_int64 = s64

    # Offsets and lengths follow the DWARF format, target addresses follow
    # the address size.
    format_word = u32 if self.dwarf_format == 32 else u64
    self.Dwarf_offset = format_word
    self.Dwarf_length = format_word
    self.Dwarf_target_addr = u32 if self.address_size == 4 else u64

    # The builders run in this order because later ones use attributes
    # assigned by earlier ones.
    builders = (
        self._create_initial_length,
        self._create_leb128,
        self._create_cu_header,
        self._create_abbrev_declaration,
        self._create_dw_form,
        self._create_lineprog_header,
        self._create_callframe_entry_headers,
        self._create_aranges_header,
        self._create_nameLUT_header,
        self._create_string_offsets_table_header,
        self._create_address_table_header,
        self._create_loclists_parsers,
        self._create_rnglists_parsers,
    )
    for build in builders:
        build()
149 def _create_initial_length(self
):
150 def _InitialLength(name
):
151 # Adapts a Struct that parses forward a full initial length field.
152 # Only if the first word is the continuation value, the second
153 # word is parsed from the stream.
154 return _InitialLengthAdapter(
156 self
.Dwarf_uint32('first'),
157 If(lambda ctx
: ctx
.first
== 0xFFFFFFFF,
158 self
.Dwarf_uint64('second'),
160 self
.Dwarf_initial_length
= _InitialLength
def _create_leb128(self):
    """ Expose the ULEB128 and SLEB128 constructs as Dwarf_uleb128 and
        Dwarf_sleb128.
    """
    self.Dwarf_uleb128, self.Dwarf_sleb128 = ULEB128, SLEB128
166 def _create_cu_header(self
):
167 self
.Dwarf_CU_header
= Struct('Dwarf_CU_header',
168 self
.Dwarf_initial_length('unit_length'),
169 self
.Dwarf_uint16('version'),
170 # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
171 IfThenElse('', lambda ctx
: ctx
['version'] >= 5,
173 self
.Dwarf_uint8('unit_type'),
174 self
.Dwarf_uint8('address_size'),
175 self
.Dwarf_offset('debug_abbrev_offset'))),
177 self
.Dwarf_offset('debug_abbrev_offset'),
178 self
.Dwarf_uint8('address_size'))),
181 def _create_abbrev_declaration(self
):
182 self
.Dwarf_abbrev_declaration
= Struct('Dwarf_abbrev_entry',
183 Enum(self
.Dwarf_uleb128('tag'), **ENUM_DW_TAG
),
184 Enum(self
.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN
),
185 RepeatUntilExcluding(
187 obj
.name
== 'DW_AT_null' and obj
.form
== 'DW_FORM_null',
189 Enum(self
.Dwarf_uleb128('name'), **ENUM_DW_AT
),
190 Enum(self
.Dwarf_uleb128('form'), **ENUM_DW_FORM
),
191 If(lambda ctx
: ctx
['form'] == 'DW_FORM_implicit_const',
192 self
.Dwarf_sleb128('value')))))
def _create_dw_form(self):
    """ Build Dwarf_dw_form: a dictionary mapping 'DW_FORM_*' keys to the
        constructs that parse attribute values of that form. Every
        construct is created with an empty name.
    """
    self.Dwarf_dw_form = dict(
        DW_FORM_addr=self.Dwarf_target_addr(''),
        DW_FORM_addrx=self.Dwarf_uleb128(''),
        DW_FORM_addrx1=self.Dwarf_uint8(''),
        DW_FORM_addrx2=self.Dwarf_uint16(''),
        # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
        DW_FORM_addrx4=self.Dwarf_uint32(''),

        DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
        DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
        DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
        DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

        # All DW_FORM_data<n> forms are assumed to be unsigned
        DW_FORM_data1=self.Dwarf_uint8(''),
        DW_FORM_data2=self.Dwarf_uint16(''),
        DW_FORM_data4=self.Dwarf_uint32(''),
        DW_FORM_data8=self.Dwarf_uint64(''),
        DW_FORM_sdata=self.Dwarf_sleb128(''),
        DW_FORM_udata=self.Dwarf_uleb128(''),

        DW_FORM_string=CString(''),
        DW_FORM_strp=self.Dwarf_offset(''),
        DW_FORM_line_strp=self.Dwarf_offset(''),
        DW_FORM_strx1=self.Dwarf_uint8(''),
        DW_FORM_strx2=self.Dwarf_uint16(''),
        # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
        # strx4 is a 4-byte index, like addrx4 above. Reading 8 bytes
        # here would swallow the 4 bytes that follow the attribute.
        DW_FORM_strx4=self.Dwarf_uint32(''),
        DW_FORM_flag=self.Dwarf_uint8(''),

        DW_FORM_ref=self.Dwarf_uint32(''),
        DW_FORM_ref1=self.Dwarf_uint8(''),
        DW_FORM_ref2=self.Dwarf_uint16(''),
        DW_FORM_ref4=self.Dwarf_uint32(''),
        DW_FORM_ref8=self.Dwarf_uint64(''),
        DW_FORM_ref_udata=self.Dwarf_uleb128(''),
        # Address-sized for DWARF version 2, offset-sized otherwise.
        DW_FORM_ref_addr=(
            self.Dwarf_target_addr('') if self.dwarf_version == 2
            else self.Dwarf_offset('')),

        DW_FORM_indirect=self.Dwarf_uleb128(''),

        # New forms in DWARFv4
        DW_FORM_flag_present=StaticField('', 0),
        DW_FORM_sec_offset=self.Dwarf_offset(''),
        DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
        DW_FORM_ref_sig8=self.Dwarf_uint64(''),

        DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
        DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
        # NOTE(review): this key is spelled DW_AT_*, not DW_FORM_*. It is
        # kept unchanged so existing lookups keep working; confirm intent.
        DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

        # New forms in DWARFv5
        DW_FORM_loclistx=self.Dwarf_uleb128(''),
        DW_FORM_rnglistx=self.Dwarf_uleb128('')
    )
def _create_aranges_header(self):
    """ Build Dwarf_aranges_header from these fields, in order:
        unit_length, version, debug_info_offset, address_size,
        segment_size.
    """
    fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),  # a little tbd
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_size'),
    )
    self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", *fields)
def _create_nameLUT_header(self):
    """ Build Dwarf_nameLUT_header from these fields, in order:
        unit_length, version, debug_info_offset, debug_info_length.
    """
    fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),
        self.Dwarf_length('debug_info_length'),
    )
    self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", *fields)
def _create_string_offsets_table_header(self):
    """ Build Dwarf_string_offsets_table_header from these fields, in
        order: unit_length, version, padding.
    """
    fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint16('padding'),
    )
    # The construct's name string is kept exactly as it has always been
    # spelled.
    header = Struct("Dwarf_string_offets_table_header", *fields)
    self.Dwarf_string_offsets_table_header = header
def _create_address_table_header(self):
    """ Build Dwarf_address_table_header from these fields, in order:
        unit_length, version, address_size, segment_selector_size.
    """
    fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
    )
    self.Dwarf_address_table_header = Struct(
        "Dwarf_address_table_header", *fields)
283 def _create_lineprog_header(self
):
284 # A file entry is terminated by a NULL byte, so we don't want to parse
285 # past it. Therefore an If is used.
286 self
.Dwarf_lineprog_file_entry
= Struct('file_entry',
288 If(lambda ctx
: len(ctx
.name
) != 0,
290 self
.Dwarf_uleb128('dir_index'),
291 self
.Dwarf_uleb128('mtime'),
292 self
.Dwarf_uleb128('length')))))
294 class FormattedEntry(Construct
):
295 # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
297 # Strings are resolved later, since it potentially requires
298 # looking at another section.
299 def __init__(self
, name
, structs
, format_field
):
300 Construct
.__init
__(self
, name
)
301 self
.structs
= structs
302 self
.format_field
= format_field
304 def _parse(self
, stream
, context
):
305 # Somewhat tricky technique here, explicitly writing back to the context
306 if self
.format_field
+ "_parser" in context
:
307 parser
= context
[self
.format_field
+ "_parser"]
310 Rename(f
.content_type
, self
.structs
.Dwarf_dw_form
[f
.form
])
311 for f
in context
[self
.format_field
])
312 parser
= Struct('formatted_entry', *fields
)
313 context
[self
.format_field
+ "_parser"] = parser
314 return parser
._parse
(stream
, context
)
316 ver5
= lambda ctx
: ctx
.version
>= 5
318 self
.Dwarf_lineprog_header
= Struct('Dwarf_lineprog_header',
319 self
.Dwarf_initial_length('unit_length'),
320 self
.Dwarf_uint16('version'),
322 self
.Dwarf_uint8("address_size"),
325 self
.Dwarf_uint8("segment_selector_size"),
327 self
.Dwarf_offset('header_length'),
328 self
.Dwarf_uint8('minimum_instruction_length'),
329 If(lambda ctx
: ctx
.version
>= 4,
330 self
.Dwarf_uint8("maximum_operations_per_instruction"),
332 self
.Dwarf_uint8('default_is_stmt'),
333 self
.Dwarf_int8('line_base'),
334 self
.Dwarf_uint8('line_range'),
335 self
.Dwarf_uint8('opcode_base'),
336 Array(lambda ctx
: ctx
.opcode_base
- 1,
337 self
.Dwarf_uint8('standard_opcode_lengths')),
340 Struct('directory_entry_format',
341 Enum(self
.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT
),
342 Enum(self
.Dwarf_uleb128('form'), **ENUM_DW_FORM
)),
343 self
.Dwarf_uint8("directory_entry_format_count"))),
344 If(ver5
, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
346 FormattedEntry('directories', self
, "directory_entry_format"),
347 self
.Dwarf_uleb128('directories_count'))),
350 Struct('file_name_entry_format',
351 Enum(self
.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT
),
352 Enum(self
.Dwarf_uleb128('form'), **ENUM_DW_FORM
)),
353 self
.Dwarf_uint8("file_name_entry_format_count"))),
356 FormattedEntry('file_names', self
, "file_name_entry_format"),
357 self
.Dwarf_uleb128('file_names_count'))),
358 # Legacy directories/files - DWARF < 5 only
359 If(lambda ctx
: ctx
.version
< 5,
360 RepeatUntilExcluding(
361 lambda obj
, ctx
: obj
== b
'',
362 CString('include_directory'))),
363 If(lambda ctx
: ctx
.version
< 5,
364 RepeatUntilExcluding(
365 lambda obj
, ctx
: len(obj
.name
) == 0,
366 self
.Dwarf_lineprog_file_entry
)) # array name is file_entry
def _create_callframe_entry_headers(self):
    """ Build the call-frame entry headers: Dwarf_CIE_header,
        EH_CIE_header and Dwarf_FDE_header.
    """
    # Both CIE layouts share the same leading and trailing fields. Each
    # helper returns fresh construct instances on every call.
    def leading_fields():
        return [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
        ]

    def trailing_fields():
        return [
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ]

    base_fields = leading_fields()
    base_fields += trailing_fields()
    base_cie = Struct('Dwarf_CIE_header', *base_fields)

    # EH_CIE_header always uses the layout without the two extra bytes.
    self.EH_CIE_header = base_cie

    # The CIE header was modified in DWARFv4.
    if self.dwarf_version == 4:
        v4_fields = leading_fields()
        v4_fields.append(self.Dwarf_uint8('address_size'))
        v4_fields.append(self.Dwarf_uint8('segment_size'))
        v4_fields += trailing_fields()
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', *v4_fields)
    else:
        self.Dwarf_CIE_header = base_cie

    fde_fields = (
        self.Dwarf_initial_length('length'),
        self.Dwarf_offset('CIE_pointer'),
        self.Dwarf_target_addr('initial_location'),
        self.Dwarf_target_addr('address_range'),
    )
    self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', *fde_fields)
def _make_block_struct(self, length_field):
    """ Create a struct for DW_FORM_block<size>: a PrefixedArray of
        'elem' bytes whose length prefix is parsed by length_field.

        length_field:
            A construct factory; it is called with an empty name.
    """
    element = self.Dwarf_uint8('elem')
    prefix = length_field('')
    return PrefixedArray(subcon=element, length_field=prefix)
406 def _create_loclists_parsers(self
):
        """ Create a struct for debug_loclists CU header, DWARFv5, 7.29
409 self
.Dwarf_loclists_CU_header
= Struct('Dwarf_loclists_CU_header',
410 StreamOffset('cu_offset'),
411 self
.Dwarf_initial_length('unit_length'),
412 Value('is64', lambda ctx
: ctx
.is64
),
413 StreamOffset('offset_after_length'),
414 self
.Dwarf_uint16('version'),
415 self
.Dwarf_uint8('address_size'),
416 self
.Dwarf_uint8('segment_selector_size'),
417 self
.Dwarf_uint32('offset_count'),
418 StreamOffset('offset_table_offset'))
420 cld
= self
.Dwarf_loclists_counted_location_description
= PrefixedArray(self
.Dwarf_uint8('loc_expr'), self
.Dwarf_uleb128(''))
422 self
.Dwarf_loclists_entries
= RepeatUntilExcluding(
423 lambda obj
, ctx
: obj
.entry_type
== 'DW_LLE_end_of_list',
425 StreamOffset('entry_offset'),
426 Enum(self
.Dwarf_uint8('entry_type'), **ENUM_DW_LLE
),
427 Embed(Switch('', lambda ctx
: ctx
.entry_type
,
429 'DW_LLE_end_of_list' : Struct('end_of_list'),
430 'DW_LLE_base_addressx' : Struct('base_addressx', self
.Dwarf_uleb128('index')),
431 'DW_LLE_startx_endx' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('end_index'), cld
),
432 'DW_LLE_startx_length' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('length'), cld
),
433 'DW_LLE_offset_pair' : Struct('startx_endx', self
.Dwarf_uleb128('start_offset'), self
.Dwarf_uleb128('end_offset'), cld
),
434 'DW_LLE_default_location' : Struct('default_location', cld
),
435 'DW_LLE_base_address' : Struct('base_address', self
.Dwarf_target_addr('address')),
436 'DW_LLE_start_end' : Struct('start_end', self
.Dwarf_target_addr('start_address'), self
.Dwarf_target_addr('end_address'), cld
),
437 'DW_LLE_start_length' : Struct('start_length', self
.Dwarf_target_addr('start_address'), self
.Dwarf_uleb128('length'), cld
),
439 StreamOffset('entry_end_offset'),
440 Value('entry_length', lambda ctx
: ctx
.entry_end_offset
- ctx
.entry_offset
)))
442 self
.Dwarf_locview_pair
= Struct('locview_pair',
443 StreamOffset('entry_offset'), self
.Dwarf_uleb128('begin'), self
.Dwarf_uleb128('end'))
445 def _create_rnglists_parsers(self
):
446 self
.Dwarf_rnglists_CU_header
= Struct('Dwarf_rnglists_CU_header',
447 StreamOffset('cu_offset'),
448 self
.Dwarf_initial_length('unit_length'),
449 Value('is64', lambda ctx
: ctx
.is64
),
450 StreamOffset('offset_after_length'),
451 self
.Dwarf_uint16('version'),
452 self
.Dwarf_uint8('address_size'),
453 self
.Dwarf_uint8('segment_selector_size'),
454 self
.Dwarf_uint32('offset_count'),
455 StreamOffset('offset_table_offset'))
457 self
.Dwarf_rnglists_entries
= RepeatUntilExcluding(
458 lambda obj
, ctx
: obj
.entry_type
== 'DW_RLE_end_of_list',
460 StreamOffset('entry_offset'),
461 Enum(self
.Dwarf_uint8('entry_type'), **ENUM_DW_RLE
),
462 Embed(Switch('', lambda ctx
: ctx
.entry_type
,
464 'DW_RLE_end_of_list' : Struct('end_of_list'),
465 'DW_RLE_base_addressx' : Struct('base_addressx', self
.Dwarf_uleb128('index')),
466 'DW_RLE_startx_endx' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('end_index')),
467 'DW_RLE_startx_length' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('length')),
468 'DW_RLE_offset_pair' : Struct('startx_endx', self
.Dwarf_uleb128('start_offset'), self
.Dwarf_uleb128('end_offset')),
469 'DW_RLE_base_address' : Struct('base_address', self
.Dwarf_target_addr('address')),
470 'DW_RLE_start_end' : Struct('start_end', self
.Dwarf_target_addr('start_address'), self
.Dwarf_target_addr('end_address')),
471 'DW_RLE_start_length' : Struct('start_length', self
.Dwarf_target_addr('start_address'), self
.Dwarf_uleb128('length'))
473 StreamOffset('entry_end_offset'),
474 Value('entry_length', lambda ctx
: ctx
.entry_end_offset
- ctx
.entry_offset
)))
477 class _InitialLengthAdapter(Adapter
):
478 """ A standard Construct adapter that expects a sub-construct
479 as a struct with one or two values (first, second).
481 def _decode(self
, obj
, context
):
482 if obj
.first
< 0xFFFFFF00:
483 context
['is64'] = False
486 if obj
.first
== 0xFFFFFFFF:
487 context
['is64'] = True
490 raise ConstructError("Failed decoding initial length for %X" % (