1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct
import (
11 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
12 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
13 Adapter
, Struct
, ConstructError
, If
, RepeatUntil
, Field
, Rename
, Enum
,
14 Array
, PrefixedArray
, CString
, Embed
, StaticField
16 from ..common
.construct_utils
import RepeatUntilExcluding
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                an (empty) name.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+), Dwarf_FDE_header (+):
                Call-frame entry headers

            Dwarf_aranges_header (+):
                Address range table header

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8)

            dwarf_version:
                Numeric DWARF version; only used to select the layout of
                the CIE header.
        """
        # Kept as asserts (not raises) so the exception type seen by existing
        # callers does not change.
        assert dwarf_format in (32, 64)
        assert address_size in (4, 8)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field, in bytes: 4 for the 32-bit DWARF
            format, 12 (4-byte escape word + 8-byte length) otherwise.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the endianness-specific integer constructs to the Dwarf_*
            attributes and then build all the composite structs from them.
        """
        if self.little_endian:
            unsigned = (ULInt8, ULInt16, ULInt32, ULInt64)
            signed = (SLInt8, SLInt16, SLInt32, SLInt64)
        else:
            unsigned = (UBInt8, UBInt16, UBInt32, UBInt64)
            signed = (SBInt8, SBInt16, SBInt32, SBInt64)

        (self.Dwarf_uint8, self.Dwarf_uint16,
         self.Dwarf_uint32, self.Dwarf_uint64) = unsigned
        (self.Dwarf_int8, self.Dwarf_int16,
         self.Dwarf_int32, self.Dwarf_int64) = signed

        # Offsets follow the DWARF format, target addresses follow the
        # machine's address size.
        self.Dwarf_offset = (
            self.Dwarf_uint32 if self.dwarf_format == 32
            else self.Dwarf_uint64)
        self.Dwarf_target_addr = (
            self.Dwarf_uint32 if self.address_size == 4
            else self.Dwarf_uint64)

        # Order matters: the later structs use Dwarf_initial_length and the
        # LEB128 creators set up by the first two calls.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()

    def _create_initial_length(self):
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            #
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        self.Dwarf_uleb128 = _ULEB128
        self.Dwarf_sleb128 = _SLEB128

    def _create_cu_header(self):
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_abbrev_offset'),
            self.Dwarf_uint8('address_size'))

    def _create_abbrev_declaration(self):
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            # Attribute specs are read up to (and excluding) the null pair.
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))

    def _create_dw_form(self):
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # NOTE(review): DWARF v2 appears to define ref_addr as
            # address-sized rather than offset-sized - confirm against the
            # spec before relying on this for v2 input.
            DW_FORM_ref_addr=self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            # A type signature is always 8 bytes, independent of the DWARF
            # format; Dwarf_offset would read only 4 bytes in 32-bit DWARF.
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'), # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_lineprog_header(self):
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # This field is only read for version >= 4; otherwise the
            # value 1 is used.
            If(lambda ctx: ctx['version'] >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            # Both lists end with an empty entry, which is not kept.
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory')),
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry),
            )

    def _create_callframe_entry_headers(self):
        cie_fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
        ]
        # The CIE header was modified in DWARFv4.
        if self.dwarf_version == 4:
            cie_fields += [
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
            ]
        cie_fields += [
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ]
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', *cie_fields)

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of bytes
            prefixed by its length, where length_field is the construct
            creator used to parse that length.
        """
        return PrefixedArray(
            subcon=self.Dwarf_uint8('elem'),
            length_field=length_field(''))
class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter that expects a sub-construct
        as a struct with one or two values (first, second).
    """
    def _decode(self, obj, context):
        """ Collapse the parsed (first, second) pair into a single length.

            Returns obj.first when it is below 0xFFFFFF00, and obj.second
            when obj.first is the 0xFFFFFFFF escape value. Any other value
            of obj.first raises ConstructError.
        """
        if obj.first < 0xFFFFFF00:
            return obj.first
        if obj.first == 0xFFFFFFFF:
            return obj.second
        raise ConstructError("Failed decoding initial length for %X" % (
            obj.first))
def _LEB128_reader():
    """ Read LEB128 variable-length data from the stream. The data is terminated
        by a byte with 0 in its highest bit.
    """
    # Single bytes are collected until one is below 0x80 (high bit clear).
    return RepeatUntil(
                lambda obj, ctx: ord(obj) < 0x80,
                Field(None, 1))
class _ULEB128Adapter(Adapter):
    """ An adapter for ULEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        """ Combine the low 7 bits of every byte in obj into one unsigned
            integer; the first byte holds the least significant bits.
        """
        value = 0
        # Walk from the last (most significant) byte back to the first.
        for b in reversed(obj):
            value = (value << 7) + (ord(b) & 0x7F)
        return value
class _SLEB128Adapter(Adapter):
    """ An adapter for SLEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        """ Combine the low 7 bits of every byte in obj into one signed
            integer; the first byte holds the least significant bits and
            bit 0x40 of the last byte is the sign.
        """
        value = 0
        # Walk from the last (most significant) byte back to the first.
        for b in reversed(obj):
            value = (value << 7) + (ord(b) & 0x7F)
        if ord(obj[-1]) & 0x40:
            # negative -> sign extend
            #
            value |= - (1 << (7 * len(obj)))
        return value
def _ULEB128(name):
    """ A construct creator for ULEB128 encoding.
    """
    return Rename(name, _ULEB128Adapter(_LEB128_reader()))
def _SLEB128(name):
    """ A construct creator for SLEB128 encoding.
    """
    return Rename(name, _SLEB128Adapter(_LEB128_reader()))