Merge pull request #108 from dorothychen/aranges
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
12 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
13 Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
14 Array, PrefixedArray, CString, Embed, StaticField
15 )
16 from ..common.construct_utils import RepeatUntilExcluding
17
18 from .enums import *
19
20
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

            Dwarf_aranges_header (+):
                Header of an address-ranges set (unit_length, version,
                debug_info_offset, address_size, segment_size)

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field, in bytes: 4 for the 32-bit DWARF
            format, 12 otherwise (the 32-bit first word followed by the 64-bit
            second word, as parsed by Dwarf_initial_length).
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the endianness- and size-dependent primitive constructs as
            attributes, then build all the composite structs on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # The composite structs below rely on the primitives assigned above.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory taking a name and returning
            a construct that parses an initial length field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            #
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the module-level LEB128 construct creators as attributes.
        """
        self.Dwarf_uleb128 = _ULEB128
        self.Dwarf_sleb128 = _SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header, the compilation unit header struct.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_abbrev_offset'),
            self.Dwarf_uint8('address_size'))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specs. The terminating (DW_AT_null, DW_FORM_null)
            pair ends the list and is not included in it.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form, a dictionary mapping form names to the
            (already named) constructs that parse a value of that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            DW_FORM_ref_addr=self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            # A type signature is always 8 bytes wide; it must not follow the
            # 32/64-bit offset size, or 32-bit DWARF streams get out of sync.
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): this key is an attribute name (DW_AT_*), not a
            # form name. Kept unchanged for backward compatibility - confirm
            # whether any caller actually looks it up.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header, the header of an address-ranges set.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'), # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # This byte is only read from the stream for version >= 4; the
            # trailing 1 is presumably the value used for older versions.
            If(lambda ctx: ctx['version'] >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            # Both lists below end at an empty string / empty file name,
            # which is consumed but not included in the result.
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory')),
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry),
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header and Dwarf_FDE_header. The CIE layout
            depends on self.dwarf_version.
        """
        cie_fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
        ]
        # The CIE header was modified in DWARFv4: two extra one-byte fields
        # sit between the augmentation string and the alignment factors.
        if self.dwarf_version == 4:
            cie_fields.extend([
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
            ])
        cie_fields.extend([
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ])
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', *cie_fields)

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                A construct creator (called here with a dummy name) that
                parses the element count preceding the block's bytes.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))
291
292
class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter that expects a sub-construct
        as a struct with one or two values (first, second).
    """
    def _decode(self, obj, context):
        """ Return the length carried by the parsed (first, second) pair.

            The escape value 0xFFFFFFFF in 'first' selects 'second'; any
            other value of 0xFFFFFF00 or above is rejected with a
            ConstructError; everything else is returned as-is.
        """
        first_word = obj.first
        if first_word == 0xFFFFFFFF:
            return obj.second
        if first_word >= 0xFFFFFF00:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))
        return first_word
306
307
def _LEB128_reader():
    """ Build a construct that reads LEB128 variable-length data from the
        stream, one byte at a time. Reading stops after the first byte whose
        highest bit is 0.
    """
    def _is_final_byte(obj, ctx):
        # Continuation bytes have values of 0x80 and above.
        return ord(obj) < 0x80

    single_byte = Field(None, 1)
    return RepeatUntil(_is_final_byte, single_byte)
315
316
class _ULEB128Adapter(Adapter):
    """ An adapter for ULEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        """ Combine the low 7 bits of each byte into one unsigned integer;
            the first byte of the sequence is the least significant group.
        """
        return sum(
            (ord(byte) & 0x7F) << (7 * position)
            for position, byte in enumerate(obj))
325
326
class _SLEB128Adapter(Adapter):
    """ An adapter for SLEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        """ Combine the low 7 bits of each byte into one integer (first byte
            least significant), then sign-extend if bit 6 of the last byte
            is set.
        """
        magnitude = sum(
            (ord(byte) & 0x7F) << (7 * position)
            for position, byte in enumerate(obj))
        if not ord(obj[-1]) & 0x40:
            return magnitude
        # negative -> sign extend above the 7 * len(obj) payload bits
        #
        return magnitude | -(1 << (7 * len(obj)))
339
340
def _ULEB128(name):
    """ A construct creator for ULEB128 encoding: the raw LEB128 byte reader
        wrapped in the unsigned adapter and renamed to 'name'.
    """
    raw_bytes = _LEB128_reader()
    decoder = _ULEB128Adapter(raw_bytes)
    return Rename(name, decoder)
345
346
def _SLEB128(name):
    """ A construct creator for SLEB128 encoding: the raw LEB128 byte reader
        wrapped in the signed adapter and renamed to 'name'.
    """
    raw_bytes = _LEB128_reader()
    decoder = _SLEB128Adapter(raw_bytes)
    return Rename(name, decoder)
351
352