cleanups of trailing whitespace
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
12 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
13 Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
14 Array, PrefixedArray, CString, Embed,
15 )
16 from ..common.construct_utils import RepeatUntilExcluding
17
18 from .enums import *
19
20
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        See also the documentation of public methods.
    """
    def __init__(self, little_endian, dwarf_format, address_size):
        """ little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format in (32, 64)
        assert address_size in (8, 4)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self._create_structs()

    def initial_length_field_size(self):
        """ Number of bytes taken by an initial length field: 4 in the 32-bit
            DWARF format, 12 otherwise.
        """
        if self.dwarf_format == 32:
            return 4
        return 12

    def _create_structs(self):
        """ Bind the fixed-size integer constructs for the configured byte
            order, then build every composite struct on top of them.
        """
        # Select one family of integer constructs up front so the attribute
        # assignments below are shared between both byte orders.
        if self.little_endian:
            u8, u16, u32, u64 = ULInt8, ULInt16, ULInt32, ULInt64
            s8, s16, s32, s64 = SLInt8, SLInt16, SLInt32, SLInt64
        else:
            u8, u16, u32, u64 = UBInt8, UBInt16, UBInt32, UBInt64
            s8, s16, s32, s64 = SBInt8, SBInt16, SBInt32, SBInt64

        self.Dwarf_uint8 = u8
        self.Dwarf_uint16 = u16
        self.Dwarf_uint32 = u32
        self.Dwarf_uint64 = u64
        self.Dwarf_int8 = s8
        self.Dwarf_int16 = s16
        self.Dwarf_int32 = s32
        self.Dwarf_int64 = s64

        # Offsets follow the DWARF format; target addresses follow the
        # machine address size.
        self.Dwarf_offset = u32 if self.dwarf_format == 32 else u64
        self.Dwarf_target_addr = u32 if self.address_size == 4 else u64

        # The later creators use Dwarf_initial_length and the LEB128
        # constructs, so those two must be set up first.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()

    def _create_initial_length(self):
        """ Install Dwarf_initial_length, a named-construct factory for the
            "initial length" field.
        """
        def _has_second_word(ctx):
            # 0xFFFFFFFF in the first word is the continuation value: the
            # real length follows as a 64-bit word.
            return ctx.first == 0xFFFFFFFF

        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length
            # field. The second word is read from the stream only when the
            # first word is the continuation value.
            raw_length = Struct(name,
                self.Dwarf_uint32('first'),
                If(_has_second_word,
                    self.Dwarf_uint64('second'),
                    elsevalue=None))
            return _InitialLengthAdapter(raw_length)

        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the module-level LEB128 construct creators as attributes.
        """
        self.Dwarf_sleb128 = _SLEB128
        self.Dwarf_uleb128 = _ULEB128

    def _create_cu_header(self):
        """ Build the compilation unit header struct.
        """
        header_fields = [
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_abbrev_offset'),
            self.Dwarf_uint8('address_size'),
        ]
        self.Dwarf_CU_header = Struct('Dwarf_CU_header', *header_fields)

    def _create_abbrev_declaration(self):
        """ Build the abbreviation declaration struct: a tag, a children
            flag and a list of attribute specs that stops at (and excludes)
            the null name/form pair.
        """
        def _is_null_spec(obj, ctx):
            return obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null'

        attr_spec = Struct('attr_spec',
            Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
            Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))

        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(_is_null_spec, attr_spec))

    def _create_dw_form(self):
        """ Build the mapping from 'DW_FORM_*' names to the constructs that
            parse them. All constructs are given an empty dummy name.
        """
        self.Dwarf_dw_form = {
            'DW_FORM_addr': self.Dwarf_target_addr(''),

            'DW_FORM_block1': self._make_block_struct(self.Dwarf_uint8),
            'DW_FORM_block2': self._make_block_struct(self.Dwarf_uint16),
            'DW_FORM_block4': self._make_block_struct(self.Dwarf_uint32),
            'DW_FORM_block': self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            'DW_FORM_data1': self.Dwarf_uint8(''),
            'DW_FORM_data2': self.Dwarf_uint16(''),
            'DW_FORM_data4': self.Dwarf_uint32(''),
            'DW_FORM_data8': self.Dwarf_uint64(''),
            'DW_FORM_sdata': self.Dwarf_sleb128(''),
            'DW_FORM_udata': self.Dwarf_uleb128(''),

            'DW_FORM_string': CString(''),
            'DW_FORM_strp': self.Dwarf_offset(''),
            'DW_FORM_flag': self.Dwarf_uint8(''),

            'DW_FORM_ref1': self.Dwarf_uint8(''),
            'DW_FORM_ref2': self.Dwarf_uint16(''),
            'DW_FORM_ref4': self.Dwarf_uint32(''),
            'DW_FORM_ref8': self.Dwarf_uint64(''),
            'DW_FORM_ref_udata': self.Dwarf_uleb128(''),
            'DW_FORM_ref_addr': self.Dwarf_offset(''),

            'DW_FORM_indirect': self.Dwarf_uleb128(''),
        }

    def _create_lineprog_header(self):
        """ Build the line program file entry struct and the line program
            header struct that uses it.
        """
        def _has_name(ctx):
            return len(ctx.name) != 0

        def _is_empty_dir(obj, ctx):
            return obj == b''

        def _is_empty_file_entry(obj, ctx):
            return len(obj.name) == 0

        # A file entry is terminated by a NULL byte, so we don't want to
        # parse past it. Therefore the remaining fields sit behind an If.
        file_details = Struct('',
            self.Dwarf_uleb128('dir_index'),
            self.Dwarf_uleb128('mtime'),
            self.Dwarf_uleb128('length'))
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(_has_name, Embed(file_details)))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            # One length byte per standard opcode: opcode_base - 1 entries.
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            RepeatUntilExcluding(
                _is_empty_dir,
                CString('include_directory')),
            RepeatUntilExcluding(
                _is_empty_file_entry,
                self.Dwarf_lineprog_file_entry),
            )

    def _create_callframe_entry_headers(self):
        """ Build the call-frame CIE and FDE header structs.
        """
        cie_fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ]
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', *cie_fields)

        fde_fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'),
        ]
        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', *fde_fields)

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of single
            bytes prefixed by a length parsed with length_field.
        """
        element = self.Dwarf_uint8('elem')
        prefix = length_field('')
        return PrefixedArray(subcon=element, length_field=prefix)
250
251
class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter that expects a sub-construct
        as a struct with one or two values (first, second), and collapses
        it into a single length value.
    """
    def _decode(self, obj, context):
        first_word = obj.first

        # Anything below 0xFFFFFF00 is taken as the length itself.
        if first_word < 0xFFFFFF00:
            return first_word

        # 0xFFFFFFFF means the length is carried by the second word.
        if first_word == 0xFFFFFFFF:
            return obj.second

        # Every other value in the top range is rejected.
        raise ConstructError(
            "Failed decoding initial length for %X" % (first_word))
265
266
def _LEB128_reader():
    """ Build a construct that reads LEB128 variable-length data from the
        stream, one byte at a time. Reading stops after the first byte whose
        highest bit is 0.
    """
    def _is_final_byte(obj, ctx):
        # A value below 0x80 has its highest bit clear.
        return ord(obj) < 0x80

    single_byte = Field(None, 1)
    return RepeatUntil(_is_final_byte, single_byte)
274
275
class _ULEB128Adapter(Adapter):
    """ An adapter for ULEB128, given a sequence of bytes in a sub-construct.
        Each byte contributes its low 7 bits; the first byte holds the least
        significant group.
    """
    def _decode(self, obj, context):
        result = 0
        for position, byte in enumerate(obj):
            # Place this 7-bit group at its own offset in the result.
            result |= (ord(byte) & 0x7F) << (7 * position)
        return result
284
285
class _SLEB128Adapter(Adapter):
    """ An adapter for SLEB128, given a sequence of bytes in a sub-construct.
        Each byte contributes its low 7 bits, least significant group first;
        bit 0x40 of the last byte is the sign bit.
    """
    def _decode(self, obj, context):
        magnitude = 0
        for position, byte in enumerate(obj):
            magnitude |= (ord(byte) & 0x7F) << (7 * position)

        is_negative = ord(obj[-1]) & 0x40
        if not is_negative:
            return magnitude

        # negative -> sign extend: set every bit above the 7 * len(obj)
        # bits that were actually read.
        sign_extension = -(1 << (7 * len(obj)))
        return magnitude | sign_extension
298
299
def _ULEB128(name):
    """ A construct creator for ULEB128 encoding: returns a construct called
        `name` that reads LEB128 bytes and decodes them as unsigned.
    """
    raw_bytes = _LEB128_reader()
    decoded = _ULEB128Adapter(raw_bytes)
    return Rename(name, decoded)
304
305
def _SLEB128(name):
    """ A construct creator for SLEB128 encoding: returns a construct called
        `name` that reads LEB128 bytes and decodes them as signed.
    """
    raw_bytes = _LEB128_reader()
    decoded = _SLEB128Adapter(raw_bytes)
    return Rename(name, decoded)
310
311