* completed implementation of abbrevtable
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64,
12 ULInt8, ULInt16, ULInt32, ULInt64,
13 Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
14 PrefixedArray, CString,
15 )
16
17 from .enums import *
18
19
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Configurable with endianness, DWARF format (32 or 64-bit)
        and, optionally, the address size of the target machine.

        Accessible attributes (mostly described in chapter 7 of the DWARF
        spec v3):

            Dwarf_uint{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                Word used for DW_FORM_addr. Sized by address_size when it
                was given to the constructor; otherwise the same as
                Dwarf_offset (the historical behavior of this class).

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header:
                Compilation unit header

            Dwarf_abbrev_declaration:
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form:
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

        See also the documentation of public methods.
    """
    def __init__(self, little_endian=True, dwarf_format=32, address_size=None):
        """ little_endian:
                True to parse little-endian data, False for big-endian.

            dwarf_format:
                32 or 64 - selects the width of Dwarf_offset.

            address_size:
                Size in bytes (1, 2, 4 or 8) of an address on the target
                machine, used for DW_FORM_addr. When None (the default),
                DW_FORM_addr falls back to the width of Dwarf_offset.
                Raises ValueError for any other value.
        """
        assert dwarf_format == 32 or dwarf_format == 64
        # Validate before any struct is built, so that a bad value fails
        # early and with a clear message.
        if address_size is not None and address_size not in (1, 2, 4, 8):
            raise ValueError(
                'address_size must be None, 1, 2, 4 or 8; got %r' % (
                    address_size,))
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field.
        """
        # 32-bit format: a single 4-byte word. 64-bit format: the 4-byte
        # escape word followed by an 8-byte length.
        return 4 if self.dwarf_format == 32 else 12

    def initial_lenght_field_size(self):
        """ Misspelled alias of initial_length_field_size, kept so that
            existing callers continue to work.
        """
        return self.initial_length_field_size()

    def _create_structs(self):
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64

        # The size of DW_FORM_addr is determined by the target's address
        # size, which is independent of the 32/64-bit DWARF format. Without
        # an explicit address_size the old behavior is preserved.
        if self.address_size is None:
            self.Dwarf_target_addr = self.Dwarf_offset
        else:
            self.Dwarf_target_addr = {
                1: self.Dwarf_uint8,
                2: self.Dwarf_uint16,
                4: self.Dwarf_uint32,
                8: self.Dwarf_uint64,
            }[self.address_size]

        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()

    def _create_initial_length(self):
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            #
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        self.Dwarf_uleb128 = _ULEB128
        self.Dwarf_sleb128 = _SLEB128

    def _create_cu_header(self):
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_abbrev_offset'),
            self.Dwarf_uint8('address_size'))

    def _create_abbrev_declaration(self):
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            # Attribute specifications are read up to and including the
            # terminating (DW_AT_null, DW_FORM_null) pair.
            RepeatUntil(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))

    def _create_dw_form(self):
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            DW_FORM_ref_addr=self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),
        )

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                A construct creator (called with a name) for the field that
                holds the number of bytes in the block.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))
158
159
class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter that expects a sub-construct
        as a struct with one or two values (first, second).

        Decodes to the actual length: 'first' for a 32-bit initial length,
        'second' when 'first' is the 0xFFFFFFFF escape value that introduces
        a 64-bit length. Raises ConstructError for the remaining reserved
        values of 'first'.
    """
    def _decode(self, obj, context):
        # Only 0xFFFFFFF0..0xFFFFFFFF are reserved by the DWARF spec
        # (section 7.4); anything below that is a plain 32-bit length.
        if obj.first < 0xFFFFFFF0:
            return obj.first
        elif obj.first == 0xFFFFFFFF:
            return obj.second
        else:
            raise ConstructError("Failed decoding initial length for %X" % (
                                    obj.first))
173
174
def _LEB128_reader():
    """ Build a construct that collects the raw bytes of a LEB128 value from
        the stream. Bytes are gathered one at a time; the last byte of the
        value is the one whose highest bit is 0.
    """
    def _is_final_byte(obj, ctx):
        # A clear high bit (value below 0x80) marks the end of the sequence
        return ord(obj) < 0x80

    single_byte = Field(None, 1)
    return RepeatUntil(_is_final_byte, single_byte)
182
183
class _ULEB128Adapter(Adapter):
    """ Adapter turning the byte sequence produced by a sub-construct into
        the unsigned integer it encodes in ULEB128.
    """
    def _decode(self, obj, context):
        result = 0
        # Each byte carries 7 payload bits; earlier bytes are less
        # significant, so byte number i lands at bit position 7*i.
        for index, byte in enumerate(obj):
            result |= (ord(byte) & 0x7F) << (7 * index)
        return result
192
193
class _SLEB128Adapter(Adapter):
    """ Adapter turning the byte sequence produced by a sub-construct into
        the signed integer it encodes in SLEB128.
    """
    def _decode(self, obj, context):
        result = 0
        # Each byte carries 7 payload bits; earlier bytes are less
        # significant, so byte number i lands at bit position 7*i.
        for index, byte in enumerate(obj):
            result |= (ord(byte) & 0x7F) << (7 * index)

        # Bit 6 of the last byte is the sign bit of the encoded value
        is_negative = ord(obj[-1]) & 0x40
        if is_negative:
            # Sign extend: the accumulated value occupies exactly
            # 7 * len(obj) bits, so subtracting 2**(7 * len(obj)) yields
            # the two's complement interpretation.
            result -= 1 << (7 * len(obj))
        return result
206
207
def _ULEB128(name):
    """ Create a construct called 'name' that parses a ULEB128-encoded value.
    """
    raw_bytes = _LEB128_reader()
    decoder = _ULEB128Adapter(raw_bytes)
    return Rename(name, decoder)
212
213
def _SLEB128(name):
    """ Create a construct called 'name' that parses a SLEB128-encoded value.
    """
    raw_bytes = _LEB128_reader()
    decoder = _SLEB128Adapter(raw_bytes)
    return Rename(name, decoder)
218
219