Indirect encoding support (#430)
[pyelftools.git] / elftools / dwarf / die.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/die.py
3 #
4 # DWARF Debugging Information Entry
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import namedtuple, OrderedDict
10 import os
11
12 from ..common.exceptions import DWARFError
13 from ..common.py3compat import bytes2str, iteritems
14 from ..common.utils import struct_parse, preserve_stream_pos
15 from .enums import DW_FORM_raw2name
16 from .dwarf_util import _resolve_via_offset_table, _get_base_offset
17
18
# AttributeValue - a single attribute of a DIE, as an immutable record.
#
# Fields, in order:
#
#   name:
#       The DW_AT_* name of the attribute.
#
#   form:
#       The DW_FORM_* name of the attribute.
#
#   value:
#       The value parsed from the section and translated according to the
#       form (e.g. for a DW_FORM_strp it's the actual string taken from the
#       string table).
#
#   raw_value:
#       The value exactly as parsed from the section, before translation -
#       used for debugging and presentation (e.g. for a DW_FORM_strp it's
#       the raw string offset into the table).
#
#   offset:
#       Offset of this attribute's value in the stream (absolute offset,
#       relative to the beginning of the whole stream).
#
AttributeValue = namedtuple(
    'AttributeValue',
    ('name', 'form', 'value', 'raw_value', 'offset'))
41
42
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag (None for a null entry)

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    # Forms whose raw value is an index that has to be resolved through a
    # DW_AT_xxx_base attribute of the CU's top DIE. Kept in one place so that
    # _translate_attr_value and _translate_indirect_attributes cannot drift
    # apart.
    _ADDRX_FORMS = (
        'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2',
        'DW_FORM_addrx3', 'DW_FORM_addrx4')
    _STRX_FORMS = (
        'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2',
        'DW_FORM_strx3', 'DW_FORM_strx4')
    _INDEXED_FORMS = _STRX_FORMS + _ADDRX_FORMS + (
        'DW_FORM_loclistx', 'DW_FORM_rnglistx')

    # Reference forms whose raw value is an offset relative to the CU start.
    _CU_RELATIVE_REF_FORMS = (
        'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
        'DW_FORM_ref8', 'DW_FORM_ref')

    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.

            Raises KeyError if this DIE has no such attribute,
            NotImplementedError for reference forms that are not supported
            yet, and DWARFError if the attribute is not a reference at all.
        """
        attr = self.attributes[name]
        form = attr.form
        if form in self._CU_RELATIVE_REF_FORMS:
            # The raw value is relative to the start of the containing CU
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        elif form == 'DW_FORM_ref_addr':
            # The raw value is passed as is to the DWARFInfo-level lookup
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % form)
        elif form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'):
            raise NotImplementedError('%s to dwo' % form)
        else:
            # attr is a namedtuple, so it has to be wrapped in a 1-tuple;
            # otherwise the % operator would unpack its five fields and fail
            # with a TypeError instead of raising the intended DWARFError.
            raise DWARFError(
                '%s is not a reference class form attribute' % (attr,))

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice (a missing one is
            treated as an empty string). The attribute values are converted
            with bytes2str, so the returned value is a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE (the other children of its parent).

            Yields nothing if this DIE has no parent.
        """
        parent = self.get_parent()
        if not parent:
            # Falling off the end is the only correct way to finish a
            # generator: an explicit "raise StopIteration" is converted into
            # a RuntimeError under PEP 479.
            return
        for sibling in parent.iter_children():
            if sibling is not self:
                yield sibling

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #

    def set_parent(self, die):
        """ Record die as the parent of this DIE.
        """
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree.  The top DIE is the ancestor
            of all DIEs in the unit.  Each parent is guaranteed to be at
            an offset less than their children.  In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.

            Raises ValueError if the walk cannot get any closer to this DIE's
            offset.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified.  To avoid execution for each sibling record all
        # the children of any parent iterated.  Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))

            search = prev

    def __repr__(self):
        # Collect the pieces and join once instead of growing a string with +=
        lines = ['DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)]
        for attrname, attrval in iteritems(self.attributes):
            lines.append('    |%-18s:  %s\n' % (attrname, attrval))
        return ''.join(lines)

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for spec in abbrev_decl['attr_spec']:
            form = spec.form
            name = spec.name
            attr_offset = self.stream.tell()
            # Special case here: the attribute value is stored in the attribute
            # definition in the abbreviation spec, not in the DIE itself.
            if form == 'DW_FORM_implicit_const':
                value = spec.value
                raw_value = value
            else:
                raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
                value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset)

        self.size = self.stream.tell() - self.offset

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form

            form:
                The DW_FORM_* name the raw value was parsed with.

            raw_value:
                The value as parsed from the stream.

            Returns the translated value; forms that need no translation
            (or index-based forms that cannot be resolved yet) come back as
            raw_value unchanged. Raises DWARFError on a DW_FORM_indirect
            whose form code is unknown.
        """
        # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
        # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
        # This breaks if there is an indirect encoding in the top DIE itself before the
        # corresponding _base, and it was seen in the wild.
        # There is a hook in get_top_DIE() to resolve those lazily.
        translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset
        value = None
        if form == 'DW_FORM_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_line_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_linetable(raw_value)
        elif form == 'DW_FORM_flag':
            value = raw_value != 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form == 'DW_FORM_indirect':
            # The raw value is the code of the actual form; the real value
            # follows it in the stream.
            try:
                form = DW_FORM_raw2name[raw_value]
            except KeyError:
                raise DWARFError(
                        'Found DW_FORM_indirect with unknown raw_value=' +
                        str(raw_value))

            raw_value = struct_parse(
                self.cu.structs.Dwarf_dw_form[form], self.stream)
            # Let's hope this doesn't get too deep :-)
            return self._translate_attr_value(form, raw_value)
        elif form in self._ADDRX_FORMS and translate_indirect:
            value = self.cu.dwarfinfo.get_addr(self.cu, raw_value)
        elif form in self._STRX_FORMS and translate_indirect:
            # raw_value indexes a table of string offsets; each entry is as
            # wide as a DWARF offset in this CU's format.
            stream = self.dwarfinfo.debug_str_offsets_sec.stream
            base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base')
            offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8
            with preserve_stream_pos(stream):
                str_offset = struct_parse(self.cu.structs.Dwarf_offset(''), stream, base_offset + raw_value*offset_size)
            value = self.dwarfinfo.get_string_from_table(str_offset)
        elif form == 'DW_FORM_loclistx' and translate_indirect:
            value = _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base')
        elif form == 'DW_FORM_rnglistx' and translate_indirect:
            value = _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base')
        else:
            value = raw_value
        return value

    def _translate_indirect_attributes(self):
        """ This is a hook to translate the DW_FORM_...x values in the top DIE
            once the top DIE is parsed to the end. They can't be translated
            while the top DIE is being parsed, because they implicitly make a
            reference to the DW_AT_xxx_base attribute in the same DIE that may
            not have been parsed yet.
        """
        # Iterate over a snapshot: entries are replaced while walking.
        for key, attr in list(iteritems(self.attributes)):
            if attr.form in self._INDEXED_FORMS:
                # Can't change value in place, got to replace the whole attribute record
                self.attributes[key] = AttributeValue(
                    name=attr.name,
                    form=attr.form,
                    value=self._translate_attr_value(attr.form, attr.raw_value),
                    raw_value=attr.raw_value,
                    offset=attr.offset)