Implement register name decoding in descriptions. This also allows to remove
[pyelftools.git] / elftools / dwarf / descriptions.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/descriptions.py
3 #
4 # Textual descriptions of the various values and enums of DWARF
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import defaultdict
10
11 from .constants import *
12 from .dwarf_expr import GenericExprVisitor
13 from .die import DIE
14 from ..common.utils import preserve_stream_pos
15
16
def set_global_machine_arch(machine_arch):
    """ Record machine_arch as the module-wide machine architecture.

        The stored value is the one ExprDumper hands to describe_reg_name
        when it decodes register operands.
    """
    global _MACHINE_ARCH
    _MACHINE_ARCH = machine_arch
20
21
def describe_attr_value(attr, die, section_offset):
    """ Build the textual representation of the value of attribute attr,
        suitable for tools like readelf.

        attr: the attribute to describe; its form selects the value
              formatter and its name selects the extra-info formatter
        die: the DIE this attribute was extracted from
        section_offset: offset in the stream of the section the DIE
                        belongs to

        Returns the value description, a tab character, and the extra
        information for this attribute (which may be an empty string).
    """
    # Both maps are defaultdicts, so unknown forms/names get a default
    # formatter instead of raising KeyError.
    value_text = _ATTR_DESCRIPTION_MAP[attr.form](attr, die, section_offset)

    # For some attributes further information can be displayed
    extra_text = _EXTRA_INFO_DESCRIPTION_MAP[attr.name](
        attr, die, section_offset)
    return str(value_text) + '\t' + extra_text
38
39
def describe_reg_name(regnum, machine_arch):
    """ Provide a textual description for a register name, given its serial
        number.

        regnum: the DWARF register number
        machine_arch: 'x86' or 'x64'; any other value has no register table

        Returns the register name, or '<none>' when the architecture is not
        known or regnum falls outside the architecture's table.
    """
    if machine_arch == 'x86':
        reg_names = _REG_NAMES_x86
    elif machine_arch == 'x64':
        reg_names = _REG_NAMES_x64
    else:
        return '<none>'

    # Register numbers can come from DW_OP_regx operands, which are not
    # limited to the range covered by the tables. Reject negative numbers
    # explicitly so they don't silently index from the end of the list.
    if 0 <= regnum < len(reg_names):
        return reg_names[regnum]
    return '<none>'
50
51 #-------------------------------------------------------------------------------
52
# The machine architecture used when decoding register names. It stays None
# until set_global_machine_arch is called.
#
_MACHINE_ARCH = None
56
57
def _describe_attr_ref(attr, die, section_offset):
    """ Describe a reference attribute: the attribute value added to the
        offset of the DIE's CU, shown as hex inside angle brackets.
    """
    target_offset = attr.value + die.cu.cu_offset
    return '<0x%x>' % target_offset
60
def _describe_attr_value_passthrough(attr, die, section_offset):
    """ Describe an attribute by returning its value unchanged (the caller
        is responsible for converting it to a string).
    """
    raw = attr.value
    return raw
63
def _describe_attr_hex(attr, die, section_offset):
    """ Describe an attribute value as a 0x-prefixed hexadecimal number.
    """
    number = attr.value
    return '0x%x' % (number)
66
def _describe_attr_hex_addr(attr, die, section_offset):
    """ Describe an attribute value as a 0x-prefixed hexadecimal number
        enclosed in angle brackets.
    """
    address = attr.value
    return '<0x%x>' % (address)
69
def _describe_attr_split_64bit(attr, die, section_offset):
    """ Describe a 64-bit attribute value as two 32-bit hexadecimal words,
        low word first.
    """
    word_mask = 0xFFFFFFFF
    halves = (attr.value & word_mask, (attr.value >> 32) & word_mask)
    return '0x%x 0x%x' % halves
74
def _describe_attr_strp(attr, die, section_offset):
    """ Describe an indirect string attribute: show the raw value as the
        hex offset of the string, followed by the attribute value itself.
    """
    offset_and_text = (attr.raw_value, attr.value)
    return '(indirect string, offset: 0x%x): %s' % offset_and_text
77
def _describe_attr_debool(attr, die, section_offset):
    """ Describe a flag attribute as '1' when its value is true and '0'
        when it is false, to be consistent with readelf.
    """
    if attr.value:
        return '1'
    return '0'
83
def _describe_attr_block(attr, die, section_offset):
    """ Describe a block attribute: the number of items in the value,
        followed by each item in hexadecimal, separated by spaces.
    """
    header = '%s byte block: ' % len(attr.value)
    hex_items = ['%x' % item for item in attr.value]
    return header + ' '.join(hex_items)
88
89
# Maps the name of an attribute form to the function that describes values
# of that form. Forms that are not listed fall back to passing the value
# through unchanged.
_ATTR_DESCRIPTION_MAP = defaultdict(
    lambda: _describe_attr_value_passthrough,  # default_factory
    {
        'DW_FORM_ref1': _describe_attr_ref,
        'DW_FORM_ref2': _describe_attr_ref,
        'DW_FORM_ref4': _describe_attr_ref,
        'DW_FORM_ref8': _describe_attr_split_64bit,
        'DW_FORM_ref_udata': _describe_attr_ref,
        'DW_FORM_ref_addr': _describe_attr_hex_addr,
        'DW_FORM_data4': _describe_attr_hex,
        'DW_FORM_data8': _describe_attr_split_64bit,
        'DW_FORM_addr': _describe_attr_hex,
        'DW_FORM_sec_offset': _describe_attr_hex,
        'DW_FORM_flag': _describe_attr_debool,
        'DW_FORM_data1': _describe_attr_value_passthrough,
        'DW_FORM_data2': _describe_attr_value_passthrough,
        'DW_FORM_sdata': _describe_attr_value_passthrough,
        'DW_FORM_udata': _describe_attr_value_passthrough,
        'DW_FORM_string': _describe_attr_value_passthrough,
        'DW_FORM_strp': _describe_attr_strp,
        'DW_FORM_block1': _describe_attr_block,
        'DW_FORM_block2': _describe_attr_block,
        'DW_FORM_block4': _describe_attr_block,
        'DW_FORM_block': _describe_attr_block,
    },
)
115
116
# Extra text shown for each known value of the DW_AT_inline attribute.
_DESCR_DW_INL = {
    DW_INL_not_inlined:
        '(not inlined)',
    DW_INL_inlined:
        '(inlined)',
    DW_INL_declared_not_inlined:
        '(declared as inline but ignored)',
    DW_INL_declared_inlined:
        '(declared as inline and inlined)',
}
123
# Extra text shown for each known value of the DW_AT_language attribute.
_DESCR_DW_LANG = {
    DW_LANG_C89: '(ANSI C)',
    DW_LANG_C: '(non-ANSI C)',
    DW_LANG_Ada83: '(Ada)',
    DW_LANG_C_plus_plus: '(C++)',
    DW_LANG_Cobol74: '(Cobol 74)',
    DW_LANG_Cobol85: '(Cobol 85)',
    DW_LANG_Fortran77: '(FORTRAN 77)',
    DW_LANG_Fortran90: '(Fortran 90)',
    DW_LANG_Pascal83: '(ANSI Pascal)',
    DW_LANG_Modula2: '(Modula 2)',
    DW_LANG_Java: '(Java)',
    DW_LANG_C99: '(ANSI C99)',
    DW_LANG_Ada95: '(ADA 95)',
    DW_LANG_Fortran95: '(Fortran 95)',
    DW_LANG_PLI: '(PLI)',
    DW_LANG_ObjC: '(Objective C)',
    DW_LANG_ObjC_plus_plus: '(Objective C++)',
    DW_LANG_UPC: '(Unified Parallel C)',
    DW_LANG_D: '(D)',
    DW_LANG_Python: '(Python)',
    DW_LANG_Mips_Assembler: '(MIPS assembler)',
    # Same description as DW_LANG_UPC above; the leading 'U' was missing.
    DW_LANG_Upc: '(Unified Parallel C)',
    DW_LANG_HP_Bliss: '(HP Bliss)',
    DW_LANG_HP_Basic91: '(HP Basic 91)',
    DW_LANG_HP_Pascal91: '(HP Pascal 91)',
    DW_LANG_HP_IMacro: '(HP IMacro)',
    DW_LANG_HP_Assembler: '(HP assembler)',
}
153
# Extra text shown for each known value of the DW_AT_encoding attribute.
_DESCR_DW_ATE = {
    # Standard encodings
    DW_ATE_void:                  '(void)',
    DW_ATE_address:               '(machine address)',
    DW_ATE_boolean:               '(boolean)',
    DW_ATE_complex_float:         '(complex float)',
    DW_ATE_float:                 '(float)',
    DW_ATE_signed:                '(signed)',
    DW_ATE_signed_char:           '(signed char)',
    DW_ATE_unsigned:              '(unsigned)',
    DW_ATE_unsigned_char:         '(unsigned char)',
    DW_ATE_imaginary_float:       '(imaginary float)',
    DW_ATE_decimal_float:         '(decimal float)',
    DW_ATE_packed_decimal:        '(packed_decimal)',
    DW_ATE_numeric_string:        '(numeric_string)',
    DW_ATE_edited:                '(edited)',
    DW_ATE_signed_fixed:          '(signed_fixed)',
    DW_ATE_unsigned_fixed:        '(unsigned_fixed)',

    # HP-prefixed encodings
    DW_ATE_HP_float80:            '(HP_float80)',
    DW_ATE_HP_complex_float80:    '(HP_complex_float80)',
    DW_ATE_HP_float128:           '(HP_float128)',
    DW_ATE_HP_complex_float128:   '(HP_complex_float128)',
    DW_ATE_HP_floathpintel:       '(HP_floathpintel)',
    DW_ATE_HP_imaginary_float80:  '(HP_imaginary_float80)',
    DW_ATE_HP_imaginary_float128: '(HP_imaginary_float128)',
}
179
# Extra text for DW_AT_accessibility values
_DESCR_DW_ACCESS = {
    DW_ACCESS_public:    '(public)',
    DW_ACCESS_protected: '(protected)',
    DW_ACCESS_private:   '(private)',
}

# Extra text for DW_AT_visibility values
_DESCR_DW_VIS = {
    DW_VIS_local:     '(local)',
    DW_VIS_exported:  '(exported)',
    DW_VIS_qualified: '(qualified)',
}

# Extra text for DW_AT_virtuality values
_DESCR_DW_VIRTUALITY = {
    DW_VIRTUALITY_none:         '(none)',
    DW_VIRTUALITY_virtual:      '(virtual)',
    DW_VIRTUALITY_pure_virtual: '(pure virtual)',
}

# Extra text for DW_AT_identifier_case values
_DESCR_DW_ID_CASE = {
    DW_ID_case_sensitive:   '(case_sensitive)',
    DW_ID_up_case:          '(up_case)',
    DW_ID_down_case:        '(down_case)',
    DW_ID_case_insensitive: '(case_insensitive)',
}

# Extra text for DW_AT_calling_convention values
_DESCR_DW_CC = {
    DW_CC_normal:  '(normal)',
    DW_CC_program: '(program)',
    DW_CC_nocall:  '(nocall)',
}

# Extra text for DW_AT_ordering values
_DESCR_DW_ORD = {
    DW_ORD_row_major: '(row major)',
    DW_ORD_col_major: '(column major)',
}
215
216
def _make_extra_mapper(mapping, default, default_interpolate_value=False):
    """ Create a function that maps an attribute's value to the extra text
        that should be displayed for it.

        mapping: dictionary from attribute values to their extra text
        default: text returned for values missing from mapping
        default_interpolate_value: when true, default is used as a
            %-format string into which the attribute value is interpolated

        The returned function takes (attr, die, section_offset).
    """
    def mapper(attr, die, section_offset):
        # The fallback text is built before the lookup, whether or not the
        # value turns out to be present in the mapping.
        fallback = default % attr.value if default_interpolate_value else default
        return mapping.get(attr.value, fallback)
    return mapper
228
229
def _make_extra_string(s=''):
    """ Create an extra-info function that ignores its arguments and always
        returns the constant string s.
    """
    return lambda attr, die, section_offset: s
236
237
# Maps id(cu.structs) to the ExprDumper created for those structs, so that
# a dumper is created only once per processed CU.
_DWARF_EXPR_DUMPER_CACHE = {}

def _location_list_extra(attr, die, section_offset):
    """ Provide the extra text for attributes that hold a location.

        attr: the attribute being described
        die: the DIE the attribute belongs to; die.cu.structs is used to
             decode a location expression
        section_offset: unused here

        Returns '(location list)' when the attribute refers to a location
        list, and otherwise the decoded location expression in parentheses.
    """
    # According to section 2.6 of the DWARF spec v3, class loclistptr means
    # a location list, and class block means a location expression.
    # In DWARF v4 loclistptr is encoded with DW_FORM_sec_offset, whose value
    # is a section offset and not an expression to decode.
    #
    if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'):
        return '(location list)'

    # Since this function can be called a lot, initializing a fresh new
    # ExprDumper per call is expensive. So a rudimentary caching scheme is
    # in place to create only one such dumper per processed CU.
    cache_key = id(die.cu.structs)
    dwarf_expr_dumper = _DWARF_EXPR_DUMPER_CACHE.get(cache_key)
    if dwarf_expr_dumper is None:
        dwarf_expr_dumper = ExprDumper(die.cu.structs)
        _DWARF_EXPR_DUMPER_CACHE[cache_key] = dwarf_expr_dumper

    # The dumper is reused between calls, so drop what it collected before
    dwarf_expr_dumper.clear()
    dwarf_expr_dumper.process_expr(attr.value)
    return '(' + dwarf_expr_dumper.get_str() + ')'
259
260
def _import_extra(attr, die, section_offset):
    """ Provide the extra text for DW_AT_import: the abbreviation number
        and tag of the DIE the attribute refers to, or '[unknown]' when no
        CU contains the referred offset.
    """
    # The value points to a DIE that can be either in the current DIE's CU
    # or in another CU, depending on the FORM.
    if attr.form == 'DW_FORM_ref_addr':
        # Absolute offset value
        target_offset = section_offset + attr.value
    else:
        # Relative offset to the current DIE's CU
        target_offset = attr.value + die.cu.cu_offset

    # The referred DIE has to be parsed with the abbrev table of the CU it
    # belongs to, so scan linearly through all CUs, looking for the one
    # whose span contains the referred offset.
    for cu in die.dwarfinfo.iter_CUs():
        span_end = cu['unit_length'] + cu.cu_offset
        span_start = cu.cu_offset
        if not (span_end > target_offset >= span_start):
            continue

        # With the CU at hand, the DIE can be parsed from the stream. The
        # stream position is restored once parsing is done.
        with preserve_stream_pos(die.stream):
            referred_die = DIE(cu, die.stream, target_offset)
        return '[Abbrev Number: %s (%s)]' % (
            referred_die.abbrev_code, referred_die.tag)

    return '[unknown]'
286
287
# Maps an attribute name to the function producing the extra information
# displayed after the attribute's value. Attributes that are not listed get
# a function returning an empty string.
_EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
    lambda: _make_extra_string(''), # default_factory

    # The fallback text needs its closing parenthesis, like every other
    # description in this map.
    DW_AT_inline=_make_extra_mapper(
        _DESCR_DW_INL, '(Unknown inline attribute value: %x)',
        default_interpolate_value=True),
    DW_AT_language=_make_extra_mapper(
        _DESCR_DW_LANG, '(Unknown: %x)', default_interpolate_value=True),
    DW_AT_encoding=_make_extra_mapper(_DESCR_DW_ATE, '(unknown type)'),
    DW_AT_accessibility=_make_extra_mapper(
        _DESCR_DW_ACCESS, '(unknown accessibility)'),
    DW_AT_visibility=_make_extra_mapper(
        _DESCR_DW_VIS, '(unknown visibility)'),
    DW_AT_virtuality=_make_extra_mapper(
        _DESCR_DW_VIRTUALITY, '(unknown virtuality)'),
    DW_AT_identifier_case=_make_extra_mapper(
        _DESCR_DW_ID_CASE, '(unknown case)'),
    DW_AT_calling_convention=_make_extra_mapper(
        _DESCR_DW_CC, '(unknown convention)'),
    DW_AT_ordering=_make_extra_mapper(
        _DESCR_DW_ORD, '(undefined)'),

    # Attributes whose value is a location expression or a location list
    DW_AT_frame_base=_location_list_extra,
    DW_AT_location=_location_list_extra,
    DW_AT_string_length=_location_list_extra,
    DW_AT_return_addr=_location_list_extra,
    DW_AT_data_member_location=_location_list_extra,
    DW_AT_vtable_elem_location=_location_list_extra,
    DW_AT_segment=_location_list_extra,
    DW_AT_static_link=_location_list_extra,
    DW_AT_use_location=_location_list_extra,
    DW_AT_allocated=_location_list_extra,
    DW_AT_associated=_location_list_extra,
    DW_AT_data_location=_location_list_extra,
    DW_AT_stride=_location_list_extra,

    DW_AT_import=_import_extra,
)
324
# Register names for x86, indexed by register number. 8 in a line, for
# easier counting (the last line holds the remaining 10).
_REG_NAMES_x86 = (
    'eax ecx edx ebx esp ebp esi edi '
    'eip eflags <none> st0 st1 st2 st3 st4 '
    'st5 st6 st7 <none> <none> xmm0 xmm1 xmm2 '
    'xmm3 xmm4 xmm5 xmm6 xmm7 mm0 mm1 mm2 '
    'mm3 mm4 mm5 mm6 mm7 fcw fsw mxcsr '
    'es cs ss ds fs gs <none> <none> tr ldtr'
).split()
334
# Register names for x64, indexed by register number. 8 in a line, for
# easier counting.
_REG_NAMES_x64 = (
    'rax rdx rcx rbx rsi rdi rbp rsp '
    'r8 r9 r10 r11 r12 r13 r14 r15 '
    'rip xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 '
    'xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 '
    'xmm15 st0 st1 st2 st3 st4 st5 st6 '
    'st7 mm0 mm1 mm2 mm3 mm4 mm5 mm6 '
    'mm7 rflags es cs ss ds fs gs '
    '<none> <none> fs.base gs.base <none> <none> tr ldtr '
    'mxcsr fcw fsw'
).split()
346
347
class ExprDumper(GenericExprVisitor):
    """ A concrete visitor for DWARF expressions that dumps a textual
        representation of the complete expression.

        Usage: after creation, call process_expr, and then get_str for a
        semicolon-delimited string representation of the decoded expression.
        Call clear before processing another expression with the same
        object.
    """
    def __init__(self, structs):
        """ structs: passed on unchanged to GenericExprVisitor.
        """
        super(ExprDumper, self).__init__(structs)
        self._init_lookups()
        self._str_parts = []

    def clear(self):
        """ Forget the parts collected so far.
        """
        self._str_parts = []

    def get_str(self):
        """ Return the descriptions of all visited operations, joined with
            '; '.
        """
        return '; '.join(self._str_parts)

    def _init_lookups(self):
        """ Build the sets of opcode names that decide how the arguments of
            an operation are formatted.
        """
        self._ops_with_decimal_arg = set([
            'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s',
            'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts',
            'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip',
            'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size',
            'DW_OP_xderef_size', 'DW_OP_regx',])

        # DW_OP_breg0 .. DW_OP_breg31
        for n in range(0, 32):
            self._ops_with_decimal_arg.add('DW_OP_breg%s' % n)

        self._ops_with_two_decimal_args = set([
            'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx', 'DW_OP_bit_piece'])

        self._ops_with_hex_arg = set(
            ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'])

    def _after_visit(self, opcode, opcode_name, args):
        """ Record the textual form of one operation.

            NOTE(review): presumably invoked by GenericExprVisitor once per
            decoded operation - confirm against dwarf_expr.
        """
        self._str_parts.append(self._dump_to_string(opcode, opcode_name, args))

    def _dump_to_string(self, opcode, opcode_name, args):
        """ Return the textual form of a single operation.

            opcode: unused here
            opcode_name: name of the operation, e.g. 'DW_OP_breg5'
            args: the decoded arguments of the operation

            Operations that refer to a register also get the register's
            name for the globally set machine architecture. Operations with
            arguments that are in none of the lookup sets are reported as
            '<unknown ...>'.
        """
        if len(args) == 0:
            if opcode_name.startswith('DW_OP_reg'):
                # DW_OP_reg<N>: the register number is part of the name
                regnum = int(opcode_name[9:])
                return '%s (%s)' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH))
            else:
                return opcode_name
        elif opcode_name in self._ops_with_decimal_arg:
            if opcode_name.startswith('DW_OP_breg'):
                # DW_OP_breg<N>: register number in the name, offset in
                # the argument
                regnum = int(opcode_name[10:])
                return '%s (%s): %s' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH),
                    args[0])
            elif opcode_name == 'DW_OP_regx':
                # The register number is the argument
                return '%s: %s (%s)' % (
                    opcode_name,
                    args[0],
                    describe_reg_name(args[0], _MACHINE_ARCH))
            else:
                return '%s: %s' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_hex_arg:
            return '%s: %x' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_two_decimal_args:
            if opcode_name == 'DW_OP_bregx':
                # DW_OP_bregx has two arguments, so it never reaches the
                # one-argument branch above; name its register here. The
                # layout (number, name, offset) follows readelf.
                return '%s: %s (%s) %s' % (
                    opcode_name,
                    args[0],
                    describe_reg_name(args[0], _MACHINE_ARCH),
                    args[1])
            return '%s: %s %s' % (opcode_name, args[0], args[1])
        else:
            return '<unknown %s>' % opcode_name
416
417
418