Initial commit of new expr parsing function.
authorEli Bendersky <eliben@gmail.com>
Fri, 13 Mar 2020 13:17:13 +0000 (06:17 -0700)
committerEli Bendersky <eliben@users.noreply.github.com>
Sat, 14 Mar 2020 12:24:52 +0000 (05:24 -0700)
Basic unit tests pass, but old code is still in place and descriptions is not
yet converted.

elftools/dwarf/dwarf_expr.py
test/test_dwarf_expr.py

index 7cdf9ef7b9b5a6b9e8dbf1d6b8bb013c2b2078af..8ef6b6f0673c40e675883aab9de221f8ff344016 100644 (file)
@@ -6,6 +6,8 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
+from collections import namedtuple
+
 from ..common.py3compat import BytesIO, iteritems
 from ..common.utils import struct_parse, bytelist2string
 
@@ -267,3 +269,108 @@ class GenericExprVisitor(object):
             self._make_visitor_arg_struct(self.structs.Dwarf_uint32('')))
         add('DW_OP_call_ref',
             self._make_visitor_arg_struct(self.structs.Dwarf_offset('')))
+
+
+DwarfExprOp = namedtuple('DwarfExprOp', 'op op_name args')
+
+
+def parse_expr(expr, structs):
+    """TODO
+    """
+    dispatch_table = _init_dispatch_table(structs)
+
+    stream = BytesIO(bytelist2string(expr))
+    parsed = []
+
+    while True:
+        # Get the next opcode from the stream. If nothing is left in the
+        # stream, we're done.
+        byte = stream.read(1)
+        if len(byte) == 0:
+            break
+
+        # Decode the opcode and its name.
+        op = ord(byte)
+        op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op)
+
+        # Use dispatch table to parse args.
+        arg_parser = dispatch_table[op]
+        args = arg_parser(stream)
+
+        parsed.append(DwarfExprOp(op=op, op_name=op_name, args=args))
+
+    return parsed
+
+
+def _init_dispatch_table(structs):
+    """Creates a dispatch table for parsing args of an op.
+
+    Returns a dict mapping opcode to a function. The function accepts a stream
+    and return a list of parsed arguments for the opcode from the stream;
+    the stream is advanced by the function as needed.
+    """
+    table = {}
+    def add(opcode_name, func):
+        table[DW_OP_name2opcode[opcode_name]] = func
+
+    def parse_noargs():
+        return lambda stream: []
+
+    def parse_op_addr():
+        return lambda stream: [struct_parse(self.structs.Dwarf_target_addr(''),
+                                            stream)]
+
+    def parse_arg_struct(arg_struct):
+        return lambda stream: [struct_parse(arg_struct, stream)]
+
+    def parse_arg_struct2(arg1_struct, arg2_struct):
+        return lambda stream: [struct_parse(arg1_struct, stream),
+                               struct_parse(arg2_struct, stream)]
+
+    add('DW_OP_addr', parse_op_addr())
+    add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8('')))
+    add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16('')))
+    add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16('')))
+    add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32('')))
+    add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32('')))
+    add('DW_OP_const8u', parse_arg_struct2(structs.Dwarf_uint32(''),
+                                           structs.Dwarf_uint32('')))
+    add('DW_OP_const8s', parse_arg_struct2(structs.Dwarf_int32(''),
+                                           structs.Dwarf_int32('')))
+    add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128('')))
+    add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128('')))
+    add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8('')))
+    add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128('')))
+    add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16('')))
+    add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16('')))
+
+    for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over',
+                    'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef',
+                    'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus',
+                    'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not',
+                    'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', 'DW_OP_shra',
+                    'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', 'DW_OP_gt',
+                    'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop',
+                    'DW_OP_push_object_address', 'DW_OP_form_tls_address',
+                    'DW_OP_call_frame_cfa']:
+        add(opname, parse_noargs())
+
+    for n in range(0, 32):
+        add('DW_OP_lit%s' % n, parse_noargs())
+        add('DW_OP_reg%s' % n, parse_noargs())
+        add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128('')))
+
+    add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128('')))
+    add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128('')))
+    add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''),
+                                         structs.Dwarf_sleb128('')))
+    add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128('')))
+    add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''),
+                                             structs.Dwarf_uleb128('')))
+    add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8('')))
+    add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8('')))
+    add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16('')))
+    add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32('')))
+    add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset('')))
+
+    return table
index a04c20641ab42bb1f210c8d9d757b91e17c8aeb6..1bd80d78de172b32bd96814fba9d1e68f76a360a 100644 (file)
@@ -7,6 +7,7 @@
 import unittest
 
 from elftools.dwarf.descriptions import ExprDumper, set_global_machine_arch
+from elftools.dwarf.dwarf_expr import parse_expr, DwarfExprOp
 from elftools.dwarf.structs import DWARFStructs
 
 
@@ -66,5 +67,22 @@ class TestExprDumper(unittest.TestCase):
             'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul')
 
 
+class TestParseExpr(unittest.TestCase):
+    structs32 = DWARFStructs(
+            little_endian=True,
+            dwarf_format=32,
+            address_size=4)
+
+    def setUp(self):
+        set_global_machine_arch('x64')
+
+    def test_single(self):
+        lst = parse_expr([0x1b], self.structs32)
+        self.assertEqual(lst, [DwarfExprOp(op=0x1B, op_name='DW_OP_div', args=[])])
+
+        lst = parse_expr([0x90, 16], self.structs32)
+        self.assertEqual(lst, [DwarfExprOp(op=0x90, op_name='DW_OP_regx', args=[16])])
+
+
 if __name__ == '__main__':
     unittest.main()