libopid: refactor opcodes; update naming
author Dmitry Selyutin <ghostmansd@gmail.com>
Wed, 13 Sep 2023 16:52:51 +0000 (19:52 +0300)
committer Dmitry Selyutin <ghostmansd@gmail.com>
Wed, 13 Sep 2023 17:26:19 +0000 (20:26 +0300)
src/libopid/Makefile
src/libopid/codegen.py
src/libopid/opid-dis.c
src/libopid/opid-opc.c
src/libopid/opid.h

index b18611bcc729525a667f584ad774762a29ec45f5..21ccf94b0538348cdbd94823bc83916228806948 100644 (file)
@@ -53,11 +53,13 @@ libopid.so: libopid.a
 
 
 libopid.a: $(OBJS)
-       $(AR) rcs $@ $^
+       $(AR) rcs $@ $(OBJS)
 
 
 opid-dis.c: opid-dis-gen.c
 opid-opc.c: opid-opc-gen.c
+opid-dis.o: opid-dis.c opid-dis-gen.c
+opid-opc.o: opid-opc.c opid-opc-gen.c
 
 
  $(GENS): codegen.py
@@ -65,4 +67,4 @@ opid-opc.c: opid-opc-gen.c
 
 
 .c.o:
-       $(CC) $(CPPFLAGS) $(CFLAGS) -fPIC -c $< -o $@
+       $(CC) $(CPPFLAGS) $(CFLAGS) -std=c11 -fPIC -c $< -o $@
index ce9ce78498ae5ba6b2092b12e5a06c975e36600f..f02ceb5570f15fb0f85bce70ba0345198061e600 100644 (file)
@@ -4,8 +4,8 @@ import contextlib
 import dataclasses
 import enum
 import itertools
+import operator
 import pathlib
-import sys
 
 import mdis.dispatcher
 import mdis.visitor
@@ -36,9 +36,34 @@ def fetch(span):
     yield f"UINT64_C(0)"
 
 
+def store(span):
+    bits = len(span)
+    one = "UINT64_C(1)"
+    for (dst, origin) in enumerate(span):
+        src = (32 - (origin + 1))
+        dst = (bits - (dst + 1))
+        dst = f"UINT64_C({dst})"
+        src = f"UINT64_C({src})"
+        yield f"/* {origin:<2} */ (((operand->value >> {dst}) & {one}) << {src}) |"
+    yield f"UINT64_C(0)"
+
+
+def unwrap(integer):
+    for bit in range(31, -1, -1):
+        yield ((integer >> bit) & 1)
+
+
+def wrap(bits):
+    value = 0
+    for (index, bit) in enumerate(reversed(bits)):
+        if bit:
+            value |= (1 << index)
+    return value
+
+
 class Mode(enum.Enum):
-    PPC_DIS_GEN_C = "opid-dis-gen.c"
-    PPC_OPC_GEN_C = "opid-opc-gen.c"
+    DIS_GEN_C = "opid-dis-gen.c"
+    OPC_GEN_C = "opid-opc-gen.c"
 
     def __call__(self, db, **arguments):
         def pairwise(iterable):
@@ -48,8 +73,8 @@ class Mode(enum.Enum):
 
         cache = Cache()
         codegen = {
-            Mode.PPC_DIS_GEN_C: DisGenSource,
-            Mode.PPC_OPC_GEN_C: OpcGenSource,
+            Mode.DIS_GEN_C: DisGenSource,
+            Mode.OPC_GEN_C: OpcGenSource,
         }[self](cache=cache, **arguments)
         for (root, visitor) in pairwise((db, cache, codegen),):
             walker_cls = getattr(visitor, "Walker", Walker)
@@ -69,15 +94,45 @@ class Struct(metaclass=StructMeta):
 class DynamicOperandIds(tuple): pass
 class StaticOperands(tuple): pass
 class DynamicOperands(tuple): pass
-class POTable(tuple): pass
+class OpcodeIds(tuple): pass
+class NameTable(tuple): pass
+class OpcodeTable(tuple): pass
 class RecordTable(tuple): pass
 
 
+class Opcode(insndb.Opcode):
+    @property
+    def PO(self):
+        (*_, result) = itertools.accumulate((
+            (((self.value >> 31) & 1) << 5), # 0
+            (((self.value >> 30) & 1) << 4), # 1
+            (((self.value >> 29) & 1) << 3), # 2
+            (((self.value >> 28) & 1) << 2), # 3
+            (((self.value >> 27) & 1) << 1), # 4
+            (((self.value >> 26) & 1) << 0), # 5
+        ), operator.or_)
+        return result
+
+    @property
+    def weight(self):
+        bits = tuple(unwrap(self.mask)).count(1)
+        return (self.PO, -bits, self.mask)
+
+    def __lt__(self, other):
+        if not isinstance(other, self.__class__):
+            return NotImplemented
+        return self.weight < other.weight
+
 class DynamicOperandId(Struct):
     name: str = "NIL"
     index: int = 0
 
 
+class NameId(Struct):  # an instruction name paired with a record index
+    name: str  # emitted as the C string of a name table entry
+    index: int  # emitted as the subscript into opid_record_table
+
+
 class DynamicOperand(Struct):
     cls: type
     span: tuple
@@ -86,7 +141,7 @@ class DynamicOperand(Struct):
 
 class Record(Struct):
     name: str
-    opcode: insndb.Record.Opcode
+    opcode: Opcode
     dynamic_operand_ids: DynamicOperandIds
     static_operands: StaticOperands
 
@@ -94,7 +149,7 @@ class Record(Struct):
 class Cache(mdis.visitor.ContextVisitor):
     def __init__(self):
         self.__PO = ([0] * (1 << 6))
-        self.__records = collections.defaultdict(list)
+        self.__records = []
         self.__static_operand = collections.defaultdict(list)
         self.__dynamic_operand = collections.defaultdict(set)
         self.__dynamic_operand_id = collections.defaultdict(list)
@@ -102,31 +157,38 @@ class Cache(mdis.visitor.ContextVisitor):
         return super().__init__()
 
     def __iter__(self):
-        table = tuple(self.__dynamic_operand.keys())
+        name_table = {}
+        operands_table = tuple(self.__dynamic_operand.keys())
         nil = DynamicOperandId()
 
         def dynamic_operand_id(item):
             (name, cls, span) = item
-            index = (table.index((cls, span),) + 1)
+            index = (operands_table.index((cls, span),) + 1)
             return DynamicOperandId(name=name, index=index)
 
         def dynamic_operand(item):
             ((cls, span), names) = item
             return DynamicOperand(cls=cls, span=span, names=tuple(sorted(names)))
 
+        def name_id(item):
+            (name, index) = item
+            return NameId(name=name, index=index)
+
         def record(item):
-            (opcode, name) = item
+            (index, (opcode, name)) = item
+            name_table[name] = index
             dynamic_operand_ids = map(dynamic_operand_id, self.__dynamic_operand_id[name])
             dynamic_operand_ids = DynamicOperandIds(tuple(dynamic_operand_ids) + (nil,))
-            static_operands = StaticOperands(self.__static_operand[name])
+            static_operands = StaticOperands(dict.fromkeys(self.__static_operand[name]))
 
             return Record(opcode=opcode, name=name,
                 dynamic_operand_ids=dynamic_operand_ids,
                 static_operands=static_operands)
 
         yield DynamicOperands(map(dynamic_operand, self.__dynamic_operand.items()))
-        yield RecordTable(map(record, sorted(self.__records.items())))
-        yield POTable(self.__PO)
+        yield RecordTable(map(record, enumerate(sorted(self.__records))))
+        yield OpcodeTable(self.__PO)
+        yield NameTable(sorted(map(name_id, name_table.items()), key=lambda item: item.name))
 
     @mdis.dispatcher.Hook(insndb.Record)
     @contextlib.contextmanager
@@ -137,10 +199,28 @@ class Cache(mdis.visitor.ContextVisitor):
     @mdis.dispatcher.Hook(insndb.Record.Opcode)
     @contextlib.contextmanager
     def dispatch_record_opcode(self, node):
-        self.__records[node] = self.__record.name
         self.__PO[self.__record.PO] += 1
         yield node
 
+    @mdis.dispatcher.Hook(insndb.Record.Opcodes)
+    @contextlib.contextmanager
+    def dispatch_record_opcodes(self, node):
+        masks = {subnode.mask for subnode in node}
+        if len(masks) != 1:
+            raise ValueError(masks)
+        mask = list(unwrap(masks.pop()))
+        states = tuple(unwrap(node[0].value))
+        for subnode in node[1:]:
+            for (index, bit) in enumerate(unwrap(subnode.value)):
+                if mask[index] and (states[index] != bit):
+                    mask[index] = 0
+
+        mask = insndb.Record.Opcode.Mask(wrap(mask))
+        opcode = Opcode(node[0].value, mask)
+        self.__records.append((opcode, self.__record.name))
+
+        yield node
+
     @mdis.dispatcher.Hook(insndb.StaticOperand)
     @contextlib.contextmanager
     def dispatch_static_operand(self, node):
@@ -195,6 +275,16 @@ class Header(Codegen):
 
 
 class Source(Codegen):
+    class Walker(Walker):
+        @mdis.dispatcher.Hook(DynamicOperand, RecordTable)
+        def dispatch_ignore(self, node):
+            yield from ()
+
+        @mdis.dispatcher.Hook(Cache)
+        def dispatch_cache(self, node):
+            (operands, _, _, _) = node
+            yield from self([operands])
+
     @mdis.dispatcher.Hook(str)
     @contextlib.contextmanager
     def dispatch_str(self, node, *, path, pathcls):
@@ -239,7 +329,7 @@ class DisGenSource(Source):
 
         @mdis.dispatcher.Hook(Cache)
         def dispatch_cache(self, node):
-            (operands, _, _) = node
+            (operands, _, _, _) = node
             yield from self([operands])
 
     @mdis.dispatcher.Hook(DynamicOperands)
@@ -256,7 +346,7 @@ class DisGenSource(Source):
             yield node
             self.emit("default:")
             with self:
-                self.emit("return OPID_ERROR_OPERAND_0;")
+                self.emit("return OPID_ERROR_OPERAND_0_LOOKUP;")
             self.emit("}")
         self.emit("")
         with self:
@@ -350,7 +440,8 @@ class DisGenSource(Source):
 
 class OpcGenSource(Source):
     class Walker(Walker):
-        @mdis.dispatcher.Hook(DynamicOperandId, DynamicOperands, insndb.StaticOperand, POTable)
+        @mdis.dispatcher.Hook(DynamicOperandId, NameId, Opcode,
+            DynamicOperands, insndb.StaticOperand, OpcodeTable)
         def dispatch_ignore(self, node):
             yield from ()
 
@@ -368,8 +459,8 @@ class OpcGenSource(Source):
 
         @mdis.dispatcher.Hook(Cache)
         def dispatch_cache(self, node):
-            (_, records, potable) = node
-            yield from self([records, potable])
+            (_, records, opcodes, names) = node
+            yield from self([records, opcodes, names])
 
     @mdis.dispatcher.Hook(DynamicOperandId)
     @contextlib.contextmanager
@@ -400,14 +491,17 @@ class OpcGenSource(Source):
         self.emit(f"{pathcls(path)} = UINT64_C(0x{node:016x}),")
         with self: yield node
 
-    @mdis.dispatcher.Hook(insndb.Record.Opcode)
+    @mdis.dispatcher.Hook(Opcode)
     @contextlib.contextmanager
     def dispatch_opcode(self, node):
         self.emit(".opcode = {")
-        with self: yield node
+        with self:
+            self.emit(f".value = UINT64_C(0x{node.value:08x}),")
+            self.emit(f".mask = UINT64_C(0x{node.mask:08x}),")
         self.emit("},")
+        yield node
 
-    @mdis.dispatcher.Hook(POTable)
+    @mdis.dispatcher.Hook(OpcodeTable)
     @contextlib.contextmanager
     def dispatch_potable(self, node):
         heads = ([0] * (1 << 6))
@@ -421,10 +515,25 @@ class OpcGenSource(Source):
             for index in range(64):
                 head = heads[index]
                 tail = tails[index]
-                self.emit(f"[0x{index:02x}] = {{{head}, {tail}}},")
+                self.emit(f"[{index}] = {{{head}, {tail}}},")
         self.emit("};")
+        self.emit("")
+        yield node
+
+    @mdis.dispatcher.Hook(NameId)
+    @contextlib.contextmanager
+    def dispatch_name_id(self, node):
+        self.emit(f"{{\"{node.name}\", &opid_record_table[{node.index}]}},")
         yield node
 
+    @mdis.dispatcher.Hook(NameTable)
+    @contextlib.contextmanager
+    def dispatch_name_table(self, node):
+        self.emit(f"static struct opid_name_id const opid_name_id_table[] = {{")
+        with self:
+            yield node
+        self.emit("};")
+
     @mdis.dispatcher.Hook(RecordTable)
     @contextlib.contextmanager
     def dispatch_records(self, node):
index 37eae8755f4919f718e5407778c316d8c3644a53..5a18f2b2e83c5073f0fd8a15c98b57b823752e67 100644 (file)
@@ -1,5 +1,7 @@
+#include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <string.h>
 
 #include "opid.h"
 
@@ -14,14 +16,14 @@ static inline enum opid_state
 opid_disassemble_operands(struct opid_ctx *ctx,
         struct opid_record const *record,
         uint64_t insn) {
-    for (size_t id = 0; ((id != OPID_OPERANDS) && record->operands[id]); ++id) {
+    for (size_t id = 0; ((id != OPID_OPERANDS_NR) && record->operands[id]); ++id) {
         enum opid_state state;
 
         state = opid_disassemble_operand(insn,
             record->operands[id],
             &ctx->operands[id]);
         if (state != OPID_SUCCESS) {
-            if (state == OPID_ERROR_OPERAND_0)
+            if (state == OPID_ERROR_OPERAND_0_LOOKUP)
                 state = (enum opid_state)((size_t)state + id);
             return state;
         }
@@ -45,7 +47,9 @@ opid_disassemble(struct opid_ctx *ctx, uint64_t insn) {
     if (state != OPID_SUCCESS)
         return state;
 
-    ctx->name = record->name;
+    _Static_assert(sizeof(record->name) == sizeof(ctx->name),
+        "record name length does not match context name length");
+    memcpy(ctx->name, record->name, sizeof(record->name));
 
     return OPID_SUCCESS;
 }
index 85179781b807ad0704952719c9ddc65c84e55cc4..df8277118e593d1314ea276cbebea3abd5125279 100644 (file)
@@ -3,8 +3,14 @@
 
 #include "opid.h"
 
+/*
+ * Name table entry: an instruction name and a pointer to its record.
+ * The name buffer uses OPID_NAME_BYTES from opid.h, the same size as
+ * opid_record.name and opid_ctx.name, so the sizes cannot drift apart.
+ */
+struct opid_name_id {
+    char name[OPID_NAME_BYTES];
+    struct opid_record const *record;
+};
+
 static struct opid_record const opid_record_table[];
 static uint16_t const opid_opcode_table[64][2];
+static struct opid_name_id const opid_name_id_table[];
 
 #include "opid-opc-gen.c"
 
index a6a94597dbb62aa9d6a76fad40691772be48ff02..9f064aff8b7aadfd6a20bb2a8a4511a4a6fa10f4 100644 (file)
@@ -6,17 +6,18 @@
 enum opid_state {
     OPID_SUCCESS,
     OPID_ERROR_LOOKUP,
-    OPID_ERROR_OPERAND_0,
-    OPID_ERROR_OPERAND_1,
-    OPID_ERROR_OPERAND_2,
-    OPID_ERROR_OPERAND_3,
-    OPID_ERROR_OPERAND_4,
-    OPID_ERROR_OPERAND_5,
-    OPID_ERROR_OPERAND_6,
-    OPID_ERROR_OPERAND_7,
+    OPID_ERROR_OPERAND_0_LOOKUP,
+    OPID_ERROR_OPERAND_1_LOOKUP,
+    OPID_ERROR_OPERAND_2_LOOKUP,
+    OPID_ERROR_OPERAND_3_LOOKUP,
+    OPID_ERROR_OPERAND_4_LOOKUP,
+    OPID_ERROR_OPERAND_5_LOOKUP,
+    OPID_ERROR_OPERAND_6_LOOKUP,
+    OPID_ERROR_OPERAND_7_LOOKUP,
 };
 
-#define OPID_OPERANDS 8
+#define OPID_NAME_BYTES  16
+#define OPID_OPERANDS_NR 8
 
 struct opid_opcode {
     uint64_t value;
@@ -25,8 +26,8 @@ struct opid_opcode {
 
 struct opid_record {
     struct opid_opcode opcode;
-    uint8_t operands[OPID_OPERANDS];
-    char name[16];
+    uint8_t operands[OPID_OPERANDS_NR];
+    char name[OPID_NAME_BYTES];
 };
 
 struct opid_operand {
@@ -44,15 +45,15 @@ struct opid_operand {
 #define OPID_OPERAND_ADDRESS   (UINT64_C(1) << UINT64_C(7))
 
 struct opid_ctx {
-    char const *name;
+    char name[OPID_NAME_BYTES];
     size_t nr_operands;
-    struct opid_operand operands[OPID_OPERANDS];
+    struct opid_operand operands[OPID_OPERANDS_NR];
 };
 
 #define opid_foreach_operand(ctx, operand) \
     for (size_t id = 0; \
         (((operand = &(ctx)->operands[id]), 1) && \
-            ((id != OPID_OPERANDS) && (id != (ctx)->nr_operands))); \
+            ((id != OPID_OPERANDS_NR) && (id != (ctx)->nr_operands))); \
         operand = &(ctx)->operands[++id])
 
 enum opid_state