From 4de4739d2e1cca5a84e888657f41fd335cdab9ce Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 9 Apr 2020 13:57:29 +0100
Subject: [PATCH] experiment morphing ALU to take subset of Decode2ToExecute1

---
 src/soc/experiment/alu_hier.py | 154 +++++++++++++++++++++++++++++----
 src/soc/experiment/compalu.py  |  11 +--
 2 files changed, 139 insertions(+), 26 deletions(-)

diff --git a/src/soc/experiment/alu_hier.py b/src/soc/experiment/alu_hier.py
index 27331dfd..9fd21c49 100644
--- a/src/soc/experiment/alu_hier.py
+++ b/src/soc/experiment/alu_hier.py
@@ -10,22 +10,92 @@ only one cycle (sync)
 """
 
 from nmigen import Elaboratable, Signal, Module, Const, Mux
+from nmigen.hdl.rec import Record, Layout
 from nmigen.cli import main
 from nmigen.cli import verilog, rtlil
-from soc.decoder.power_enums import InternalOp
+from nmigen.compat.sim import run_simulation
+
+from soc.decoder.power_enums import InternalOp, CryIn
 
 import operator
 
 
+class CompALUOpSubset(Record):
+    """CompALUOpSubset
+
+    A Record holding a copy of the relevant subset of information from
+    Decode2Execute1Type that is needed for ALU operations.
+    """
+    def __init__(self):
+        layout = (('insn_type', InternalOp),
+                  ('nia', 64),
+                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+                  # NO cr field: that comes from the CR SPR
+                  # NO xerc field (XerBits): that comes from the XER SPR
+                  ('lk', 1),
+                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
+                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
+                  ('invert_a', 1),
+                  ('invert_out', 1),
+                  ('input_carry', CryIn),
+                  ('output_carry', 1),
+                  ('input_cr', 1),
+                  ('output_cr', 1),
+                  ('is_32bit', 1),
+                  ('is_signed', 1),
+                  ('byte_reverse', 1),
+                  ('sign_extend', 1))
+
+        Record.__init__(self, Layout(layout))
+
+        # Record does not have kwargs, so set reset_less per field instead
+        self.insn_type.reset_less = True
+        self.nia.reset_less = True
+        # (cr / xerc are not part of this record: see layout above)
+        # NOTE(review): imm_data, rc, oe are not set reset_less - confirm
+        self.lk.reset_less = True
+        self.invert_a.reset_less = True
+        self.invert_out.reset_less = True
+        self.input_carry.reset_less = True
+        self.output_carry.reset_less = True
+        self.input_cr.reset_less = True
+        self.output_cr.reset_less = True
+        self.is_32bit.reset_less = True
+        self.is_signed.reset_less = True
+        self.byte_reverse.reset_less = True
+        self.sign_extend.reset_less = True
+
+    def ports(self):
+        return [self.insn_type,
+                self.nia,
+                # cr and xerc are not part of this record
+                # NOTE(review): imm_data, rc, oe are not listed - confirm
+                self.lk,
+                self.invert_a,
+                self.invert_out,
+                self.input_carry,
+                self.output_carry,
+                self.input_cr,
+                self.output_cr,
+                self.is_32bit,
+                self.is_signed,
+                self.byte_reverse,
+                self.sign_extend,
+        ]
+
 class Adder(Elaboratable):
     def __init__(self, width):
+        self.invert_a = Signal()  # 1: compute (~a) + b instead of a + b
         self.a   = Signal(width)
         self.b   = Signal(width)
         self.o   = Signal(width)
 
     def elaborate(self, platform):
         m = Module()
-        m.d.comb += self.o.eq(self.a + self.b)
+        with m.If(self.invert_a):
+            m.d.comb += self.o.eq((~self.a) + self.b)
+        with m.Else():
+            m.d.comb += self.o.eq(self.a + self.b)
         return m
 
 
@@ -75,7 +145,7 @@ class ALU(Elaboratable):
         self.n_ready_i = Signal()
         self.n_valid_o = Signal()
         self.counter   = Signal(4)
-        self.op  = Signal(InternalOp)
+        self.op  = CompALUOpSubset()
         self.a   = Signal(width)
         self.b   = Signal(width)
         self.o   = Signal(width)
@@ -84,21 +154,23 @@ class ALU(Elaboratable):
     def elaborate(self, platform):
         m = Module()
         add = Adder(self.width)
-        sub = Subtractor(self.width)
         mul = Multiplier(self.width)
         shf = Shifter(self.width)
 
         m.submodules.add = add
-        m.submodules.sub = sub
         m.submodules.mul = mul
         m.submodules.shf = shf
 
         # really should not activate absolutely all ALU inputs like this
-        for mod in [add, sub, mul, shf]:
+        for mod in [add, mul, shf]:
             m.d.comb += [
                 mod.a.eq(self.a),
                 mod.b.eq(self.b),
             ]
+
+        # pass invert (and carry later)
+        m.d.comb += add.invert_a.eq(self.op.invert_a)
+
         go_now = Signal(reset_less=True) # testing no-delay ALU
 
         with m.If(self.p_valid_i):
@@ -108,26 +180,28 @@ class ALU(Elaboratable):
                 m.d.sync += self.p_ready_o.eq(1)
 
                 # as this is a "fake" pipeline, just grab the output right now
-                with m.If(self.op == InternalOp.OP_ADD):
+                with m.If(self.op.insn_type == InternalOp.OP_ADD):
                     m.d.sync += self.o.eq(add.o)
-                with m.Elif(self.op == InternalOp.OP_MUL_L64):
+                with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
                     m.d.sync += self.o.eq(mul.o)
-                with m.Elif(self.op == InternalOp.OP_SHR):
+                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                     m.d.sync += self.o.eq(shf.o)
                 # TODO: SUB
 
-                with m.Switch(self.op):
-                    for i, mod in enumerate([add, sub, mul, shf]):
-                        with m.Case(i):
-                            m.d.sync += self.o.eq(mod.o)
-                with m.If(self.op == 2): # MUL, to take 5 instructions
+                # NOTE: all of these are fake, just something to test
+
+                # MUL, to take 5 instructions
+                with m.If(self.op.insn_type == InternalOp.OP_MUL_L64.value):
                     m.d.sync += self.counter.eq(5)
-                with m.Elif(self.op == 3): # SHIFT to take 7
+                # SHIFT to take 7
+                with m.Elif(self.op.insn_type == InternalOp.OP_SHR.value):
                     m.d.sync += self.counter.eq(7)
-                with m.Elif(self.op == 1): # SUB to take 1, straight away
+                # ADD to take 1, straight away (Elif: keeps MUL/SHR counts)
+                with m.Elif(self.op.insn_type == InternalOp.OP_ADD.value):
                     m.d.sync += self.counter.eq(1)
                     m.d.comb += go_now.eq(1)
-                with m.Else(): # ADD to take 2
+                # anything else to take 2
+                with m.Else():
                     m.d.sync += self.counter.eq(2)
         with m.Else():
             # input says no longer valid, so drop ready as well.
@@ -151,7 +225,7 @@ class ALU(Elaboratable):
         return m
 
     def __iter__(self):
-        yield self.op
+        yield from self.op.ports()
         yield self.a
         yield self.b
         yield self.o
@@ -247,13 +321,55 @@ class BranchALU(Elaboratable):
     def ports(self):
         return list(self)
 
+def run_op(dut, a, b, op, inv_a=0):
+    yield dut.a.eq(a)
+    yield dut.b.eq(b)
+    yield dut.op.insn_type.eq(op)
+    yield dut.op.invert_a.eq(inv_a)
+    yield dut.n_ready_i.eq(0)
+    yield dut.p_valid_i.eq(1)  # flag the operands/op set above as valid
+    yield
+    while True:  # keep stepping until the dut raises n_valid_o
+        yield
+        n_valid_o = yield dut.n_valid_o
+        if n_valid_o:
+            break
+    yield

-if __name__ == "__main__":
+    result = yield dut.o  # sample the ALU output
+    yield dut.p_valid_i.eq(0)  # drop valid again before returning
+    yield dut.n_ready_i.eq(0)
+    yield
+
+    return result
+
+
+def alu_sim(dut):
+    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
+    print ("alu_sim add", result)
+    assert (result == 8)
+
+    result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
+    print ("alu_sim mul", result)
+    assert (result == 6)
+
+    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
+    print ("alu_sim add-inv", result)
+    assert (result == 65533)  # (~5 + 3) & 0xffff: assumes a 16-bit dut
+
+def test_alu():
     alu = ALU(width=16)
+    run_simulation(alu, alu_sim(alu), vcd_name='test_alusim.vcd')
+
     vl = rtlil.convert(alu, ports=alu.ports())
     with open("test_alu.il", "w") as f:
         f.write(vl)
 
+
+if __name__ == "__main__":
+    test_alu()
+
     alu = BranchALU(width=16)
     vl = rtlil.convert(alu, ports=alu.ports())
     with open("test_branch_alu.il", "w") as f:
diff --git a/src/soc/experiment/compalu.py b/src/soc/experiment/compalu.py
index 55e2799f..9fcced73 100644
--- a/src/soc/experiment/compalu.py
+++ b/src/soc/experiment/compalu.py
@@ -6,6 +6,7 @@ from nmutil.latch import SRLatch, latchregister
 from soc.decoder.power_decoder2 import Data
 from soc.decoder.power_enums import InternalOp
 
+from soc.experiment.alu_hier import CompALUOpSubset
 
 """ Computation Unit (aka "ALU Manager").
 
@@ -39,13 +40,9 @@ from soc.decoder.power_enums import InternalOp
       register is placed combinatorially onto the output, and (2) the
       req_l latch is cleared, busy is dropped, and the Comp Unit is back
       through its revolving door to do another task.
-
-    Notes on oper_i:
-
-    * bits[0:2] are for the ALU, add=0, sub=1, shift=2, mul=3
-    * bit[2] are the immediate (bit[2]=1 == immediate mode)
 """
 
+
 class ComputationUnitNoDelay(Elaboratable):
     def __init__(self, rwid, e, alu):
         self.rwid = rwid
@@ -60,8 +57,8 @@ class ComputationUnitNoDelay(Elaboratable):
         self.go_die_i = Signal() # go die (reset)
 
         # operation / data input
-        self.oper_i = e.insn_type    # operand
-        self.imm_i =  e.imm_data      # immediate in
+        self.oper_i = CompALUOpSubset() # operand
+        self.imm_i =  self.oper_i.imm_data      # immediate in
         self.src1_i = Signal(rwid, reset_less=True) # oper1 in
         self.src2_i = Signal(rwid, reset_less=True) # oper2 in
 
-- 
2.30.2