From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Mon, 6 Jul 2020 16:02:09 +0000 (+0100)
Subject: first cut at mul test pipeline
X-Git-Tag: div_pipeline~162^2~31
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2acd91f1ee5412a0b47609cdad2356211987516e;p=soc.git

first cut at mul test pipeline
---

diff --git a/src/soc/fu/div/setup_stage.py b/src/soc/fu/div/setup_stage.py
index a0ea42ed..9b0455be 100644
--- a/src/soc/fu/div/setup_stage.py
+++ b/src/soc/fu/div/setup_stage.py
@@ -12,10 +12,7 @@ from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
 from soc.fu.div.pipe_data import CoreInputData
 from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
-
-def eq32(is_32bit, dest, src):
-    return [dest[0:32].eq(src[0:32]),
-            dest[32:64].eq(Mux(is_32bit, 0, src[32:64]))]
+from nmutil.util import eq32
 
 
 class DivSetupStage(PipeModBase):
diff --git a/src/soc/fu/mul/main_stage.py b/src/soc/fu/mul/main_stage.py
index 97ba81d7..ccdd0d35 100644
--- a/src/soc/fu/mul/main_stage.py
+++ b/src/soc/fu/mul/main_stage.py
@@ -28,9 +28,10 @@ class MulMainStage2(PipeModBase):
 
         ###### xer and context, all pass-through #####
 
-        comb += self.o.xer_ca.data.eq(self.i.xer_ca)
-        comb += self.o.neg_res.data.eq(self.i.neg_res)
-        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.xer_ca.eq(self.i.xer_ca)
+        comb += self.o.neg_res.eq(self.i.neg_res)
+        comb += self.o.neg_res32.eq(self.i.neg_res32)
+        comb += self.o.xer_so.eq(self.i.xer_so)
         comb += self.o.ctx.eq(self.i.ctx)
 
         return m
diff --git a/src/soc/fu/mul/pipe_data.py b/src/soc/fu/mul/pipe_data.py
index 429be008..1d047bb8 100644
--- a/src/soc/fu/mul/pipe_data.py
+++ b/src/soc/fu/mul/pipe_data.py
@@ -1,14 +1,17 @@
 from soc.fu.alu.alu_input_record import CompALUOpSubset
 from soc.fu.pipe_data import IntegerData, CommonPipeSpec
 from soc.fu.alu.pipe_data import ALUOutputData, ALUInputData
+from nmigen import Signal
 
 
 class MulIntermediateData(ALUInputData):
     def __init__(self, pspec):
         super().__init__(pspec)
 
-        neg_result = Signal(reset_less=True)
-        self.data.append(neg_result)
+        self.neg_res = Signal(reset_less=True)
+        self.neg_res32 = Signal(reset_less=True)
+        self.data.append(self.neg_res)
+        self.data.append(self.neg_res32)
 
 
 class MulOutputData(IntegerData):
@@ -18,8 +21,10 @@ class MulOutputData(IntegerData):
     def __init__(self, pspec):
         super().__init__(pspec, False)
 
-        neg_result = Signal(reset_less=True)
-        self.data.append(neg_result)
+        self.neg_res = Signal(reset_less=True)
+        self.neg_res32 = Signal(reset_less=True)
+        self.data.append(self.neg_res)
+        self.data.append(self.neg_res32)
 
 
 class MulPipeSpec(CommonPipeSpec):
diff --git a/src/soc/fu/mul/pipeline.py b/src/soc/fu/mul/pipeline.py
index d32d7529..a557c90e 100644
--- a/src/soc/fu/mul/pipeline.py
+++ b/src/soc/fu/mul/pipeline.py
@@ -1,9 +1,10 @@
 from nmutil.singlepipe import ControlBase
 from nmutil.pipemodbase import PipeModBaseChain
-from soc.fu.shift_rot.input_stage import ShiftRotInputStage
-from soc.fu.shift_rot.main_stage import ShiftRotMainStage
+from soc.fu.alu.input_stage import ALUInputStage
 from soc.fu.alu.output_stage import ALUOutputStage
-from soc.fu.mul.main_stage import MulMainStage1, MulMainStage2, MulMainStage3
+from soc.fu.mul.pre_stage import MulMainStage1
+from soc.fu.mul.main_stage import MulMainStage2
+from soc.fu.mul.post_stage import MulMainStage3
 
 
 class MulStages1(PipeModBaseChain):
@@ -26,17 +27,19 @@ class MulStages3(PipeModBaseChain):
         return [main3, out]
 
 
-class ShiftRotBasePipe(ControlBase):
+class MulBasePipe(ControlBase):
     def __init__(self, pspec):
         ControlBase.__init__(self)
         self.pspec = pspec
         self.pipe1 = MulStages1(pspec)
         self.pipe2 = MulStages2(pspec)
-        self.pipe2 = MulStages3(pspec)
-        self._eqs = self.connect([self.pipe1, self.pipe2])
+        self.pipe3 = MulStages3(pspec)
+        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 
     def elaborate(self, platform):
         m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
+        m.submodules.mul_pipe1 = self.pipe1
+        m.submodules.mul_pipe2 = self.pipe2
+        m.submodules.mul_pipe3 = self.pipe3
         m.d.comb += self._eqs
         return m
diff --git a/src/soc/fu/mul/post_stage.py b/src/soc/fu/mul/post_stage.py
index 501b4ed5..f2464085 100644
--- a/src/soc/fu/mul/post_stage.py
+++ b/src/soc/fu/mul/post_stage.py
@@ -32,8 +32,8 @@ class MulMainStage3(PipeModBase):
         comb += is_32bit.eq(op.is_32bit)
 
         # check negate: select signed/unsigned
-        o_s = Signal(signed(o.width * 2), reset_less=True)
-        mul_o = Signal(o.width * 2, reset_less=True)
+        o_s = Signal(signed(o.data.width * 2), reset_less=True)
+        mul_o = Signal(o.data.width * 2, reset_less=True)
         comb += o_s.eq(-o_i)
         comb += mul_o.eq(Mux(self.i.neg_res, o_s, o_i))
         comb += o.ok.eq(1)
@@ -67,8 +67,8 @@ class MulMainStage3(PipeModBase):
 
         # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
         ca = Signal(2, reset_less=True)
-        comb += ca[0].eq(add_o[-1])                   # XER.CA
-        comb += ca[1].eq(add_o[33] ^ (a[32] ^ b[32])) # XER.CA32
+        comb += ca[0].eq(mul_o[-1])                      # XER.CA
+        comb += ca[1].eq(mul_o[33] ^ (self.i.neg_res32)) # XER.CA32
         comb += cry_o.data.eq(ca)
         comb += cry_o.ok.eq(1)
 
diff --git a/src/soc/fu/mul/pre_stage.py b/src/soc/fu/mul/pre_stage.py
index ff1e3220..3ce2f933 100644
--- a/src/soc/fu/mul/pre_stage.py
+++ b/src/soc/fu/mul/pre_stage.py
@@ -4,7 +4,7 @@ from nmutil.pipemodbase import PipeModBase
 from soc.fu.alu.pipe_data import ALUInputData
 from soc.fu.mul.pipe_data import MulIntermediateData
 from ieee754.part.partsig import PartitionedSignal
-
+from nmutil.util import eq32
 
 class MulMainStage1(PipeModBase):
     def __init__(self, pspec):
@@ -21,32 +21,44 @@ class MulMainStage1(PipeModBase):
         comb = m.d.comb
 
         # convenience variables
-        a, b = self.i.a, self.i.b
+        a, b, op = self.i.a, self.i.b, self.i.ctx.op
         a_o, b_o, neg_res_o = self.o.a, self.o.b, self.o.neg_res
+        neg_res_o, neg_res32_o = self.o.neg_res, self.o.neg_res32
 
         # check if op is 32-bit, and get sign bit from operand a
         is_32bit = Signal(reset_less=True)
         sign_a = Signal(reset_less=True)
         sign_b = Signal(reset_less=True)
+        sign32_a = Signal(reset_less=True)
+        sign32_b = Signal(reset_less=True)
         comb += is_32bit.eq(op.is_32bit)
 
         # work out if a/b are negative (check 32-bit / signed)
         comb += sign_a.eq(Mux(op.is_32bit, a[31], a[63]) & op.is_signed)
         comb += sign_b.eq(Mux(op.is_32bit, b[31], b[63]) & op.is_signed)
+        comb += sign32_a.eq(a[31] & op.is_signed)
+        comb += sign32_b.eq(b[31] & op.is_signed)
 
         # work out if result is negative sign
         comb += neg_res_o.eq(sign_a ^ sign_b)
+        comb += neg_res32_o.eq(sign32_a ^ sign32_b) # pass through for OV32
 
         # negation of a 64-bit value produces the same lower 32-bit
         # result as negation of just the lower 32-bits, so we don't
         # need to do anything special before negating
-        comb += a_o.eq(Mux(sign_a, -a, a))
-        comb += b_o.eq(Mux(sign_b, -b, b))
+        abs_a = Signal(64, reset_less=True)
+        abs_b = Signal(64, reset_less=True)
+        comb += abs_a.eq(Mux(sign_a, -a, a))
+        comb += abs_b.eq(Mux(sign_b, -b, b))
+
+        # set up 32/64 bit inputs (eq32 presumably zeroes bits 32-63 if 32-bit)
+        comb += eq32(is_32bit, a_o, abs_a)
+        comb += eq32(is_32bit, b_o, abs_b)
 
         ###### XER and context, both pass-through #####
 
-        comb += self.o.xer_ca.data.eq(self.i.xer_ca)
-        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.xer_ca.eq(self.i.xer_ca)
+        comb += self.o.xer_so.eq(self.i.xer_so)
         comb += self.o.ctx.eq(self.i.ctx)
 
         return m
diff --git a/src/soc/fu/mul/test/test_pipe_caller.py b/src/soc/fu/mul/test/test_pipe_caller.py
index 88ac5499..5fa0779d 100644
--- a/src/soc/fu/mul/test/test_pipe_caller.py
+++ b/src/soc/fu/mul/test/test_pipe_caller.py
@@ -6,66 +6,46 @@ import unittest
 from soc.decoder.isa.caller import ISACaller, special_sprs
 from soc.decoder.power_decoder import (create_pdecode)
 from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.power_enums import (XER_bits, Function, InternalOp, CryIn)
 from soc.decoder.selectable_int import SelectableInt
 from soc.simulator.program import Program
 from soc.decoder.isa.all import ISA
 
+
+from soc.fu.test.common import (TestCase, ALUHelpers)
 from soc.fu.mul.pipeline import MulBasePipe
-from soc.fu.alu.alu_input_record import CompALUOpSubset
 from soc.fu.mul.pipe_data import MulPipeSpec
 import random
 
 
-class TestCase:
-    def __init__(self, program, regs, sprs, name):
-        self.program = program
-        self.regs = regs
-        self.sprs = sprs
-        self.name = name
+def get_cu_inputs(dec2, sim):
+    """naming (res) must conform to MulFunctionUnit input regspec
+    """
+    res = {}
+
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
+    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
+    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
+
+    print ("alu get_cu_inputs", res)
+
+    return res
+
 
 
 def set_alu_inputs(alu, dec2, sim):
-    inputs = []
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
     # and place it into data_i.b
 
-    reg3_ok = yield dec2.e.read_reg3.ok
-    if reg3_ok:
-        reg3_sel = yield dec2.e.read_reg3.data
-        data3 = sim.gpr(reg3_sel).value
-    else:
-        data3 = 0
-    reg1_ok = yield dec2.e.read_reg1.ok
-    if reg1_ok:
-        reg1_sel = yield dec2.e.read_reg1.data
-        data1 = sim.gpr(reg1_sel).value
-    else:
-        data1 = 0
-    reg2_ok = yield dec2.e.read_reg2.ok
-    imm_ok = yield dec2.e.imm_data.ok
-    if reg2_ok:
-        reg2_sel = yield dec2.e.read_reg2.data
-        data2 = sim.gpr(reg2_sel).value
-    elif imm_ok:
-        data2 = yield dec2.e.imm_data.imm
-    else:
-        data2 = 0
-
-    yield alu.p.data_i.ra.eq(data1)
-    yield alu.p.data_i.rb.eq(data2)
-    yield alu.p.data_i.rs.eq(data3)
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
-    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
-    carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0
-    yield alu.p.data_i.xer_ca[0].eq(carry)
-    yield alu.p.data_i.xer_ca[1].eq(carry32)
-    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
-    yield alu.p.data_i.xer_so.eq(so)
-    
+    inp = yield from get_cu_inputs(dec2, sim)
+    yield from ALUHelpers.set_int_ra(alu, dec2, inp)
+    yield from ALUHelpers.set_int_rb(alu, dec2, inp)
+
+    yield from ALUHelpers.set_xer_ca(alu, dec2, inp)
+    yield from ALUHelpers.set_xer_so(alu, dec2, inp)
+
 
 # This test bench is a bit different than is usual. Initially when I
 # was writing it, I had all of the tests call a function to create a
@@ -85,92 +65,26 @@ def set_extra_alu_inputs(alu, dec2, sim):
 # massively. Before, it took around 1 minute on my computer, now it
 # takes around 3 seconds
 
-test_data = []
-
 
 class MulTestCase(FHDLTestCase):
+    test_data = []
+
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
-    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
-        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
-        test_data.append(tc)
 
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        tc = TestCase(prog, self.test_name, initial_regs, initial_sprs)
+        self.test_data.append(tc)
 
-    def test_shift(self):
-        insns = ["slw", "sld", "srw", "srd", "sraw", "srad"]
-        for i in range(20):
+    def test_rand_mullw(self):
+        insns = ["mullw", "mullw.", "mullwo", "mullwo."]
+        for i in range(40):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
             initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, 63)
-            print(initial_regs[1], initial_regs[2])
-            self.run_tst_program(Program(lst), initial_regs)
-
-
-    def test_shift_arith(self):
-        lst = ["sraw 3, 1, 2"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        initial_regs[2] = random.randint(0, 63)
-        print(initial_regs[1], initial_regs[2])
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_shift_once(self):
-        lst = ["slw 3, 1, 4",
-               "slw 3, 1, 2"]
-        initial_regs = [0] * 32
-        initial_regs[1] = 0x80000000
-        initial_regs[2] = 0x40
-        initial_regs[4] = 0x00
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwinm(self):
-        for i in range(10):
-            mb = random.randint(0,31)
-            me = random.randint(0,31)
-            sh = random.randint(0,31)
-            lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwimi(self):
-        lst = ["rlwimi 3, 1, 5, 20, 6"]
-        initial_regs = [0] * 32
-        initial_regs[1] = 0xdeadbeef
-        initial_regs[3] = 0x12345678
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwnm(self):
-        lst = ["rlwnm 3, 1, 2, 20, 6"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        initial_regs[2] = random.randint(0, 63)
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rldicl(self):
-        lst = ["rldicl 3, 1, 5, 20"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rldicr(self):
-        lst = ["rldicr 3, 1, 5, 20"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlc(self):
-        insns = ["rldic", "rldicl", "rldicr"]
-        for i in range(20):
-            choice = random.choice(insns)
-            sh = random.randint(0, 63)
-            m = random.randint(0, 63)
-            lst = [f"{choice} 3, 1, {sh}, {m}"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1<<64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_ilang(self):
@@ -210,61 +124,93 @@ class TestRunner(FHDLTestCase):
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
-                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+                sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
+                                test.mem, test.msr)
                 gen = program.generate_instructions()
                 instructions = list(zip(gen, program.assembly.splitlines()))
 
-                index = simulator.pc.CIA.value//4
+                index = sim.pc.CIA.value//4
                 while index < len(instructions):
                     ins, code = instructions[index]
 
-                    print("0x{:X}".format(ins & 0xffffffff))
+                    print("instruction: 0x{:X}".format(ins & 0xffffffff))
                     print(code)
+                    if 'XER' in sim.spr:
+                        so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+                        ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
+                        ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
+                        print ("before: so/ov/32", so, ov, ov32)
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
-                    self.assertEqual(fn_unit, Function.SHIFT_ROT.value)
-                    yield from set_alu_inputs(alu, pdecode2, simulator)
-                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
-                    yield 
+                    fn_unit = yield pdecode2.e.do.fn_unit
+                    self.assertEqual(fn_unit, Function.MUL.value)
+                    yield from set_alu_inputs(alu, pdecode2, sim)
+                    yield
                     opname = code.split(' ')[0]
-                    yield from simulator.call(opname)
-                    index = simulator.pc.CIA.value//4
+                    yield from sim.call(opname)
+                    index = sim.pc.CIA.value//4
 
                     vld = yield alu.n.valid_o
                     while not vld:
                         yield
                         vld = yield alu.n.valid_o
                     yield
-                    alu_out = yield alu.n.data_o.o
-                    out_reg_valid = yield pdecode2.e.write_reg.ok
-                    if out_reg_valid:
-                        write_reg_idx = yield pdecode2.e.write_reg.data
-                        expected = simulator.gpr(write_reg_idx).value
-                        msg = f"expected {expected:x}, actual: {alu_out:x}"
-                        self.assertEqual(expected, alu_out, msg)
-                    yield from self.check_extra_alu_outputs(alu, pdecode2,
-                                                            simulator)
+
+                    yield from self.check_alu_outputs(alu, pdecode2, sim, code)
 
         sim.add_sync_process(process)
-        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+        with sim.write_vcd("div_simulator.vcd", "div_simulator.gtkw",
                             traces=[]):
             sim.run()
-    def check_extra_alu_outputs(self, alu, dec2, sim):
-        rc = yield dec2.e.rc.data
+
+    def check_alu_outputs(self, alu, dec2, sim, code):
+
+        rc = yield dec2.e.do.rc.data
+        cridx_ok = yield dec2.e.write_cr.ok
+        cridx = yield dec2.e.write_cr.data
+
+        print ("check extra output", repr(code), cridx_ok, cridx)
         if rc:
-            cr_expected = sim.crl[0].get_range().value
-            cr_actual = yield alu.n.data_o.cr0
-            self.assertEqual(cr_expected, cr_actual)
+            self.assertEqual(cridx, 0, code)
+
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        if not oe or not oe_ok:
+            # if OE not enabled, xer_so.ok and xer_ov.ok must both be false
+            so_ok = yield alu.n.data_o.xer_so.ok
+            ov_ok = yield alu.n.data_o.xer_ov.ok
+            self.assertEqual(so_ok, False, code)
+            self.assertEqual(ov_ok, False, code)
+
+        sim_o = {}
+        res = {}
+
+        yield from ALUHelpers.get_cr_a(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ov(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ca(res, alu, dec2)
+        yield from ALUHelpers.get_int_o(res, alu, dec2)
+        yield from ALUHelpers.get_xer_so(res, alu, dec2)
+
+        yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2)
+        yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
+        yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2)
+
+        ALUHelpers.check_int_o(self, res, sim_o, code)
+        ALUHelpers.check_xer_ov(self, res, sim_o, code)
+        ALUHelpers.check_xer_ca(self, res, sim_o, code)
+        ALUHelpers.check_xer_so(self, res, sim_o, code)
+        ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code))
 
 
 if __name__ == "__main__":
     unittest.main(exit=False)
     suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
+    suite.addTest(TestRunner(MulTestCase.test_data))
 
     runner = unittest.TextTestRunner()
     runner.run(suite)