rename pipe to fu
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 18 May 2020 03:52:43 +0000 (04:52 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 18 May 2020 03:52:43 +0000 (04:52 +0100)
100 files changed:
src/soc/fu/__init__.py [new file with mode: 0644]
src/soc/fu/alu/alu_input_record.py [new file with mode: 0644]
src/soc/fu/alu/formal/.gitignore [new file with mode: 0644]
src/soc/fu/alu/formal/proof_input_stage.py [new file with mode: 0644]
src/soc/fu/alu/formal/proof_main_stage.py [new file with mode: 0644]
src/soc/fu/alu/formal/proof_output_stage.py [new file with mode: 0644]
src/soc/fu/alu/input_stage.py [new file with mode: 0644]
src/soc/fu/alu/main_stage.py [new file with mode: 0644]
src/soc/fu/alu/output_stage.py [new file with mode: 0644]
src/soc/fu/alu/pipe_data.py [new file with mode: 0644]
src/soc/fu/alu/pipeline.py [new file with mode: 0644]
src/soc/fu/alu/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/fu/branch/__init__.py [new file with mode: 0644]
src/soc/fu/branch/br_input_record.py [new file with mode: 0644]
src/soc/fu/branch/formal/proof_input_stage.py [new file with mode: 0644]
src/soc/fu/branch/formal/proof_main_stage.py [new file with mode: 0644]
src/soc/fu/branch/input_stage.py [new file with mode: 0644]
src/soc/fu/branch/main_stage.py [new file with mode: 0644]
src/soc/fu/branch/pipe_data.py [new file with mode: 0644]
src/soc/fu/branch/pipeline.py [new file with mode: 0644]
src/soc/fu/branch/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/fu/countzero/countzero.py [new file with mode: 0644]
src/soc/fu/countzero/test/test_countzero.py [new file with mode: 0644]
src/soc/fu/cr/main_stage.py [new file with mode: 0644]
src/soc/fu/cr/pipe_data.py [new file with mode: 0644]
src/soc/fu/cr/pipeline.py [new file with mode: 0644]
src/soc/fu/cr/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/fu/logical/__init__.py [new file with mode: 0644]
src/soc/fu/logical/bperm.py [new file with mode: 0644]
src/soc/fu/logical/formal/.gitignore [new file with mode: 0644]
src/soc/fu/logical/formal/proof_bperm.py [new file with mode: 0644]
src/soc/fu/logical/formal/proof_input_stage.py [new file with mode: 0644]
src/soc/fu/logical/formal/proof_main_stage.py [new file with mode: 0644]
src/soc/fu/logical/input_stage.py [new file with mode: 0644]
src/soc/fu/logical/main_stage.py [new file with mode: 0644]
src/soc/fu/logical/pipe_data.py [new file with mode: 0644]
src/soc/fu/logical/pipeline.py [new file with mode: 0644]
src/soc/fu/logical/test/test_bperm.py [new file with mode: 0644]
src/soc/fu/logical/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/fu/shift_rot/formal/.gitignore [new file with mode: 0644]
src/soc/fu/shift_rot/formal/proof_main_stage.py [new file with mode: 0644]
src/soc/fu/shift_rot/input_stage.py [new file with mode: 0644]
src/soc/fu/shift_rot/main_stage.py [new file with mode: 0644]
src/soc/fu/shift_rot/maskgen.py [new file with mode: 0644]
src/soc/fu/shift_rot/pipe_data.py [new file with mode: 0644]
src/soc/fu/shift_rot/pipeline.py [new file with mode: 0644]
src/soc/fu/shift_rot/rotator.py [new file with mode: 0644]
src/soc/fu/shift_rot/rotl.py [new file with mode: 0644]
src/soc/fu/shift_rot/test/test_maskgen.py [new file with mode: 0644]
src/soc/fu/shift_rot/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/pipe/__init__.py [deleted file]
src/soc/pipe/alu/alu_input_record.py [deleted file]
src/soc/pipe/alu/formal/.gitignore [deleted file]
src/soc/pipe/alu/formal/proof_input_stage.py [deleted file]
src/soc/pipe/alu/formal/proof_main_stage.py [deleted file]
src/soc/pipe/alu/formal/proof_output_stage.py [deleted file]
src/soc/pipe/alu/input_stage.py [deleted file]
src/soc/pipe/alu/main_stage.py [deleted file]
src/soc/pipe/alu/output_stage.py [deleted file]
src/soc/pipe/alu/pipe_data.py [deleted file]
src/soc/pipe/alu/pipeline.py [deleted file]
src/soc/pipe/alu/test/test_pipe_caller.py [deleted file]
src/soc/pipe/branch/__init__.py [deleted file]
src/soc/pipe/branch/br_input_record.py [deleted file]
src/soc/pipe/branch/formal/proof_input_stage.py [deleted file]
src/soc/pipe/branch/formal/proof_main_stage.py [deleted file]
src/soc/pipe/branch/input_stage.py [deleted file]
src/soc/pipe/branch/main_stage.py [deleted file]
src/soc/pipe/branch/pipe_data.py [deleted file]
src/soc/pipe/branch/pipeline.py [deleted file]
src/soc/pipe/branch/test/test_pipe_caller.py [deleted file]
src/soc/pipe/countzero/countzero.py [deleted file]
src/soc/pipe/countzero/test/test_countzero.py [deleted file]
src/soc/pipe/cr/main_stage.py [deleted file]
src/soc/pipe/cr/pipe_data.py [deleted file]
src/soc/pipe/cr/pipeline.py [deleted file]
src/soc/pipe/cr/test/test_pipe_caller.py [deleted file]
src/soc/pipe/logical/__init__.py [deleted file]
src/soc/pipe/logical/bperm.py [deleted file]
src/soc/pipe/logical/formal/.gitignore [deleted file]
src/soc/pipe/logical/formal/proof_bperm.py [deleted file]
src/soc/pipe/logical/formal/proof_input_stage.py [deleted file]
src/soc/pipe/logical/formal/proof_main_stage.py [deleted file]
src/soc/pipe/logical/input_stage.py [deleted file]
src/soc/pipe/logical/main_stage.py [deleted file]
src/soc/pipe/logical/pipe_data.py [deleted file]
src/soc/pipe/logical/pipeline.py [deleted file]
src/soc/pipe/logical/test/test_bperm.py [deleted file]
src/soc/pipe/logical/test/test_pipe_caller.py [deleted file]
src/soc/pipe/shift_rot/formal/.gitignore [deleted file]
src/soc/pipe/shift_rot/formal/proof_main_stage.py [deleted file]
src/soc/pipe/shift_rot/input_stage.py [deleted file]
src/soc/pipe/shift_rot/main_stage.py [deleted file]
src/soc/pipe/shift_rot/maskgen.py [deleted file]
src/soc/pipe/shift_rot/pipe_data.py [deleted file]
src/soc/pipe/shift_rot/pipeline.py [deleted file]
src/soc/pipe/shift_rot/rotator.py [deleted file]
src/soc/pipe/shift_rot/rotl.py [deleted file]
src/soc/pipe/shift_rot/test/test_maskgen.py [deleted file]
src/soc/pipe/shift_rot/test/test_pipe_caller.py [deleted file]

diff --git a/src/soc/fu/__init__.py b/src/soc/fu/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/soc/fu/alu/alu_input_record.py b/src/soc/fu/alu/alu_input_record.py
new file mode 100644 (file)
index 0000000..41a40eb
--- /dev/null
@@ -0,0 +1,80 @@
+from nmigen.hdl.rec import Record, Layout
+
+from soc.decoder.power_enums import InternalOp, Function, CryIn
+
+
+class CompALUOpSubset(Record):
+    """CompALUOpSubset
+
+    a copy of the relevant subset information from Decode2Execute1Type
+    needed for ALU operations.  use with eq_from_execute1 (below) to
+    grab subsets.
+    """
+    def __init__(self, name=None):
+        layout = (('insn_type', InternalOp),
+                  ('fn_unit', Function),
+                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+                    #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
+                    #'xerc = XerBits() # NO: this is from the XER SPR
+                  ('lk', 1),
+                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
+                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
+                  ('invert_a', 1),
+                  ('invert_out', 1),
+                  ('input_carry', CryIn),
+                  ('output_carry', 1),
+                  ('input_cr', 1),
+                  ('output_cr', 1),
+                  ('is_32bit', 1),
+                  ('is_signed', 1),
+                  ('data_len', 4), # TODO: should be in separate CompLDSTSubset
+                  ('insn', 32),
+                  ('byte_reverse', 1),
+                  ('sign_extend', 1))
+
+        Record.__init__(self, Layout(layout), name=name)
+
+        # grrr.  Record does not have kwargs
+        self.insn_type.reset_less = True
+        self.fn_unit.reset_less = True
+        #self.cr = Signal(32, reset_less = True
+        #self.xerc = XerBits(
+        self.lk.reset_less = True
+        self.invert_a.reset_less = True
+        self.invert_out.reset_less = True
+        self.input_carry.reset_less = True
+        self.output_carry.reset_less = True
+        self.input_cr.reset_less = True
+        self.output_cr.reset_less = True
+        self.is_32bit.reset_less = True
+        self.is_signed.reset_less = True
+        self.data_len.reset_less = True
+        self.byte_reverse.reset_less = True
+        self.sign_extend.reset_less = True
+
+    def eq_from_execute1(self, other):
+        """ use this to copy in from Decode2Execute1Type
+        """
+        res = []
+        for fname, sig in self.fields.items():
+            eqfrom = other.fields[fname]
+            res.append(sig.eq(eqfrom))
+        return res
+
+    def ports(self):
+        return [self.insn_type,
+                #self.cr,
+                #self.xerc,
+                self.lk,
+                self.invert_a,
+                self.invert_out,
+                self.input_carry,
+                self.output_carry,
+                self.input_cr,
+                self.output_cr,
+                self.is_32bit,
+                self.is_signed,
+                self.data_len,
+                self.byte_reverse,
+                self.sign_extend,
+        ]
diff --git a/src/soc/fu/alu/formal/.gitignore b/src/soc/fu/alu/formal/.gitignore
new file mode 100644 (file)
index 0000000..150f68c
--- /dev/null
@@ -0,0 +1 @@
+*/*
diff --git a/src/soc/fu/alu/formal/proof_input_stage.py b/src/soc/fu/alu/formal/proof_input_stage.py
new file mode 100644 (file)
index 0000000..347ab7d
--- /dev/null
@@ -0,0 +1,77 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ALUInputStage(pspec)
+
+        a = Signal(64)
+        b = Signal(64)
+        comb += [dut.i.a.eq(a),
+                 dut.i.b.eq(b),
+                 a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        for p in rec.ports():
+            name = p.name
+            rec_sig = p
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        with m.If(rec.invert_a):
+            comb += Assert(dut.o.a == ~a)
+        with m.Else():
+            comb += Assert(dut.o.a == a)
+
+        comb += Assert(dut.o.b == b)
+
+        return m
+
+
+class GTCombinerTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=4)
+        self.assertFormal(module, mode="cover", depth=4)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("input_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/alu/formal/proof_main_stage.py b/src/soc/fu/alu/formal/proof_main_stage.py
new file mode 100644 (file)
index 0000000..f102fc2
--- /dev/null
@@ -0,0 +1,88 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+                    signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.main_stage import ALUMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ALUMainStage(pspec)
+
+        # convenience variables
+        a = dut.i.a
+        b = dut.i.b
+        carry_in = dut.i.carry_in
+        so_in = dut.i.so
+        carry_out = dut.o.carry_out
+        o = dut.o.o
+
+        # setup random inputs
+        comb += [a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64)),
+                 carry_in.eq(AnyConst(1)),
+                 so_in.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        for rec_sig in rec.ports():
+            name = rec_sig.name
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        # signed and signed/32 versions of input a
+        a_signed = Signal(signed(64))
+        a_signed_32 = Signal(signed(32))
+        comb += a_signed.eq(a)
+        comb += a_signed_32.eq(a[0:32])
+
+        # main assertion of arithmetic operations
+        with m.Switch(rec.insn_type):
+            with m.Case(InternalOp.OP_ADD):
+                comb += Assert(Cat(o, carry_out) == (a + b + carry_in))
+
+        return m
+
+
+class ALUTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=2)
+        self.assertFormal(module, mode="cover", depth=2)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("main_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/alu/formal/proof_output_stage.py b/src/soc/fu/alu/formal/proof_output_stage.py
new file mode 100644 (file)
index 0000000..288da07
--- /dev/null
@@ -0,0 +1,115 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux, Cat, signed
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.output_stage import ALUOutputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ALUOutputStage(pspec)
+
+        o = Signal(64)
+        carry_out = Signal()
+        carry_out32 = Signal()
+        ov = Signal()
+        ov32 = Signal()
+        cr0 = Signal(4)
+        so = Signal()
+        comb += [dut.i.o.eq(o),
+                 dut.i.carry_out.eq(carry_out),
+                 dut.i.so.eq(so),
+                 dut.i.carry_out32.eq(carry_out32),
+                 dut.i.cr0.eq(cr0),
+                 dut.i.ov.eq(ov),
+                 dut.i.ov32.eq(ov32),
+                 o.eq(AnyConst(64)),
+                 carry_out.eq(AnyConst(1)),
+                 carry_out32.eq(AnyConst(1)),
+                 ov.eq(AnyConst(1)),
+                 ov32.eq(AnyConst(1)),
+                 cr0.eq(AnyConst(4)),
+                 so.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        with m.If(dut.i.ctx.op.invert_out):
+            comb += Assert(dut.o.o == ~o)
+        with m.Else():
+            comb += Assert(dut.o.o == o)
+
+        cr_out = Signal.like(cr0)
+        comb += cr_out.eq(dut.o.cr0)
+
+        o_signed = Signal(signed(64))
+        comb += o_signed.eq(dut.o.o)
+        # Assert only one of the comparison bits is set
+        comb += Assert(cr_out[3] + cr_out[2] + cr_out[1] == 1)
+        with m.If(o_signed == 0):
+            comb += Assert(cr_out[1] == 1)
+        with m.Elif(o_signed > 0):
+            # sigh.  see https://bugs.libre-soc.org/show_bug.cgi?id=305#c61
+            # for OP_CMP we do b-a rather than a-b (just like ADD) and
+            # then invert the *test condition*.
+            with m.If(rec.insn_type == InternalOp.OP_CMP):
+                comb += Assert(cr_out[3] == 1)
+            with m.Else():
+                comb += Assert(cr_out[2] == 1)
+        with m.Elif(o_signed < 0):
+            # ditto as above
+            with m.If(rec.insn_type == InternalOp.OP_CMP):
+                comb += Assert(cr_out[2] == 1)
+            with m.Else():
+                comb += Assert(cr_out[3] == 1)
+
+
+        # Assert that op gets copied from the input to output
+        for p in rec.ports():
+            name = p.name
+            rec_sig = p
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+
+        return m
+
+class GTCombinerTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=4)
+        self.assertFormal(module, mode="cover", depth=4)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("output_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/alu/input_stage.py b/src/soc/fu/alu/input_stage.py
new file mode 100644 (file)
index 0000000..7520732
--- /dev/null
@@ -0,0 +1,57 @@
+# This stage is intended to adjust the input data before sending it to
+# the acutal ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+                    unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.alu.pipe_data import ALUInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ALUInputStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "input")
+
+    def ispec(self):
+        return ALUInputData(self.pspec)
+
+    def ospec(self):
+        return ALUInputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        ctx = self.i.ctx
+
+        ##### operand A #####
+
+        # operand a to be as-is or inverted
+        a = Signal.like(self.i.a)
+
+        with m.If(ctx.op.invert_a):
+            comb += a.eq(~self.i.a)
+        with m.Else():
+            comb += a.eq(self.i.a)
+
+        comb += self.o.a.eq(a)
+        comb += self.o.b.eq(self.i.b)
+
+        ##### carry-in #####
+
+        # either copy incoming carry or set to 1/0 as defined by op
+        with m.Switch(ctx.op.input_carry):
+            with m.Case(CryIn.ZERO):
+                comb += self.o.carry_in.eq(0)
+            with m.Case(CryIn.ONE):
+                comb += self.o.carry_in.eq(1)
+            with m.Case(CryIn.CA):
+                comb += self.o.carry_in.eq(self.i.carry_in)
+
+        ##### sticky overflow and context (both pass-through) #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(ctx)
+
+        return m
diff --git a/src/soc/fu/alu/main_stage.py b/src/soc/fu/alu/main_stage.py
new file mode 100644 (file)
index 0000000..5100166
--- /dev/null
@@ -0,0 +1,84 @@
+# This stage is intended to do most of the work of executing the Arithmetic
+# instructions. This would be like the additions, compares, and sign-extension
+# as well as carry and overflow generation. This module
+# however should not gate the carry or overflow, that's up to the
+# output stage
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+from nmutil.pipemodbase import PipeModBase
+from soc.alu.pipe_data import ALUInputData, ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+class ALUMainStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "main")
+
+    def ispec(self):
+        return ALUInputData(self.pspec)
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        carry_out, o = self.o.carry_out, self.o.o
+
+        # check if op is 32-bit, and get sign bit from operand a
+        is_32bit = Signal(reset_less=True)
+        sign_bit = Signal(reset_less=True)
+        comb += is_32bit.eq(self.i.ctx.op.is_32bit)
+        comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63]))
+
+        # little trick: do the add using only one add (not 2)
+        add_a = Signal(self.i.a.width + 2, reset_less=True)
+        add_b = Signal(self.i.a.width + 2, reset_less=True)
+        add_output = Signal(self.i.a.width + 2, reset_less=True)
+        with m.If((self.i.ctx.op.insn_type == InternalOp.OP_ADD) |
+                  (self.i.ctx.op.insn_type == InternalOp.OP_CMP)):
+            # in bit 0, 1+carry_in creates carry into bit 1 and above
+            comb += add_a.eq(Cat(self.i.carry_in, self.i.a, Const(0, 1)))
+            comb += add_b.eq(Cat(Const(1, 1), self.i.b, Const(0, 1)))
+            comb += add_output.eq(add_a + add_b)
+
+        ##########################
+        # main switch-statement for handling arithmetic operations
+
+        with m.Switch(self.i.ctx.op.insn_type):
+            #### CMP, CMPL ####
+            with m.Case(InternalOp.OP_CMP):
+                # this is supposed to be inverted (b-a, not a-b)
+                # however we have a trick: instead of adding either 2x 64-bit
+                # MUXes to invert a and b, or messing with a 64-bit output,
+                # swap +ve and -ve test in the *output* stage using an XOR gate
+                comb += o.eq(add_output[1:-1])
+
+            #### add ####
+            with m.Case(InternalOp.OP_ADD):
+                # bit 0 is not part of the result, top bit is the carry-out
+                comb += o.eq(add_output[1:-1])
+                comb += carry_out.eq(add_output[-1])
+
+            #### exts (sign-extend) ####
+            with m.Case(InternalOp.OP_EXTS):
+                with m.If(self.i.ctx.op.data_len == 1):
+                    comb += o.eq(Cat(self.i.a[0:8], Repl(self.i.a[7], 64-8)))
+                with m.If(self.i.ctx.op.data_len == 2):
+                    comb += o.eq(Cat(self.i.a[0:16], Repl(self.i.a[15], 64-16)))
+                with m.If(self.i.ctx.op.data_len == 4):
+                    comb += o.eq(Cat(self.i.a[0:32], Repl(self.i.a[31], 64-32)))
+            with m.Case(InternalOp.OP_CMPEQB):
+                eqs = Signal(8, reset_less=True)
+                src1 = Signal(8, reset_less=True)
+                comb += src1.eq(self.i.a[0:8])
+                for i in range(8):
+                    comb += eqs[i].eq(src1 == self.i.b[8*i:8*(i+1)])
+                comb += self.o.cr0.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/alu/output_stage.py b/src/soc/fu/alu/output_stage.py
new file mode 100644 (file)
index 0000000..1253795
--- /dev/null
@@ -0,0 +1,61 @@
+# This stage is intended to handle the gating of carry and overflow
+# out, summary overflow generation, and updating the condition
+# register
+from nmigen import (Module, Signal, Cat, Repl)
+from nmutil.pipemodbase import PipeModBase
+from soc.alu.pipe_data import ALUInputData, ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+class ALUOutputStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "output")
+
+    def ispec(self):
+        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def ospec(self):
+        return ALUOutputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # op requests inversion of the output
+        o = Signal.like(self.i.o)
+        with m.If(self.i.ctx.op.invert_out):
+            comb += o.eq(~self.i.o)
+        with m.Else():
+            comb += o.eq(self.i.o)
+
+        # create condition register cr0 and sticky-overflow
+        is_zero = Signal(reset_less=True)
+        is_positive = Signal(reset_less=True)
+        is_negative = Signal(reset_less=True)
+        msb_test = Signal(reset_less=True) # set equal to MSB, invert if OP=CMP
+        is_cmp = Signal(reset_less=True)   # true if OP=CMP
+        so = Signal(reset_less=True)
+
+        # TODO: if o[63] is XORed with "operand == OP_CMP"
+        # that can be used as a test
+        # see https://bugs.libre-soc.org/show_bug.cgi?id=305#c60
+
+        comb += is_cmp.eq(self.i.ctx.op.insn_type == InternalOp.OP_CMP)
+        comb += msb_test.eq(o[-1] ^ is_cmp)
+        comb += is_zero.eq(o == 0)
+        comb += is_positive.eq(~is_zero & ~msb_test)
+        comb += is_negative.eq(~is_zero & msb_test)
+        comb += so.eq(self.i.so | self.i.ov)
+
+        comb += self.o.o.eq(o)
+        with m.If(self.i.ctx.op.insn_type != InternalOp.OP_CMPEQB):
+            comb += self.o.cr0.eq(Cat(so, is_zero, is_positive, is_negative))
+        with m.Else():
+            comb += self.o.cr0.eq(self.i.cr0)
+            
+        comb += self.o.so.eq(so)
+
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/alu/pipe_data.py b/src/soc/fu/alu/pipe_data.py
new file mode 100644 (file)
index 0000000..c386397
--- /dev/null
@@ -0,0 +1,90 @@
+from nmigen import Signal, Const
+from nmutil.dynamicpipe import SimpleHandshakeRedir
+from soc.alu.alu_input_record import CompALUOpSubset
+from ieee754.fpcommon.getop import FPPipeContext
+
+
+class IntegerData:
+
+    def __init__(self, pspec):
+        self.ctx = FPPipeContext(pspec)
+        self.muxid = self.ctx.muxid
+
+    def __iter__(self):
+        yield from self.ctx
+
+    def eq(self, i):
+        return [self.ctx.eq(i.ctx)]
+
+    def ports(self):
+        return self.ctx.ports()
+
+
+class ALUInputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.a = Signal(64, reset_less=True) # RA
+        self.b = Signal(64, reset_less=True) # RB/immediate
+        self.so = Signal(reset_less=True)
+        self.carry_in = Signal(reset_less=True)
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.a
+        yield self.b
+        yield self.carry_in
+        yield self.so
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.a.eq(i.a), self.b.eq(i.b),
+                      self.carry_in.eq(i.carry_in),
+                      self.so.eq(i.so)]
+
+# TODO: ALUIntermediateData which does not have
+# cr0, ov, ov32 in it (because they are generated as outputs by
+# the final output stage, not by the intermediate stage)
+# https://bugs.libre-soc.org/show_bug.cgi?id=305#c19
+
+class ALUOutputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.o = Signal(64, reset_less=True, name="stage_o")
+        self.carry_out = Signal(reset_less=True)
+        self.carry_out32 = Signal(reset_less=True)
+        self.cr0 = Signal(4, reset_less=True)
+        self.ov = Signal(reset_less=True)
+        self.ov32 = Signal(reset_less=True)
+        self.so = Signal(reset_less=True)
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.o
+        yield self.carry_out
+        yield self.carry_out32
+        yield self.cr0
+        yield self.ov
+        yield self.ov32
+        yield self.so
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.o.eq(i.o),
+                      self.carry_out.eq(i.carry_out),
+                      self.carry_out32.eq(i.carry_out32),
+                      self.cr0.eq(i.cr0), self.ov.eq(i.ov),
+                      self.ov32.eq(i.ov32), self.so.eq(i.so)]
+
+
+class IntPipeSpec:
+    def __init__(self, id_wid=2, op_wid=1):
+        self.id_wid = id_wid
+        self.op_wid = op_wid
+        self.opkls = lambda _: CompALUOpSubset(name="op")
+        self.stage = None
+
+
+class ALUPipeSpec(IntPipeSpec):
+    def __init__(self, id_wid, op_wid):
+        super().__init__(id_wid, op_wid)
+        self.pipekls = SimpleHandshakeRedir
diff --git a/src/soc/fu/alu/pipeline.py b/src/soc/fu/alu/pipeline.py
new file mode 100644 (file)
index 0000000..e8dd199
--- /dev/null
@@ -0,0 +1,25 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.main_stage import ALUMainStage
+from soc.alu.output_stage import ALUOutputStage
+
+class ALUStages(PipeModBaseChain):
+    def get_chain(self):
+        inp = ALUInputStage(self.pspec)
+        main = ALUMainStage(self.pspec)
+        out = ALUOutputStage(self.pspec)
+        return [inp, main, out]
+
+
+class ALUBasePipe(ControlBase):
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = ALUStages(pspec)
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
diff --git a/src/soc/fu/alu/test/test_pipe_caller.py b/src/soc/fu/alu/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..f42112e
--- /dev/null
@@ -0,0 +1,270 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function, InternalOp)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.alu.pipeline import ALUBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+class TestCase:
+    def __init__(self, program, regs, sprs, name):
+        self.program = program
+        self.regs = regs
+        self.sprs = sprs
+        self.name = name
+
+def get_rec_width(rec):
+    recwidth = 0
+    # Setup random inputs for dut.op
+    for p in rec.ports():
+        width = p.width
+        recwidth += width
+    return recwidth
+
+def set_alu_inputs(alu, dec2, sim):
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    reg3_ok = yield dec2.e.read_reg3.ok
+    reg1_ok = yield dec2.e.read_reg1.ok
+    assert reg3_ok != reg1_ok
+    if reg3_ok:
+        data1 = yield dec2.e.read_reg3.data
+        data1 = sim.gpr(data1).value
+    elif reg1_ok:
+        data1 = yield dec2.e.read_reg1.data
+        data1 = sim.gpr(data1).value
+    else:
+        data1 = 0
+
+    yield alu.p.data_i.a.eq(data1)
+
+    # If there's an immediate, set the B operand to that
+    reg2_ok = yield dec2.e.read_reg2.ok
+    imm_ok = yield dec2.e.imm_data.imm_ok
+    if imm_ok:
+        data2 = yield dec2.e.imm_data.imm
+    elif reg2_ok:
+        data2 = yield dec2.e.read_reg2.data
+        data2 = sim.gpr(data2).value
+    else:
+        data2 = 0
+    yield alu.p.data_i.b.eq(data2)
+
+
+
+def set_extra_alu_inputs(alu, dec2, sim):
+    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
+    yield alu.p.data_i.carry_in.eq(carry)
+    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+    yield alu.p.data_i.so.eq(so)
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class ALUTestCase(FHDLTestCase):
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
+        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+        test_data.append(tc)
+
+    def test_rand(self):
+        insns = ["add", "add.", "subf"]
+        for i in range(40):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rand_imm(self):
+        insns = ["addi", "addis", "subfic"]
+        for i in range(10):
+            choice = random.choice(insns)
+            imm = random.randint(-(1<<15), (1<<15)-1)
+            lst = [f"{choice} 3, 1, {imm}"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_adde(self):
+        lst = ["adde. 5, 6, 7"]
+        initial_regs = [0] * 32
+        initial_regs[6] = random.randint(0, (1<<64)-1)
+        initial_regs[7] = random.randint(0, (1<<64)-1)
+        initial_sprs = {}
+        xer = SelectableInt(0, 64)
+        xer[XER_bits['CA']] = 1
+        initial_sprs[special_sprs['XER']] = xer
+        self.run_tst_program(Program(lst), initial_regs, initial_sprs)
+
+    def test_cmp(self):
+        lst = ["subf. 1, 6, 7",
+               "cmp cr2, 1, 6, 7"]
+        initial_regs = [0] * 32
+        initial_regs[6] = 0x10
+        initial_regs[7] = 0x05
+        self.run_tst_program(Program(lst), initial_regs, {})
+
+    def test_extsb(self):
+        insns = ["extsb", "extsh", "extsw"]
+        for i in range(10):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_cmpeqb(self):
+        lst = ["cmpeqb cr0, 1, 2"]
+        for i in range(20):
+            initial_regs = [0] * 32
+            initial_regs[1] = i
+            initial_regs[2] = 0x01030507090b0d0f11
+            self.run_tst_program(Program(lst), initial_regs, {})
+
+    def test_ilang(self):
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = ALUBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.alu = alu = ALUBasePipe(pspec)
+
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                index = simulator.pc.CIA.value//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("0x{:X}".format(ins & 0xffffffff))
+                    print(code)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.ALU.value)
+                    yield from set_alu_inputs(alu, pdecode2, simulator)
+                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+                    yield
+                    opname = code.split(' ')[0]
+                    yield from simulator.call(opname)
+                    index = simulator.pc.CIA.value//4
+
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+                    alu_out = yield alu.n.data_o.o
+                    out_reg_valid = yield pdecode2.e.write_reg.ok
+                    if out_reg_valid:
+                        write_reg_idx = yield pdecode2.e.write_reg.data
+                        expected = simulator.gpr(write_reg_idx).value
+                        print(f"expected {expected:x}, actual: {alu_out:x}")
+                        self.assertEqual(expected, alu_out)
+                    yield from self.check_extra_alu_outputs(alu, pdecode2,
+                                                            simulator, code)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+
+    def check_extra_alu_outputs(self, alu, dec2, sim, code):
+        rc = yield dec2.e.rc.data
+        if rc:
+            cr_expected = sim.crl[0].get_range().value
+            cr_actual = yield alu.n.data_o.cr0
+            self.assertEqual(cr_expected, cr_actual, code)
+
+        op = yield dec2.e.insn_type
+        if op == InternalOp.OP_CMP.value or \
+           op == InternalOp.OP_CMPEQB.value:
+            bf = yield dec2.dec.BF
+            cr_actual = yield alu.n.data_o.cr0
+            cr_expected = sim.crl[bf].get_range().value
+            self.assertEqual(cr_expected, cr_actual, code)
+
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
diff --git a/src/soc/fu/branch/__init__.py b/src/soc/fu/branch/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/soc/fu/branch/br_input_record.py b/src/soc/fu/branch/br_input_record.py
new file mode 100644 (file)
index 0000000..d4f039c
--- /dev/null
@@ -0,0 +1,81 @@
+from nmigen.hdl.rec import Record, Layout
+
+from soc.decoder.power_enums import InternalOp, Function, CryIn
+
+
+class CompBROpSubset(Record):
+    """CompBROpSubset
+
+    TODO: remove anything not needed by the Branch pipeline (determine this
+    after all branch operations have been written.  see
+    https://bugs.libre-soc.org/show_bug.cgi?id=313#c3)
+
+    a copy of the relevant subset information from Decode2Execute1Type
+    needed for Branch operations.  use with eq_from_execute1 (below) to
+    grab subsets.
+    """
+    def __init__(self, name=None):
+        layout = (('insn_type', InternalOp),
+                  ('fn_unit', Function),
+                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+                   #'cr = Signal(32) # NO: this is from the CR SPR
+                    #'xerc = XerBits() # NO: this is from the XER SPR
+                  ('lk', 1),
+                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
+                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
+                  ('invert_a', 1),
+                  ('invert_out', 1),
+                  ('input_carry', CryIn),
+                  ('output_carry', 1),
+                  ('input_cr', 1),
+                  ('output_cr', 1),
+                  ('is_32bit', 1),
+                  ('is_signed', 1),
+                  ('insn', 32),
+                  ('byte_reverse', 1),
+                  ('sign_extend', 1))
+
+        Record.__init__(self, Layout(layout), name=name)
+
+        # grrr.  Record does not have kwargs
+        self.insn_type.reset_less = True
+        self.fn_unit.reset_less = True
+        #self.cr = Signal(32, reset_less = True
+        #self.xerc = XerBits(
+        self.lk.reset_less = True
+        self.invert_a.reset_less = True
+        self.invert_out.reset_less = True
+        self.input_carry.reset_less = True
+        self.output_carry.reset_less = True
+        self.input_cr.reset_less = True
+        self.output_cr.reset_less = True
+        self.is_32bit.reset_less = True
+        self.is_signed.reset_less = True
+        self.byte_reverse.reset_less = True
+        self.sign_extend.reset_less = True
+
+    def eq_from_execute1(self, other):
+        """ use this to copy in from Decode2Execute1Type
+        """
+        res = []
+        for fname, sig in self.fields.items():
+            eqfrom = other.fields[fname]
+            res.append(sig.eq(eqfrom))
+        return res
+
+    def ports(self):
+        return [self.insn_type,
+                #self.cr,
+                #self.xerc,
+                self.lk,
+                self.invert_a,
+                self.invert_out,
+                self.input_carry,
+                self.output_carry,
+                self.input_cr,
+                self.output_cr,
+                self.is_32bit,
+                self.is_signed,
+                self.byte_reverse,
+                self.sign_extend,
+        ]
diff --git a/src/soc/fu/branch/formal/proof_input_stage.py b/src/soc/fu/branch/formal/proof_input_stage.py
new file mode 100644 (file)
index 0000000..fb097c8
--- /dev/null
@@ -0,0 +1,80 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.branch.br_input_record import CompBROpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompBROpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ALUInputStage(pspec)
+
+        a = Signal(64)
+        b = Signal(64)
+        comb += [dut.i.a.eq(a),
+                 dut.i.b.eq(b),
+                 a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        for p in rec.ports():
+            name = p.name
+            rec_sig = p
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        with m.If(rec.invert_a):
+            comb += Assert(dut.o.a == ~a)
+        with m.Else():
+            comb += Assert(dut.o.a == a)
+
+        with m.If(rec.imm_data.imm_ok &
+                  ~(rec.insn_type == InternalOp.OP_RLC)):
+            comb += Assert(dut.o.b == rec.imm_data.imm)
+        with m.Else():
+            comb += Assert(dut.o.b == b)
+
+        return m
+
+class GTCombinerTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=4)
+        self.assertFormal(module, mode="cover", depth=4)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("input_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/branch/formal/proof_main_stage.py b/src/soc/fu/branch/formal/proof_main_stage.py
new file mode 100644 (file)
index 0000000..5ca9481
--- /dev/null
@@ -0,0 +1,92 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+                    signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.logical.main_stage import LogicalMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = LogicalMainStage(pspec)
+
+        # convenience variables
+        a = dut.i.a
+        b = dut.i.b
+        carry_in = dut.i.carry_in
+        so_in = dut.i.so
+        carry_out = dut.o.carry_out
+        o = dut.o.o
+
+        # setup random inputs
+        comb += [a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64)),
+                 carry_in.eq(AnyConst(1)),
+                 so_in.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        for rec_sig in rec.ports():
+            name = rec_sig.name
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        # signed and signed/32 versions of input a
+        a_signed = Signal(signed(64))
+        a_signed_32 = Signal(signed(32))
+        comb += a_signed.eq(a)
+        comb += a_signed_32.eq(a[0:32])
+
+        # main assertion of arithmetic operations
+        with m.Switch(rec.insn_type):
+            with m.Case(InternalOp.OP_AND):
+                comb += Assert(dut.o.o == a & b)
+            with m.Case(InternalOp.OP_OR):
+                comb += Assert(dut.o.o == a | b)
+            with m.Case(InternalOp.OP_XOR):
+                comb += Assert(dut.o.o == a ^ b)
+
+        return m
+
+
+class LogicalTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=2)
+        self.assertFormal(module, mode="cover", depth=2)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("main_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/branch/input_stage.py b/src/soc/fu/branch/input_stage.py
new file mode 100644 (file)
index 0000000..e6ab48e
--- /dev/null
@@ -0,0 +1,63 @@
+# This stage is intended to adjust the input data before sending it to
+# the acutal ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+                    unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.alu.pipe_data import ALUInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ALUInputStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "input")
+
+    def ispec(self):
+        return ALUInputData(self.pspec)
+
+    def ospec(self):
+        return ALUInputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        ##### operand A #####
+
+        # operand a to be as-is or inverted
+        a = Signal.like(self.i.a)
+
+        with m.If(self.i.ctx.op.invert_a):
+            comb += a.eq(~self.i.a)
+        with m.Else():
+            comb += a.eq(self.i.a)
+
+        comb += self.o.a.eq(a)
+
+        ##### operand B #####
+
+        # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+        # remove this, just do self.o.b.eq(self.i.b) and move the
+        # immediate-detection into set_alu_inputs in the unit test
+        # If there's an immediate, set the B operand to that
+        comb += self.o.b.eq(self.i.b)
+
+        ##### carry-in #####
+
+        # either copy incoming carry or set to 1/0 as defined by op
+        with m.Switch(self.i.ctx.op.input_carry):
+            with m.Case(CryIn.ZERO):
+                comb += self.o.carry_in.eq(0)
+            with m.Case(CryIn.ONE):
+                comb += self.o.carry_in.eq(1)
+            with m.Case(CryIn.CA):
+                comb += self.o.carry_in.eq(self.i.carry_in)
+
+        ##### sticky overflow and context (both pass-through) #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/branch/main_stage.py b/src/soc/fu/branch/main_stage.py
new file mode 100644 (file)
index 0000000..6f6d488
--- /dev/null
@@ -0,0 +1,139 @@
+# This stage is intended to do most of the work of executing Logical
+# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ
+# however input and output stages also perform bit-negation on input(s)
+# and output, as well as carry and overflow generation.
+# This module however should not gate the carry or overflow, that's up
+# to the output stage
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.branch.pipe_data import BranchInputData, BranchOutputData
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+def br_ext(bd):
+    return Cat(Const(0, 2), bd, Repl(bd[-1], 64-(bd.shape().width + 2)))
+
+"""
+Notes on BO Field:
+
+BO    Description
+0000z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=0
+0001z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=0
+001at Branch if CR[BI]=0
+0100z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=1
+0101z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=1
+011at Branch if CR[BI]=1
+1a00t Decrement the CTR, then branch if decremented CTR[M:63]!=0
+1a01t Decrement the CTR, then branch if decremented CTR[M:63]=0
+1z1zz Branch always
+"""
+
+class BranchMainStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "main")
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        return BranchInputData(self.pspec)
+
+    def ospec(self):
+        return BranchOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        op = self.i.ctx.op
+        lk = op.lk # see PowerDecode2 as to why this is done
+        nia_o, lr_o = self.o.nia, self.o.lr
+
+        # obtain relevant instruction fields
+        i_fields = self.fields.FormI
+        aa = Signal(i_fields.AA[0:-1].shape())
+        comb += aa.eq(i_fields.AA[0:-1])
+
+        br_imm_addr = Signal(64, reset_less=True)
+        br_addr = Signal(64, reset_less=True)
+        br_taken = Signal(reset_less=True)
+
+        # Handle absolute or relative branches
+        with m.If(aa):
+            comb += br_addr.eq(br_imm_addr)
+        with m.Else():
+            comb += br_addr.eq(br_imm_addr + self.i.cia)
+
+        # fields for conditional branches (BO and BI are same for BC and BCREG)
+        # NOTE: here, BO and BI we would like be treated as CR regfile
+        # selectors (similar to RA, RB, RS, RT).  see comment here:
+        # https://bugs.libre-soc.org/show_bug.cgi?id=313#c2
+        b_fields = self.fields.FormB
+        BO = b_fields.BO[0:-1]
+        BI = b_fields.BI[0:-1]
+
+        # The bit of CR selected by BI
+        cr_bit = Signal(reset_less=True)
+        comb += cr_bit.eq((self.i.cr & (1<<(31-BI))) != 0)
+
+        # Whether the conditional branch should be taken
+        bc_taken = Signal(reset_less=True)
+        with m.If(BO[2]):
+            comb += bc_taken.eq((cr_bit == BO[3]) | BO[4])
+        with m.Else():
+            # decrement the counter and place into output
+            ctr = Signal(64, reset_less=True)
+            comb += ctr.eq(self.i.ctr - 1)
+            comb += self.o.ctr.data.eq(ctr)
+            comb += self.o.ctr.ok.eq(1)
+            # take either all 64 bits or only 32 of post-incremented counter
+            ctr_m = Signal(64, reset_less=True)
+            with m.If((op.is_32bit):
+                comb += ctr_m.eq(ctr[:32])
+            with m.Else():
+                comb += ctr_m.eq(ctr)
+            # check CTR zero/non-zero against BO[1]
+            ctr_zero_bo1 = Signal(reset_less=True) # BO[1] == (ctr==0)
+            comb += ctr_zero_bo1.eq(BO[1] ^ ctr_m.any())
+            with m.If(BO[3:5] == 0b00):
+                comb += bc_taken.eq(ctr_zero_bo1 & ~cr_bit)
+            with m.Elif(BO[3:5] == 0b01):
+                comb += bc_taken.eq(ctr_zero_bo1 & cr_bit)
+            with m.Elif(BO[4] == 1):
+                comb += bc_taken.eq(ctr_zero_bo1)
+
+        ### Main Switch Statement ###
+        with m.Switch(op.insn_type):
+            #### branch ####
+            with m.Case(InternalOp.OP_B):
+                LI = i_fields.LI[0:-1]
+                comb += br_imm_addr.eq(br_ext(LI))
+                comb += br_taken.eq(1)
+            #### branch conditional ####
+            with m.Case(InternalOp.OP_BC):
+                BD = b_fields.BD[0:-1]
+                comb += br_imm_addr.eq(br_ext(BD))
+                comb += br_taken.eq(bc_taken)
+            #### branch conditional reg ####
+            with m.Case(InternalOp.OP_BCREG):
+                comb += br_imm_addr.eq(self.i.spr1) # SPR1 is set by decode unit
+                comb += br_taken.eq(bc_taken)
+
+        ###### output next instruction address #####
+
+        comb += nia_o.data.eq(br_addr)
+        comb += nia_o.ok.eq(br_taken)
+
+        ###### link register - only activate on operations marked as "lk" #####
+
+        with m.If(lk):
+            # ctx.op.lk is the AND of the insn LK field *and* whether the
+            # op is to "listen" to the link field
+            comb += lr_o.data.eq(self.i.cia + 4)
+            comb += lr_o.ok.eq(1)
+
+        ###### and context #####
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/branch/pipe_data.py b/src/soc/fu/branch/pipe_data.py
new file mode 100644 (file)
index 0000000..0ef4f00
--- /dev/null
@@ -0,0 +1,90 @@
+"""
+    Optional Register allocation listed below.  mandatory input
+    (CompBROpSubset, CIA) not included.
+
+    * CR is Condition Register (not an SPR)
+    * SPR1, SPR2 and SPR3 are all from the SPR regfile.  3 ports are needed
+
+    insn       CR  SPR1  SPR2    SPR3
+    ----       --  ----  ----    ----
+    op_b       xx  xx     xx     xx
+    op_ba      xx  xx     xx     xx
+    op_bl      xx  xx     xx     xx
+    op_bla     xx  xx     xx     xx
+    op_bc      CR, xx,    CTR    xx
+    op_bca     CR, xx,    CTR    xx
+    op_bcl     CR, xx,    CTR    xx
+    op_bcla    CR, xx,    CTR    xx
+    op_bclr    CR, LR,    CTR    xx
+    op_bclrl   CR, LR,    CTR    xx
+    op_bcctr   CR, xx,    CTR    xx
+    op_bcctrl  CR, xx,    CTR    xx
+    op_bctar   CR, TAR,   CTR,   xx
+    op_bctarl  CR, TAR,   CTR,   xx
+
+    op_sc      xx  xx     xx     MSR
+    op_scv     xx  LR,    SRR1,  MSR
+    op_rfscv   xx  LR,    CTR,   MSR
+    op_rfid    xx  SRR0,  SRR1,  MSR
+    op_hrfid   xx  HSRR0, HSRR1, MSR
+"""
+
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.decoder.power_decoder2 import Data
+from soc.alu.pipe_data import IntegerData
+
+
+class BranchInputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR
+        # this involves the *decode* unit selecting the register, based
+        # on detecting the operand being bcctr, bclr or bctar
+
+        self.spr1 = Signal(64, reset_less=True) # see table above, SPR1
+        self.spr2 = Signal(64, reset_less=True) # see table above, SPR2
+        self.spr3 = Signal(64, reset_less=True) # see table above, SPR3
+        self.cr = Signal(32, reset_less=True)   # Condition Register(s) CR0-7
+        self.cia = Signal(64, reset_less=True)  # Current Instruction Address
+
+        # convenience variables.  not all of these are used at once
+        self.ctr = self.srr0 = self.hsrr0 = self.spr2
+        self.lr = self.tar = self.srr1 = self.hsrr1 = self.spr1
+        self.msr = self.spr3
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.spr1
+        yield self.spr2
+        yield self.spr3
+        yield self.cr
+        yield self.cia
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.spr1.eq(i.spr1), self.spr2.eq(i.spr2),
+                      self.spr3.eq(i.spr3),
+                      self.cr.eq(i.cr), self.cia.eq(i.cia)]
+
+
+class BranchOutputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.lr = Data(64, name="lr")
+        self.spr = Data(64, name="spr")
+        self.nia = Data(64, name="nia")
+
+        # convenience variables.
+        self.ctr = self.spr
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield from self.lr
+        yield from self.spr
+        yield from self.nia
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr),
+                      self.nia.eq(i.nia)]
diff --git a/src/soc/fu/branch/pipeline.py b/src/soc/fu/branch/pipeline.py
new file mode 100644 (file)
index 0000000..ac132f7
--- /dev/null
@@ -0,0 +1,21 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.branch.main_stage import BranchMainStage
+
+class BranchStages(PipeModBaseChain):
+    def get_chain(self):
+        main = BranchMainStage(self.pspec)
+        return [main]
+
+
+class BranchBasePipe(ControlBase):
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = BranchStages(pspec)
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
diff --git a/src/soc/fu/branch/test/test_pipe_caller.py b/src/soc/fu/branch/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..10d2bba
--- /dev/null
@@ -0,0 +1,210 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.branch.pipeline import BranchBasePipe
+from soc.branch.br_input_record import CompBROpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+    def __init__(self, program, regs, sprs, cr, name):
+        self.program = program
+        self.regs = regs
+        self.sprs = sprs
+        self.name = name
+        self.cr = cr
+
+def get_rec_width(rec):
+    recwidth = 0
+    # Setup random inputs for dut.op
+    for p in rec.ports():
+        width = p.width
+        recwidth += width
+    return recwidth
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class BranchTestCase(FHDLTestCase):
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+    def run_tst_program(self, prog, initial_regs=[0] * 32,
+                        initial_sprs={}, initial_cr=0):
+        tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
+                      self.test_name)
+        test_data.append(tc)
+
+    def test_unconditional(self):
+        choices = ["b", "ba", "bl", "bla"]
+        for i in range(20):
+            choice = random.choice(choices)
+            imm = random.randrange(-1<<23, (1<<23)-1) * 4
+            lst = [f"{choice} {imm}"]
+            initial_regs = [0] * 32
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_bc_cr(self):
+        for i in range(20):
+            bc = random.randrange(-1<<13, (1<<13)-1) * 4
+            bo = random.choice([0b01100, 0b00100, 0b10100])
+            bi = random.randrange(0, 31)
+            cr = random.randrange(0, (1<<32)-1)
+            lst = [f"bc {bo}, {bi}, {bc}"]
+            initial_regs = [0] * 32
+            self.run_tst_program(Program(lst), initial_cr=cr)
+
+    def test_bc_ctr(self):
+        for i in range(20):
+            bc = random.randrange(-1<<13, (1<<13)-1) * 4
+            bo = random.choice([0, 2, 8, 10, 16, 18])
+            bi = random.randrange(0, 31)
+            cr = random.randrange(0, (1<<32)-1)
+            ctr = random.randint(0, (1<<32)-1)
+            lst = [f"bc {bo}, {bi}, {bc}"]
+            initial_sprs={9: SelectableInt(ctr, 64)}
+            self.run_tst_program(Program(lst),
+                                 initial_sprs=initial_sprs,
+                                 initial_cr=cr)
+
+    def test_ilang(self):
+        rec = CompBROpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = BranchBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("logical_pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompBROpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.branch = branch = BranchBasePipe(pspec)
+
+        comb += branch.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += branch.p.valid_i.eq(1)
+        comb += branch.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
+                initial_cia = 0x2000
+                simulator.set_pc(initial_cia)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                index = (simulator.pc.CIA.value - initial_cia)//4
+                while index < len(instructions) and index >= 0:
+                    print(index)
+                    ins, code = instructions[index]
+
+                    print("0x{:X}".format(ins & 0xffffffff))
+                    print(code)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield branch.p.data_i.cia.eq(simulator.pc.CIA.value)
+                    yield branch.p.data_i.cr.eq(simulator.cr.get_range().value)
+                    # note, here, the op will need further decoding in order
+                    # to set the correct SPRs on SPR1/2/3.  op_bc* require
+                    # spr2 to be set to CTR, op_bctar require spr1 to be
+                    # set to TAR, op_bclr* require spr1 to be set to LR.
+                    # if op_sc*, op_rf* and op_hrfid are to be added here
+                    # then additional op-decoding is required, accordingly
+                    yield branch.p.data_i.spr2.eq(simulator.spr['CTR'].value)
+                    print(f"cr0: {simulator.crl[0].get_range()}")
+                    yield Settle()
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.BRANCH.value, code)
+                    yield
+                    yield
+                    opname = code.split(' ')[0]
+                    prev_nia = simulator.pc.NIA.value
+                    yield from simulator.call(opname)
+                    index = (simulator.pc.CIA.value - initial_cia)//4
+
+                    yield from self.assert_outputs(branch, pdecode2,
+                                                   simulator, prev_nia, code)
+
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+
+    def assert_outputs(self, branch, dec2, sim, prev_nia, code):
+        branch_taken = yield branch.n.data_o.nia.ok
+        sim_branch_taken = prev_nia != sim.pc.CIA
+        self.assertEqual(branch_taken, sim_branch_taken, code)
+        if branch_taken:
+            branch_addr = yield branch.n.data_o.nia.data
+            self.assertEqual(branch_addr, sim.pc.CIA.value, code)
+
+        lk = yield dec2.e.lk
+        branch_lk = yield branch.n.data_o.lr.ok
+        self.assertEqual(lk, branch_lk, code)
+        if lk:
+            branch_lr = yield branch.n.data_o.lr.data
+            self.assertEqual(sim.spr['LR'], branch_lr, code)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
diff --git a/src/soc/fu/countzero/countzero.py b/src/soc/fu/countzero/countzero.py
new file mode 100644 (file)
index 0000000..bd61f57
--- /dev/null
@@ -0,0 +1,136 @@
+# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl
+from nmigen import Memory, Module, Signal, Cat, Elaboratable
+from nmigen.hdl.rec import Record, Layout
+from nmigen.cli import main
+
+
+def or4(a, b, c, d):
+    return Cat(a.any(), b.any(), c.any(), d.any())
+
+
+class IntermediateResult(Record):
+    def __init__(self, name=None):
+        layout = (('v16', 15),
+                  ('sel_hi', 2),
+                  ('is_32bit', 1),
+                  ('count_right', 1))
+        Record.__init__(self, Layout(layout), name=name)
+
+
+class ZeroCounter(Elaboratable):
+    def __init__(self):
+        self.rs_i = Signal(64, reset_less=True)
+        self.count_right_i = Signal(1, reset_less=True)
+        self.is_32bit_i = Signal(1, reset_less=True)
+        self.result_o = Signal(64, reset_less=True)
+
+    def ports(self):
+        return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o]
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4)
+        # m.submodule.pe2 = PriorityEncoder(4)
+        # m.submodule.pe3 = PriorityEncoder(4)
+        # etc.
+        # and where right will assign input to v and !right will assign v[::-1]
+        # so as to reverse the order of the input bits.
+
+        def encoder(v, right):
+            """
+            Return the index of the leftmost or rightmost 1 in a set of 4 bits.
+            Assumes v is not "0000"; if it is, return (right ? "11" : "00").
+            """
+            ret = Signal(2, reset_less=True)
+            with m.If(right):
+                with m.If(v[0]):
+                    m.d.comb += ret.eq(0)
+                with m.Elif(v[1]):
+                    m.d.comb += ret.eq(1)
+                with m.Elif(v[2]):
+                    m.d.comb += ret.eq(2)
+                with m.Else():
+                    m.d.comb += ret.eq(3)
+            with m.Else():
+                with m.If(v[3]):
+                    m.d.comb += ret.eq(3)
+                with m.Elif(v[2]):
+                    m.d.comb += ret.eq(2)
+                with m.Elif(v[1]):
+                    m.d.comb += ret.eq(1)
+                with m.Else():
+                    m.d.comb += ret.eq(0)
+            return ret
+
+        r = IntermediateResult()
+        r_in = IntermediateResult()
+
+        m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now
+
+        v = IntermediateResult()
+        y = Signal(4, reset_less=True)
+        z = Signal(4, reset_less=True)
+        sel = Signal(6, reset_less=True)
+        v4 = Signal(4, reset_less=True)
+
+        # Test 4 groups of 16 bits each.
+        # The top 2 groups are considered to be zero in 32-bit mode.
+        m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32],
+                             self.rs_i[32:48], self.rs_i[48:64]))
+        with m.If(self.is_32bit_i):
+            m.d.comb += v.sel_hi[1].eq(0)
+            with m.If(self.count_right_i):
+                m.d.comb += v.sel_hi[0].eq(~z[0])
+            with m.Else():
+                m.d.comb += v.sel_hi[0].eq(z[1])
+        with m.Else():
+            m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i))
+
+        # Select the leftmost/rightmost non-zero group of 16 bits
+        with m.Switch(v.sel_hi):
+            with m.Case(0):
+                m.d.comb += v.v16.eq(self.rs_i[0:16])
+            with m.Case(1):
+                m.d.comb += v.v16.eq(self.rs_i[16:32])
+            with m.Case(2):
+                m.d.comb += v.v16.eq(self.rs_i[32:48])
+            with m.Case(3):
+                m.d.comb += v.v16.eq(self.rs_i[48:64])
+
+        # Latch this and do the rest in the next cycle, for the sake of timing
+        m.d.comb += v.is_32bit.eq(self.is_32bit_i)
+        m.d.comb += v.count_right.eq(self.count_right_i)
+        m.d.comb += r_in.eq(v)
+        m.d.comb += sel[4:6].eq(r.sel_hi)
+
+        # Test 4 groups of 4 bits
+        m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8],
+                             r.v16[8:12], r.v16[12:16]))
+        m.d.comb += sel[2:4].eq(encoder(y, r.count_right))
+
+        # Select the leftmost/rightmost non-zero group of 4 bits
+        with m.Switch(sel[2:4]):
+            with m.Case(0):
+                m.d.comb += v4.eq(r.v16[0:4])
+            with m.Case(1):
+                m.d.comb += v4.eq(r.v16[4:8])
+            with m.Case(2):
+                m.d.comb += v4.eq(r.v16[8:12])
+            with m.Case(3):
+                m.d.comb += v4.eq(r.v16[12:16])
+
+        m.d.comb += sel[0:2].eq(encoder(v4, r.count_right))
+
+        # sel is now the index of the leftmost/rightmost 1 bit in rs
+        o = self.result_o
+        with m.If(v4 == 0):
+            # operand is zero, return 32 for 32-bit, else 64
+            m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit))
+        with m.Elif(r.count_right):
+            # return (63 - sel), trimmed to 5 bits in 32-bit mode
+            m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit)))
+        with m.Else():
+            m.d.comb += o.eq(sel)
+
+        return m
diff --git a/src/soc/fu/countzero/test/test_countzero.py b/src/soc/fu/countzero/test/test_countzero.py
new file mode 100644 (file)
index 0000000..6018519
--- /dev/null
@@ -0,0 +1,105 @@
+# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl
+from nmigen import Module, Signal
+from nmigen.cli import rtlil
+from nmigen.back.pysim import Simulator, Delay
+from nmigen.test.utils import FHDLTestCase
+import unittest
+from soc.countzero.countzero import ZeroCounter
+
+
+class ZeroCounterTestCase(FHDLTestCase):
+    def test_zerocounter(self):
+        m = Module()
+        comb = m.d.comb
+        m.submodules.dut = dut = ZeroCounter()
+
+        sim = Simulator(m)
+        # sim.add_clock(1e-6)
+
+        def process():
+            print("test zero input")
+            yield dut.rs_i.eq(0)
+            yield dut.is_32bit_i.eq(0)
+            yield dut.count_right_i.eq(0)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 0x40
+            # report "bad cntlzd 0 = " & to_hstring(result);
+            assert(result == 0x40)
+            yield dut.count_right_i.eq(1)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            # report "bad cntlzd 0 = " & to_hstring(result);
+            assert(result == 0x40)
+            yield dut.is_32bit_i.eq(1)
+            yield dut.count_right_i.eq(0)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            # report "bad cntlzw 0 = " & to_hstring(result);
+            assert(result == 0x20)
+            yield dut.count_right_i.eq(1)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            # report "bad cntlzw 0 = " & to_hstring(result);
+            assert(result == 0x20)
+            # TODO next tests
+
+            yield dut.rs_i.eq(0b00010000)
+            yield dut.is_32bit_i.eq(0)
+            yield dut.count_right_i.eq(0)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 4, "result %d" % result
+
+            yield dut.count_right_i.eq(1)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 59, "result %d" % result
+
+            yield dut.is_32bit_i.eq(1)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 27, "result %d" % result
+
+            yield dut.rs_i.eq(0b1100000100000000)
+            yield dut.is_32bit_i.eq(0)
+            yield dut.count_right_i.eq(0)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 14, "result %d" % result
+
+            yield dut.count_right_i.eq(1)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 55, "result %d" % result
+
+            yield dut.is_32bit_i.eq(1)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 23, "result %d" % result
+
+            yield dut.count_right_i.eq(0)
+            yield Delay(1e-6)
+            result = yield dut.result_o
+            assert result == 14, "result %d" % result
+
+
+        sim.add_process(process)  # or sim.add_sync_process(process), see below
+
+        # run test and write vcd
+        fn = "genullnau"
+        with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()):
+            sim.run()
+
+    # cntlzd_w
+    # cnttzd_w
+
+
+if __name__ == "__main__":
+
+    dut = ZeroCounter()
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("countzero.il", "w") as f:
+        f.write(vl)
+
+    unittest.main()
diff --git a/src/soc/fu/cr/main_stage.py b/src/soc/fu/cr/main_stage.py
new file mode 100644 (file)
index 0000000..67bd78e
--- /dev/null
@@ -0,0 +1,124 @@
+# This stage is intended to do Condition Register instructions
+# and output, as well as carry and overflow generation.
+# NOTE: with the exception of mtcrf and mfcr, we really should be doing
+# the field decoding which
+# selects which bits of CR are to be read / written, back in the
+# decoder / insn-isue, have both self.i.cr and self.o.cr
+# be broken down into 4-bit-wide "registers", with their
+# own "Register File" (indexed by bt, ba and bb),
+# exactly how INT regs are done (by RA, RB, RS and RT)
+# however we are pushed for time so do it as *one* register.
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.cr.pipe_data import CRInputData, CROutputData
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+class CRMainStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "main")
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        return CRInputData(self.pspec)
+
+    def ospec(self):
+        return CROutputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        op = self.i.ctx.op
+        xl_fields = self.fields.FormXL
+        xfx_fields = self.fields.FormXFX
+        # default: cr_o remains same as cr input unless modified, below
+        cr_o = Signal.like(self.i.cr)
+        comb += cr_o.eq(self.i.cr)
+
+        ##### prepare inputs / temp #####
+
+        # Generate array for cr input so bits can be selected
+        cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)])
+        for i in range(32):
+            comb += cr_arr[i].eq(self.i.cr[31-i])
+
+        # Generate array for cr output so the bit to write to can be
+        # selected by a signal
+        cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)])
+        for i in range(32):
+            comb += cr_o[31-i].eq(cr_out_arr[i])
+            comb += cr_out_arr[i].eq(cr_arr[i])
+
+        # Generate the mask for mtcrf, mtocrf, and mfocrf
+        # replicate every fxm field in the insn to 4-bit, as a mask
+        FXM = xfx_fields.FXM[0:-1]
+        mask = Signal(32, reset_less=True)
+        comb += mask.eq(Cat(*[Repl(FXM[i], 4) for i in range(8)]))
+
+        #################################
+        ##### main switch statement #####
+
+        with m.Switch(op.insn_type):
+            ##### mcrf #####
+            with m.Case(InternalOp.OP_MCRF):
+                # MCRF copies the 4 bits of crA to crB (for instance
+                # copying cr2 to cr1)
+                BF = xl_fields.BF[0:-1]   # destination CR
+                BFA = xl_fields.BFA[0:-1] # source CR
+
+                for i in range(4):
+                    comb += cr_out_arr[BF*4 + i].eq(cr_arr[BFA*4 + i])
+
+            ##### crand, cror, crnor etc. #####
+            with m.Case(InternalOp.OP_CROP):
+                # crand/cror and friends get decoded to the same opcode, but
+                # one of the fields inside the instruction is a 4 bit lookup
+                # table. This lookup table gets indexed by bits a and b from
+                # the CR to determine what the resulting bit should be.
+
+                # Grab the lookup table for cr_op type instructions
+                lut = Array([Signal(name=f"lut{i}") for i in range(4)])
+                # There's no field, just have to grab it directly from the insn
+                for i in range(4):
+                    comb += lut[i].eq(self.i.ctx.op.insn[6+i])
+
+                # Get the bit selector fields from the instruction
+                BT = xl_fields.BT[0:-1]
+                BA = xl_fields.BA[0:-1]
+                BB = xl_fields.BB[0:-1]
+
+                # Use the two input bits to look up the result in the LUT
+                comb += cr_out_arr[BT].eq(lut[Cat(cr_arr[BB], cr_arr[BA])])
+
+            ##### mtcrf #####
+            with m.Case(InternalOp.OP_MTCRF):
+                # mtocrf and mtcrf are essentially identical
+                # put input (RA) - mask-selected - into output CR, leave
+                # rest of CR alone.
+                comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask))
+
+            ##### mfcr #####
+            with m.Case(InternalOp.OP_MFCR):
+                # Ugh. mtocrf and mtcrf have one random bit differentiating
+                # them. This bit is not in any particular field, so this
+                # extracts that bit from the instruction
+                move_one = Signal(reset_less=True)
+                comb += move_one.eq(self.i.ctx.op.insn[20])
+
+                # mfocrf
+                with m.If(move_one):
+                    comb += self.o.o.eq(self.i.cr & mask)
+                # mfcrf
+                with m.Else():
+                    comb += self.o.o.eq(self.i.cr)
+
+        # output and context
+        comb += self.o.cr.eq(cr_o)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/cr/pipe_data.py b/src/soc/fu/cr/pipe_data.py
new file mode 100644 (file)
index 0000000..d56c8f3
--- /dev/null
@@ -0,0 +1,36 @@
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.alu.pipe_data import IntegerData
+
+
+class CRInputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.a = Signal(64, reset_less=True) # RA
+        self.cr = Signal(64, reset_less=True) # CR in
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.a
+        yield self.cr
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.a.eq(i.a),
+                      self.cr.eq(i.cr)]
+
+class CROutputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.o = Signal(64, reset_less=True) # RA
+        self.cr = Signal(64, reset_less=True) # CR in
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.o
+        yield self.cr
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.o.eq(i.o),
+                      self.cr.eq(i.cr)]
diff --git a/src/soc/fu/cr/pipeline.py b/src/soc/fu/cr/pipeline.py
new file mode 100644 (file)
index 0000000..121cdf8
--- /dev/null
@@ -0,0 +1,21 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.cr.main_stage import CRMainStage
+
+class CRStages(PipeModBaseChain):
+    def get_chain(self):
+        main = CRMainStage(self.pspec)
+        return [main]
+
+
+class CRBasePipe(ControlBase):
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = CRStages(pspec)
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
diff --git a/src/soc/fu/cr/test/test_pipe_caller.py b/src/soc/fu/cr/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..fa08fb6
--- /dev/null
@@ -0,0 +1,232 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.cr.pipeline import CRBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+    def __init__(self, program, regs, sprs, cr, name):
+        self.program = program
+        self.regs = regs
+        self.sprs = sprs
+        self.name = name
+        self.cr = cr
+
+def get_rec_width(rec):
+    recwidth = 0
+    # Setup random inputs for dut.op
+    for p in rec.ports():
+        width = p.width
+        recwidth += width
+    return recwidth
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class CRTestCase(FHDLTestCase):
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={},
+                        initial_cr=0):
+        tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
+                      self.test_name)
+        test_data.append(tc)
+
+    def test_crop(self):
+        insns = ["crand", "cror", "crnand", "crnor", "crxor", "creqv",
+                 "crandc", "crorc"]
+        for i in range(40):
+            choice = random.choice(insns)
+            ba = random.randint(0, 31)
+            bb = random.randint(0, 31)
+            bt = random.randint(0, 31)
+            lst = [f"{choice} {ba}, {bb}, {bt}"]
+            cr = random.randint(0, 7)
+            self.run_tst_program(Program(lst), initial_cr=cr)
+
+    def test_mcrf(self):
+        lst = ["mcrf 0, 5"]
+        cr = 0xffff0000
+        self.run_tst_program(Program(lst), initial_cr=cr)
+
+    def test_mtcrf(self):
+        for i in range(20):
+            mask = random.randint(0, 255)
+            lst = [f"mtcrf {mask}, 2"]
+            cr = random.randint(0, (1<<32)-1)
+            initial_regs = [0] * 32
+            initial_regs[2] = random.randint(0, (1<<32)-1)
+            self.run_tst_program(Program(lst), initial_regs=initial_regs,
+                                 initial_cr=cr)
+    def test_mtocrf(self):
+        for i in range(20):
+            mask = 1<<random.randint(0, 7)
+            lst = [f"mtocrf {mask}, 2"]
+            cr = random.randint(0, (1<<32)-1)
+            initial_regs = [0] * 32
+            initial_regs[2] = random.randint(0, (1<<32)-1)
+            self.run_tst_program(Program(lst), initial_regs=initial_regs,
+                                 initial_cr=cr)
+
+    def test_mfcr(self):
+        for i in range(5):
+            lst = ["mfcr 2"]
+            cr = random.randint(0, (1<<32)-1)
+            self.run_tst_program(Program(lst), initial_cr=cr)
+
+    def test_mfocrf(self):
+        for i in range(20):
+            mask = 1<<random.randint(0, 7)
+            lst = [f"mfocrf 2, {mask}"]
+            cr = random.randint(0, (1<<32)-1)
+            self.run_tst_program(Program(lst), initial_cr=cr)
+        
+
+    def test_ilang(self):
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = CRBasePipe(pspec)
+        ports = alu.ports()
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("logical_pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def set_inputs(self, alu, dec2, simulator):
+        yield alu.p.data_i.cr.eq(simulator.cr.get_range().value)
+
+        reg3_ok = yield dec2.e.read_reg3.ok
+        if reg3_ok:
+            reg3_sel = yield dec2.e.read_reg3.data
+            reg3 = simulator.gpr(reg3_sel).value
+            yield alu.p.data_i.a.eq(reg3)
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.alu = alu = CRBasePipe(pspec)
+
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                index = simulator.pc.CIA.value//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("0x{:X}".format(ins & 0xffffffff))
+                    print(code)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+                    yield from self.set_inputs(alu, pdecode2, simulator)
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.CR.value, code)
+                    yield 
+                    opname = code.split(' ')[0]
+                    yield from simulator.call(opname)
+                    index = simulator.pc.CIA.value//4
+
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+                    cr_out = yield pdecode2.e.output_cr
+                    if cr_out:
+                        cr_expected = simulator.cr.get_range().value
+                        cr_real = yield alu.n.data_o.cr
+                        msg = f"real: {cr_expected:x}, actual: {cr_real:x}"
+                        msg += " code: %s" % code
+                        self.assertEqual(cr_expected, cr_real, msg)
+
+                    reg_out = yield pdecode2.e.write_reg.ok
+                    if reg_out:
+                        reg_sel = yield pdecode2.e.write_reg.data
+                        reg_data = simulator.gpr(reg_sel).value
+                        output = yield alu.n.data_o.o
+                        msg = f"real: {reg_data:x}, actual: {output:x}"
+                        self.assertEqual(reg_data, output)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+    def check_extra_alu_outputs(self, alu, dec2, sim):
+        rc = yield dec2.e.rc.data
+        if rc:
+            cr_expected = sim.crl[0].get_range().value
+            cr_actual = yield alu.n.data_o.cr0
+            self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
diff --git a/src/soc/fu/logical/__init__.py b/src/soc/fu/logical/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/soc/fu/logical/bperm.py b/src/soc/fu/logical/bperm.py
new file mode 100644 (file)
index 0000000..674555b
--- /dev/null
@@ -0,0 +1,55 @@
+from nmigen import Elaboratable, Signal, Module, Repl, Cat, Const, Array
+from nmigen.cli import main
+
+
+class Bpermd(Elaboratable):
+    """This class does a Bit Permute on a Doubleword
+
+   X-form bpermd RA,RS,RB]
+
+   Eight permuted bits are produced. For each permuted bit i where i ranges
+   from 0 to 7 and for each byte i of RS, do the following. If byte i of RS
+   is less than 64, permuted bit i is setto the bit of RB specified by byte
+   i of RS; otherwise permuted bit i is set to 0. The  permuted  bits are
+   placed in the least-significantbyte of RA, and the remaining bits are
+   filled with 0s.
+   Special Registers Altered: None
+
+   Programming note:
+   The fact that the permuted bit is 0 if the corresponding index value
+   exceeds 63 permits the permuted bits to be selected from a 128-bit
+   quantity, using a single index register. For example, assume that the
+   128-bit quantity Q, from which the permuted bits are to be selected, is
+   in registers r2(high-order 64 bits of Q) and r3 (low-order 64 bits of Q),
+   that the index values are in register r1, with each byte of r1 containing
+   a value in the range 0:127, and that each byte of register r4 contains
+   the value 64. The following code sequence selects eight permuted bits
+   from Q and places them into the low-order byte of r6.
+    """
+
+    def __init__(self, width):
+        self.width = width
+        self.rs = Signal(width, reset_less=True)
+        self.ra = Signal(width, reset_less=True)
+        self.rb = Signal(width, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        perm = Signal(self.width, reset_less=True)
+        rb64 = [Signal(1, reset_less=True, name=f"rb64_{i}") for i in range(64)]
+        for i in range(64):
+            m.d.comb += rb64[i].eq(self.rb[i])
+        rb64 = Array(rb64)
+        for i in range(8):
+            index = self.rs[8*i:8*i+8]
+            idx = Signal(8, name=f"idx_{i}", reset_less=True)
+            m.d.comb += idx.eq(index)
+            with m.If(idx < 64):
+                m.d.comb += perm[i].eq(rb64[idx])
+        m.d.comb += self.ra[0:8].eq(perm)
+        return m
+
+
+if __name__ == "__main__":
+    bperm = Bpermd(width=64)
+    main(bperm, ports=[bperm.rs, bperm.ra, bperm.rb])
diff --git a/src/soc/fu/logical/formal/.gitignore b/src/soc/fu/logical/formal/.gitignore
new file mode 100644 (file)
index 0000000..150f68c
--- /dev/null
@@ -0,0 +1 @@
+*/*
diff --git a/src/soc/fu/logical/formal/proof_bperm.py b/src/soc/fu/logical/formal/proof_bperm.py
new file mode 100644 (file)
index 0000000..da19894
--- /dev/null
@@ -0,0 +1,125 @@
+# Proof of correctness for bit permute module
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+                    signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.logical.bperm import Bpermd
+
+import unittest
+
+
+# So formal verification is a little different than writing a test
+# case, as you're actually generating logic around your module to
+# check that it behaves a certain way. So here, I'm going to create a
+# module to put my formal assertions in
+class Driver(Elaboratable):
+    def __init__(self):
+        # We don't need any inputs and outputs here, so I won't
+        # declare any
+        pass
+
+    def elaborate(self, platform):
+        # standard stuff
+        m = Module()
+        comb = m.d.comb
+
+        # instantiate the device under test as a submodule
+        m.submodules.bperm = bperm = Bpermd(64)
+
+        # Grab the inputs and outputs of the DUT to make them more
+        # convenient to access
+        rs = bperm.rs
+        rb = bperm.rb
+        ra = bperm.ra
+
+        # Before we prove any properties about the DUT, we need to set
+        # up its inputs. There's a couple ways to do this, you could
+        # define some inputs and outputs for the driver module and
+        # wire them up to the DUT, but that's kind of a pain. The
+        # other option is to use AnyConst/AnySeq, which tells yosys
+        # that those inputs can take on any value.
+
+        # AnyConst should be used when the input should take on a
+        # random value, but that value should be constant throughout
+        # the test.
+        # AnySeq should be used when the input can change on every
+        # cycle
+
+        # Since this is a combinatorial circuit, it really doesn't
+        # matter which one you choose, so I chose AnyConst. If this
+        # was a sequential circuit, (especially a state machine) you'd
+        # want to use AnySeq
+        comb += [rs.eq(AnyConst(64)),
+                 rb.eq(AnyConst(64))]
+
+
+        # The pseudocode in the Power ISA manual (v3.1) is as follows:
+        # do i = 0 to 7
+        #    index <- RS[8*i:8*i+8]
+        #    if index < 64:
+        #        perm[i] <- RB[index]
+        #    else:
+        #        perm[i] <- 0
+        # RA <- 56'b0 || perm[0:8]  # big endian though
+
+        # Looking at this, I can identify 3 properties that the bperm
+        # module should keep:
+        #   1. RA[8:64] should always equal 0
+        #   2. If RB[i*8:i*8+8] >= 64 then RA[i] should equal 0
+        #   3. If RB[i*8:i*8+8] < 64 then RA[i] should RS[index]
+
+        # Now we need to Assert that the properties above hold:
+
+        # Property 1: RA[8:64] should always equal 0
+        comb += Assert(ra[8:] == 0)
+        # Notice how we're adding Assert to comb like it's a circuit?
+        # That's because it kind of is. If you run this proof and have
+        # yosys graph the ilang, you'll be able to see an equals
+        # comparison cell feeding into an assert cell
+
+        # Now we need to prove property #2. I'm going to leave this to
+        # you Cole. I'd start by writing a for loop and extracting the
+        # 8 indices into signals. Then I'd write an if statement
+        # checking if the index is >= 64 (it's hardware, so use an
+        # m.If()). Finally, I'd add an assert that checks whether
+        # ra[i] is equal to 0
+
+
+
+        return m
+
+
+class TestCase(FHDLTestCase):
+    # This bit here is actually in charge of running the formal
+    # proof. It has nmigen spit out the ilang, and feeds it to
+    # SymbiYosys to run the proof. If the proof fails, yosys will
+    # generate a .vcd file showing how it was able to violate your
+    # assertions in proof_bperm_formal/engine_0/trace.vcd. From that
+    # you should be able to figure out what went wrong, and either
+    # correct the assertion or fix the DUT
+    def test_formal(self):
+        module = Driver()
+        # This runs a Bounded Model Check on the driver module
+        # above. What that does is it starts at some initial state,
+        # and steps it through `depth` cycles, checking that the
+        # assertions hold at every cycle. Since this is a
+        # combinatorial module, it only needs 1 cycle to prove
+        # everything. 
+        self.assertFormal(module, mode="bmc", depth=2)
+        self.assertFormal(module, mode="cover", depth=2)
+
+    # As mentioned above, you can look at the graph in yosys and see
+    # all the assertion cells
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("bperm.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/logical/formal/proof_input_stage.py b/src/soc/fu/logical/formal/proof_input_stage.py
new file mode 100644 (file)
index 0000000..bb62fb6
--- /dev/null
@@ -0,0 +1,85 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ALUInputStage(pspec)
+
+        a = Signal(64)
+        b = Signal(64)
+        comb += [dut.i.a.eq(a),
+                 dut.i.b.eq(b),
+                 a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64))]
+                      
+
+        comb += dut.i.ctx.op.eq(rec)
+
+
+        # Assert that op gets copied from the input to output
+        for p in rec.ports():
+            name = p.name
+            rec_sig = p
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        with m.If(rec.invert_a):
+            comb += Assert(dut.o.a == ~a)
+        with m.Else():
+            comb += Assert(dut.o.a == a)
+
+        with m.If(rec.imm_data.imm_ok &
+                  ~(rec.insn_type == InternalOp.OP_RLC)):
+            comb += Assert(dut.o.b == rec.imm_data.imm)
+        with m.Else():
+            comb += Assert(dut.o.b == b)
+
+
+
+
+        return m
+
+class GTCombinerTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=4)
+        self.assertFormal(module, mode="cover", depth=4)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("input_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/logical/formal/proof_main_stage.py b/src/soc/fu/logical/formal/proof_main_stage.py
new file mode 100644 (file)
index 0000000..5ca9481
--- /dev/null
@@ -0,0 +1,92 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+                    signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.logical.main_stage import LogicalMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = LogicalMainStage(pspec)
+
+        # convenience variables
+        a = dut.i.a
+        b = dut.i.b
+        carry_in = dut.i.carry_in
+        so_in = dut.i.so
+        carry_out = dut.o.carry_out
+        o = dut.o.o
+
+        # setup random inputs
+        comb += [a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64)),
+                 carry_in.eq(AnyConst(1)),
+                 so_in.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        for rec_sig in rec.ports():
+            name = rec_sig.name
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        # signed and signed/32 versions of input a
+        a_signed = Signal(signed(64))
+        a_signed_32 = Signal(signed(32))
+        comb += a_signed.eq(a)
+        comb += a_signed_32.eq(a[0:32])
+
+        # main assertion of arithmetic operations
+        with m.Switch(rec.insn_type):
+            with m.Case(InternalOp.OP_AND):
+                comb += Assert(dut.o.o == a & b)
+            with m.Case(InternalOp.OP_OR):
+                comb += Assert(dut.o.o == a | b)
+            with m.Case(InternalOp.OP_XOR):
+                comb += Assert(dut.o.o == a ^ b)
+
+        return m
+
+
+class LogicalTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=2)
+        self.assertFormal(module, mode="cover", depth=2)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("main_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/logical/input_stage.py b/src/soc/fu/logical/input_stage.py
new file mode 100644 (file)
index 0000000..e6ab48e
--- /dev/null
@@ -0,0 +1,63 @@
+# This stage is intended to adjust the input data before sending it to
+# the acutal ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+                    unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.alu.pipe_data import ALUInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ALUInputStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "input")
+
+    def ispec(self):
+        return ALUInputData(self.pspec)
+
+    def ospec(self):
+        return ALUInputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        ##### operand A #####
+
+        # operand a to be as-is or inverted
+        a = Signal.like(self.i.a)
+
+        with m.If(self.i.ctx.op.invert_a):
+            comb += a.eq(~self.i.a)
+        with m.Else():
+            comb += a.eq(self.i.a)
+
+        comb += self.o.a.eq(a)
+
+        ##### operand B #####
+
+        # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+        # remove this, just do self.o.b.eq(self.i.b) and move the
+        # immediate-detection into set_alu_inputs in the unit test
+        # If there's an immediate, set the B operand to that
+        comb += self.o.b.eq(self.i.b)
+
+        ##### carry-in #####
+
+        # either copy incoming carry or set to 1/0 as defined by op
+        with m.Switch(self.i.ctx.op.input_carry):
+            with m.Case(CryIn.ZERO):
+                comb += self.o.carry_in.eq(0)
+            with m.Case(CryIn.ONE):
+                comb += self.o.carry_in.eq(1)
+            with m.Case(CryIn.CA):
+                comb += self.o.carry_in.eq(self.i.carry_in)
+
+        ##### sticky overflow and context (both pass-through) #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/logical/main_stage.py b/src/soc/fu/logical/main_stage.py
new file mode 100644 (file)
index 0000000..e740d07
--- /dev/null
@@ -0,0 +1,127 @@
+# This stage is intended to do most of the work of executing Logical
+# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ
+# however input and output stages also perform bit-negation on input(s)
+# and output, as well as carry and overflow generation.
+# This module however should not gate the carry or overflow, that's up
+# to the output stage
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.logical.pipe_data import ALUInputData
+from soc.alu.pipe_data import ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+from soc.countzero.countzero import ZeroCounter
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+def array_of(count, bitwidth):
+    res = []
+    for i in range(count):
+        res.append(Signal(bitwidth, reset_less=True))
+    return res
+
+
+class LogicalMainStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "main")
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        return ALUInputData(self.pspec)
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o
+
+        ##########################
+        # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount
+
+        with m.Switch(op.insn_type):
+
+            ###### AND, OR, XOR #######
+            with m.Case(InternalOp.OP_AND):
+                comb += o.eq(a & b)
+            with m.Case(InternalOp.OP_OR):
+                comb += o.eq(a | b)
+            with m.Case(InternalOp.OP_XOR):
+                comb += o.eq(a ^ b)
+
+            ###### cmpb #######
+            with m.Case(InternalOp.OP_CMPB):
+                l = []
+                for i in range(8):
+                    slc = slice(i*8, (i+1)*8)
+                    l.append(Repl(a[slc] == b[slc], 8))
+                comb += o.eq(Cat(*l))
+
+            ###### popcount #######
+            with m.Case(InternalOp.OP_POPCNT):
+                # starting from a, perform successive addition-reductions
+                # creating arrays big enough to store the sum, each time
+                pc = [a]
+                # QTY32 2-bit (to take 2x 1-bit sums) etc.
+                work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
+                for l, b in work:
+                    pc.append(array_of(l, b))
+                pc8 = pc[3]     # array of 8 8-bit counts (popcntb)
+                pc32 = pc[5]    # array of 2 32-bit counts (popcntw)
+                popcnt = pc[-1] # array of 1 64-bit count (popcntd)
+                # cascade-tree of adds
+                for idx, (l, b) in enumerate(work):
+                    for i in range(l):
+                        stt, end = i*2, i*2+1
+                        src, dst = pc[idx], pc[idx+1]
+                        comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
+                                          Cat(src[end], Const(0, 1)))
+                # decode operation length
+                with m.If(op.data_len[2:4] == 0b00):
+                    # popcntb - pack 8x 4-bit answers into output
+                    for i in range(8):
+                        comb += o[i*8:i*8+4].eq(pc8[i])
+                with m.Elif(op.data_len[3] == 0):
+                    # popcntw - pack 2x 5-bit answers into output
+                    for i in range(2):
+                        comb += o[i*32:i*32+5].eq(pc32[i])
+                with m.Else():
+                    # popcntd - put 1x 6-bit answer into output
+                    comb += o.eq(popcnt[0])
+
+            ###### parity #######
+            with m.Case(InternalOp.OP_PRTY):
+                # strange instruction which XORs together the LSBs of each byte
+                par0 = Signal(reset_less=True)
+                par1 = Signal(reset_less=True)
+                comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
+                comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
+                with m.If(op.data_len[3] == 1):
+                    comb += o.eq(par0 ^ par1)
+                with m.Else():
+                    comb += o[0].eq(par0)
+                    comb += o[32].eq(par1)
+
+            ###### cntlz #######
+            with m.Case(InternalOp.OP_CNTZ):
+                XO = self.fields.FormX.XO[0:-1]
+                m.submodules.countz = countz = ZeroCounter()
+                comb += countz.rs_i.eq(a)
+                comb += countz.is_32bit_i.eq(op.is_32bit)
+                comb += countz.count_right_i.eq(XO[-1])
+                comb += o.eq(countz.result_o)
+
+            ###### bpermd #######
+            # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/logical/pipe_data.py b/src/soc/fu/logical/pipe_data.py
new file mode 100644 (file)
index 0000000..4bf064f
--- /dev/null
@@ -0,0 +1,25 @@
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.alu.pipe_data import IntegerData
+
+
+class ALUInputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.a = Signal(64, reset_less=True) # RA
+        self.b = Signal(64, reset_less=True) # RB/immediate
+        self.so = Signal(reset_less=True)
+        self.carry_in = Signal(reset_less=True)
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.a
+        yield self.b
+        yield self.carry_in
+        yield self.so
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.a.eq(i.a), self.b.eq(i.b),
+                      self.carry_in.eq(i.carry_in),
+                      self.so.eq(i.so)]
diff --git a/src/soc/fu/logical/pipeline.py b/src/soc/fu/logical/pipeline.py
new file mode 100644 (file)
index 0000000..f3c8327
--- /dev/null
@@ -0,0 +1,25 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.alu.input_stage import ALUInputStage
+from soc.logical.main_stage import LogicalMainStage
+from soc.alu.output_stage import ALUOutputStage
+
+class LogicalStages(PipeModBaseChain):
+    def get_chain(self):
+        inp = ALUInputStage(self.pspec)
+        main = LogicalMainStage(self.pspec)
+        out = ALUOutputStage(self.pspec)
+        return [inp, main, out]
+
+
+class LogicalBasePipe(ControlBase):
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = LogicalStages(pspec)
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
diff --git a/src/soc/fu/logical/test/test_bperm.py b/src/soc/fu/logical/test/test_bperm.py
new file mode 100644 (file)
index 0000000..7a742b0
--- /dev/null
@@ -0,0 +1 @@
+'''Empty until I write the unit test'''
diff --git a/src/soc/fu/logical/test/test_pipe_caller.py b/src/soc/fu/logical/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..79c1e29
--- /dev/null
@@ -0,0 +1,262 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.logical.pipeline import LogicalBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+    def __init__(self, program, regs, sprs, name):
+        self.program = program
+        self.regs = regs
+        self.sprs = sprs
+        self.name = name
+
+def get_rec_width(rec):
+    recwidth = 0
+    # Setup random inputs for dut.op
+    for p in rec.ports():
+        width = p.width
+        recwidth += width
+    return recwidth
+
+def set_alu_inputs(alu, dec2, sim):
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    reg3_ok = yield dec2.e.read_reg3.ok
+    reg1_ok = yield dec2.e.read_reg1.ok
+    assert reg3_ok != reg1_ok
+    if reg3_ok:
+        data1 = yield dec2.e.read_reg3.data
+        data1 = sim.gpr(data1).value
+    elif reg1_ok:
+        data1 = yield dec2.e.read_reg1.data
+        data1 = sim.gpr(data1).value
+    else:
+        data1 = 0
+
+    yield alu.p.data_i.a.eq(data1)
+
+    # If there's an immediate, set the B operand to that
+    reg2_ok = yield dec2.e.read_reg2.ok
+    imm_ok = yield dec2.e.imm_data.imm_ok
+    if imm_ok:
+        data2 = yield dec2.e.imm_data.imm
+    elif reg2_ok:
+        data2 = yield dec2.e.read_reg2.data
+        data2 = sim.gpr(data2).value
+    else:
+        data2 = 0
+    yield alu.p.data_i.b.eq(data2)
+
+
+
+def set_extra_alu_inputs(alu, dec2, sim):
+    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
+    yield alu.p.data_i.carry_in.eq(carry)
+    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+    yield alu.p.data_i.so.eq(so)
+    
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class LogicalTestCase(FHDLTestCase):
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
+        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+        test_data.append(tc)
+
+    def test_rand(self):
+        insns = ["and", "or", "xor"]
+        for i in range(40):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rand_imm_logical(self):
+        insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
+        for i in range(10):
+            choice = random.choice(insns)
+            imm = random.randint(0, (1<<16)-1)
+            lst = [f"{choice} 3, 1, {imm}"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    @unittest.skip("broken")
+    def test_cntz(self):
+        insns = ["cntlzd", "cnttzd"]
+        for i in range(10):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_parity(self):
+        insns = ["prtyw", "prtyd"]
+        for i in range(10):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_popcnt(self):
+        insns = ["popcntb", "popcntw", "popcntd"]
+        for i in range(10):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_cmpb(self):
+        lst = ["cmpb 3, 1, 2"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0xdeadbeefcafec0de
+        initial_regs[2] = 0xd0adb0000afec1de
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_ilang(self):
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = LogicalBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("logical_pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.alu = alu = LogicalBasePipe(pspec)
+
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                index = simulator.pc.CIA.value//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("0x{:X}".format(ins & 0xffffffff))
+                    print(code)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.LOGICAL.value, code)
+                    yield from set_alu_inputs(alu, pdecode2, simulator)
+                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+                    yield 
+                    opname = code.split(' ')[0]
+                    yield from simulator.call(opname)
+                    index = simulator.pc.CIA.value//4
+
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+                    alu_out = yield alu.n.data_o.o
+                    out_reg_valid = yield pdecode2.e.write_reg.ok
+                    if out_reg_valid:
+                        write_reg_idx = yield pdecode2.e.write_reg.data
+                        expected = simulator.gpr(write_reg_idx).value
+                        print(f"expected {expected:x}, actual: {alu_out:x}")
+                        self.assertEqual(expected, alu_out, code)
+                    yield from self.check_extra_alu_outputs(alu, pdecode2,
+                                                            simulator)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+    def check_extra_alu_outputs(self, alu, dec2, sim):
+        rc = yield dec2.e.rc.data
+        if rc:
+            cr_expected = sim.crl[0].get_range().value
+            cr_actual = yield alu.n.data_o.cr0
+            self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
diff --git a/src/soc/fu/shift_rot/formal/.gitignore b/src/soc/fu/shift_rot/formal/.gitignore
new file mode 100644 (file)
index 0000000..150f68c
--- /dev/null
@@ -0,0 +1 @@
+*/*
diff --git a/src/soc/fu/shift_rot/formal/proof_main_stage.py b/src/soc/fu/shift_rot/formal/proof_main_stage.py
new file mode 100644 (file)
index 0000000..50264d5
--- /dev/null
@@ -0,0 +1,108 @@
+# Proof of correctness for partitioned equal signal combiner
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+                    signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.shift_rot.main_stage import ShiftRotMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    def __init__(self):
+        # inputs and outputs
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ShiftRotMainStage(pspec)
+
+        # convenience variables
+        a = dut.i.rs
+        b = dut.i.rb
+        ra = dut.i.ra
+        carry_in = dut.i.carry_in
+        so_in = dut.i.so
+        carry_out = dut.o.carry_out
+        o = dut.o.o
+
+        # setup random inputs
+        comb += [a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64)),
+                 carry_in.eq(AnyConst(1)),
+                 so_in.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        for rec_sig in rec.ports():
+            name = rec_sig.name
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        # signed and signed/32 versions of input a
+        a_signed = Signal(signed(64))
+        a_signed_32 = Signal(signed(32))
+        comb += a_signed.eq(a)
+        comb += a_signed_32.eq(a[0:32])
+
+        # main assertion of arithmetic operations
+        with m.Switch(rec.insn_type):
+            with m.Case(InternalOp.OP_SHL):
+                comb += Assume(ra == 0)
+                with m.If(rec.is_32bit):
+                    comb += Assert(o[0:32] == ((a << b[0:6]) & 0xffffffff))
+                    comb += Assert(o[32:64] == 0)
+                with m.Else():
+                    comb += Assert(o == ((a << b[0:7]) & ((1 << 64)-1)))
+            with m.Case(InternalOp.OP_SHR):
+                comb += Assume(ra == 0)
+                with m.If(~rec.is_signed):
+                    with m.If(rec.is_32bit):
+                        comb += Assert(o[0:32] == (a[0:32] >> b[0:6]))
+                        comb += Assert(o[32:64] == 0)
+                    with m.Else():
+                        comb += Assert(o == (a >> b[0:7]))
+                with m.Else():
+                    with m.If(rec.is_32bit):
+                        comb += Assert(o[0:32] == (a_signed_32 >> b[0:6]))
+                        comb += Assert(o[32:64] == Repl(a[31], 32))
+                    with m.Else():
+                        comb += Assert(o == (a_signed >> b[0:7]))
+
+        return m
+
+
+class ALUTestCase(FHDLTestCase):
+    def test_formal(self):
+        module = Driver()
+        self.assertFormal(module, mode="bmc", depth=2)
+        self.assertFormal(module, mode="cover", depth=2)
+    def test_ilang(self):
+        dut = Driver()
+        vl = rtlil.convert(dut, ports=[])
+        with open("main_stage.il", "w") as f:
+            f.write(vl)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/shift_rot/input_stage.py b/src/soc/fu/shift_rot/input_stage.py
new file mode 100644 (file)
index 0000000..72e4c92
--- /dev/null
@@ -0,0 +1,58 @@
+# This stage is intended to adjust the input data before sending it to
+# the acutal ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+                    unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.shift_rot.pipe_data import ShiftRotInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ShiftRotInputStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "input")
+
+    def ispec(self):
+        return ShiftRotInputData(self.pspec)
+
+    def ospec(self):
+        return ShiftRotInputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        ##### operand A #####
+
+        # operand a to be as-is or inverted
+        a = Signal.like(self.i.ra)
+
+        with m.If(self.i.ctx.op.invert_a):
+            comb += a.eq(~self.i.ra)
+        with m.Else():
+            comb += a.eq(self.i.ra)
+
+        comb += self.o.ra.eq(a)
+        comb += self.o.rb.eq(self.i.rb)
+        comb += self.o.rs.eq(self.i.rs)
+
+
+        ##### carry-in #####
+
+        # either copy incoming carry or set to 1/0 as defined by op
+        with m.Switch(self.i.ctx.op.input_carry):
+            with m.Case(CryIn.ZERO):
+                comb += self.o.carry_in.eq(0)
+            with m.Case(CryIn.ONE):
+                comb += self.o.carry_in.eq(1)
+            with m.Case(CryIn.CA):
+                comb += self.o.carry_in.eq(self.i.carry_in)
+
+        ##### sticky overflow and context (both pass-through) #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/shift_rot/main_stage.py b/src/soc/fu/shift_rot/main_stage.py
new file mode 100644 (file)
index 0000000..f237528
--- /dev/null
@@ -0,0 +1,78 @@
+# This stage is intended to do most of the work of executing shift
+# instructions, as well as carry and overflow generation. This module
+# however should not gate the carry or overflow, that's up to the
+# output stage
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+from nmutil.pipemodbase import PipeModBase
+from soc.alu.pipe_data import ALUOutputData
+from soc.shift_rot.pipe_data import ShiftRotInputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+from soc.shift_rot.rotator import Rotator
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+class ShiftRotMainStage(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "main")
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        return ShiftRotInputData(self.pspec)
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # obtain me and mb fields from instruction.
+        m_fields = self.fields.instrs['M']
+        md_fields = self.fields.instrs['MD']
+        mb = Signal(m_fields['MB'][0:-1].shape())
+        me = Signal(m_fields['ME'][0:-1].shape())
+        mb_extra = Signal(1, reset_less=True)
+        comb += mb.eq(m_fields['MB'][0:-1])
+        comb += me.eq(m_fields['ME'][0:-1])
+        comb += mb_extra.eq(md_fields['mb'][0:-1][0])
+
+        # set up microwatt rotator module
+        m.submodules.rotator = rotator = Rotator()
+        comb += [
+            rotator.me.eq(me),
+            rotator.mb.eq(mb),
+            rotator.mb_extra.eq(mb_extra),
+            rotator.rs.eq(self.i.rs),
+            rotator.ra.eq(self.i.ra),
+            rotator.shift.eq(self.i.rb),
+            rotator.is_32bit.eq(self.i.ctx.op.is_32bit),
+            rotator.arith.eq(self.i.ctx.op.is_signed),
+        ]
+
+        # instruction rotate type
+        mode = Signal(3, reset_less=True)
+        with m.Switch(self.i.ctx.op.insn_type):
+            with m.Case(InternalOp.OP_SHL):  comb += mode.eq(0b000)
+            with m.Case(InternalOp.OP_SHR):  comb += mode.eq(0b001) # R-shift
+            with m.Case(InternalOp.OP_RLC):  comb += mode.eq(0b110) # clear LR
+            with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L
+            with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R
+
+        comb += Cat(rotator.right_shift,
+                    rotator.clear_left,
+                    rotator.clear_right).eq(mode)
+                
+        # outputs from the microwatt rotator module
+        comb += [self.o.o.eq(rotator.result_o),
+                 self.o.carry_out.eq(rotator.carry_out_o)]
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/shift_rot/maskgen.py b/src/soc/fu/shift_rot/maskgen.py
new file mode 100644 (file)
index 0000000..89246e0
--- /dev/null
@@ -0,0 +1,47 @@
+from nmigen import (Elaboratable, Signal, Module)
+import math
+
+class MaskGen(Elaboratable):
+    """MaskGen - create a diff mask
+
+    example: x=5 --> a=0b11111
+             y=3 --> b=0b00111
+             o:        0b11000
+             x=2 --> a=0b00011
+             y=4 --> b=0b01111
+             o:        0b10011
+    """
+    def __init__(self, width):
+        self.width = width
+        self.shiftwidth = math.ceil(math.log2(width))
+        self.mb = Signal(self.shiftwidth, reset_less=True)
+        self.me = Signal(self.shiftwidth, reset_less=True)
+
+        self.o = Signal(width, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        x = Signal.like(self.mb)
+        y = Signal.like(self.mb)
+
+        comb += x.eq(64 - self.mb)
+        comb += y.eq(63 - self.me)
+
+        mask_a = Signal.like(self.o)
+        mask_b = Signal.like(self.o)
+
+        comb += mask_a.eq((1<<x) - 1)
+        comb += mask_b.eq((1<<y) - 1)
+
+        with m.If(x > y):
+            comb += self.o.eq(mask_a ^ mask_b)
+        with m.Else():
+            comb += self.o.eq(mask_a ^ ~mask_b)
+            
+
+        return m
+
+    def ports(self):
+        return [self.mb, self.me, self.o]
diff --git a/src/soc/fu/shift_rot/pipe_data.py b/src/soc/fu/shift_rot/pipe_data.py
new file mode 100644 (file)
index 0000000..7f98d16
--- /dev/null
@@ -0,0 +1,30 @@
+from nmigen import Signal, Const
+from nmutil.dynamicpipe import SimpleHandshakeRedir
+from soc.alu.alu_input_record import CompALUOpSubset
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.alu.pipe_data import IntegerData
+
+
+class ShiftRotInputData(IntegerData):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.ra = Signal(64, reset_less=True) # RA
+        self.rs = Signal(64, reset_less=True) # RS
+        self.rb = Signal(64, reset_less=True) # RB/immediate
+        self.so = Signal(reset_less=True)
+        self.carry_in = Signal(reset_less=True)
+
+    def __iter__(self):
+        yield from super().__iter__()
+        yield self.ra
+        yield self.rs
+        yield self.rb
+        yield self.carry_in
+        yield self.so
+
+    def eq(self, i):
+        lst = super().eq(i)
+        return lst + [self.rs.eq(i.rs), self.ra.eq(i.ra),
+                      self.rb.eq(i.rb),
+                      self.carry_in.eq(i.carry_in),
+                      self.so.eq(i.so)]
diff --git a/src/soc/fu/shift_rot/pipeline.py b/src/soc/fu/shift_rot/pipeline.py
new file mode 100644 (file)
index 0000000..1080aa8
--- /dev/null
@@ -0,0 +1,25 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.shift_rot.input_stage import ShiftRotInputStage
+from soc.shift_rot.main_stage import ShiftRotMainStage
+from soc.alu.output_stage import ALUOutputStage
+
+class ShiftRotStages(PipeModBaseChain):
+    def get_chain(self):
+        inp = ShiftRotInputStage(self.pspec)
+        main = ShiftRotMainStage(self.pspec)
+        out = ALUOutputStage(self.pspec)
+        return [inp, main, out]
+
+
+class ShiftRotBasePipe(ControlBase):
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = ShiftRotStages(pspec)
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
diff --git a/src/soc/fu/shift_rot/rotator.py b/src/soc/fu/shift_rot/rotator.py
new file mode 100644 (file)
index 0000000..23aa0e4
--- /dev/null
@@ -0,0 +1,156 @@
+# Manual translation and adaptation of rotator.vhdl from microwatt into nmigen
+#
+
+from nmigen import (Elaboratable, Signal, Module, Const, Cat,
+                    unsigned, signed)
+from soc.shift_rot.rotl import ROTL
+
+# note BE bit numbering
+def right_mask(m, mask_begin):
+    ret = Signal(64, name="right_mask", reset_less=True)
+    with m.If(mask_begin <= 64):
+        m.d.comb += ret.eq((1<<(64-mask_begin)) - 1)
+    return ret
+
+def left_mask(m, mask_end):
+    ret = Signal(64, name="left_mask", reset_less=True)
+    m.d.comb += ret.eq(~((1<<(63-mask_end)) - 1))
+    return ret
+
+
+class Rotator(Elaboratable):
+    """Rotator: covers multiple POWER9 rotate functions
+
+        supported modes:
+
+        * sl[wd]
+        * rlw*, rldic, rldicr, rldimi
+        * rldicl, sr[wd]
+        * sra[wd][i]
+
+        use as follows:
+
+        * shift = RB[0:7]
+        * arith = 1 when is_signed
+        * right_shift = 1 when insn_type is OP_SHR
+        * clear_left = 1 when insn_type is OP_RLC or OP_RLCL
+        * clear_right = 1 when insn_type is OP_RLC or OP_RLCR
+    """
+    def __init__(self):
+        # input
+        self.me = Signal(5, reset_less=True)        # ME field
+        self.mb = Signal(5, reset_less=True)        # MB field
+        self.mb_extra = Signal(1, reset_less=True)  # extra bit of mb in MD-form
+        self.ra = Signal(64, reset_less=True)       # RA
+        self.rs = Signal(64, reset_less=True)       # RS
+        self.ra = Signal(64, reset_less=True)       # RA
+        self.shift = Signal(7, reset_less=True)     # RB[0:7]
+        self.is_32bit = Signal(reset_less=True)
+        self.right_shift = Signal(reset_less=True)
+        self.arith = Signal(reset_less=True)
+        self.clear_left = Signal(reset_less=True)
+        self.clear_right = Signal(reset_less=True)
+        # output
+        self.result_o = Signal(64, reset_less=True)
+        self.carry_out_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        ra, rs = self.ra, self.rs
+
+        # temporaries
+        rot_in = Signal(64, reset_less=True)
+        rot_count = Signal(6, reset_less=True)
+        rot = Signal(64, reset_less=True)
+        sh = Signal(7, reset_less=True)
+        mb = Signal(7, reset_less=True)
+        me = Signal(7, reset_less=True)
+        mr = Signal(64, reset_less=True)
+        ml = Signal(64, reset_less=True)
+        output_mode = Signal(2, reset_less=True)
+
+        # First replicate bottom 32 bits to both halves if 32-bit
+        comb += rot_in[0:32].eq(rs[0:32])
+        with m.If(self.is_32bit):
+            comb += rot_in[32:64].eq(rs[0:32])
+        with m.Else():
+            comb += rot_in[32:64].eq(rs[32:64])
+
+        shift_signed = Signal(signed(6))
+        comb += shift_signed.eq(self.shift[0:6])
+
+        # Negate shift count for right shifts
+        with m.If(self.right_shift):
+            comb += rot_count.eq(-shift_signed)
+        with m.Else():
+            comb += rot_count.eq(self.shift[0:6])
+
+        # ROTL submodule
+        m.submodules.rotl = rotl = ROTL(64)
+        comb += rotl.a.eq(rot_in)
+        comb += rotl.b.eq(rot_count)
+        comb += rot.eq(rotl.o)
+
+        # Trim shift count to 6 bits for 32-bit shifts
+        comb += sh.eq(Cat(self.shift[0:6], self.shift[6] & ~self.is_32bit))
+
+        # XXX errr... we should already have these, in Fields?  oh well
+        # Work out mask begin/end indexes (caution, big-endian bit numbering)
+
+        # mask-begin (mb)
+        with m.If(self.clear_left):
+            comb += mb.eq(self.mb)
+            with m.If(self.is_32bit):
+                comb += mb[5:7].eq(Const(0b01, 2))
+            with m.Else():
+                comb += mb[5:7].eq(Cat(self.mb_extra, Const(0b0, 1)))
+        with m.Elif(self.right_shift):
+            # this is basically mb = sh + (is_32bit? 32: 0);
+            comb += mb.eq(sh)
+            with m.If(self.is_32bit):
+                comb += mb[5:7].eq(Cat(~sh[5], sh[5]))
+        with m.Else():
+            comb += mb.eq(Cat(Const(0b0, 5), self.is_32bit, Const(0b0, 1)))
+
+        # mask-end (me)
+        with m.If(self.clear_right & self.is_32bit):
+            # TODO: track down where this is.  have to use fields.
+            comb += me.eq(Cat(self.me, Const(0b01, 2)))
+        with m.Elif(self.clear_right & ~self.clear_left):
+            # this is me, have to use fields
+            comb += me.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1)))
+        with m.Else():
+            # effectively, 63 - sh
+            comb += me.eq(Cat(~sh[0:6], sh[6]))
+
+        # Calculate left and right masks
+        comb += mr.eq(right_mask(m, mb))
+        comb += ml.eq(left_mask(m, me))
+
+        # Work out output mode
+        # 00 for sl[wd]
+        # 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me
+        # 10 for rldicl, sr[wd]
+        # 1z for sra[wd][i], z = 1 if rs is negative
+        with m.If((self.clear_left & ~self.clear_right) | self.right_shift):
+            comb += output_mode.eq(Cat(self.arith & rot_in[63], Const(1, 1)))
+        with m.Else():
+            mbgt = self.clear_right & (mb[0:6] > me[0:6])
+            comb += output_mode.eq(Cat(mbgt, Const(0, 1)))
+
+        # Generate output from rotated input and masks
+        with m.Switch(output_mode):
+            with m.Case(0b00):
+                comb += self.result_o.eq((rot & (mr & ml)) | (ra & ~(mr & ml)))
+            with m.Case(0b01):
+                comb += self.result_o.eq((rot & (mr | ml)) | (ra & ~(mr | ml)))
+            with m.Case(0b10):
+                comb += self.result_o.eq(rot & mr)
+            with m.Case(0b11):
+                comb += self.result_o.eq(rot | ~mr)
+                # Generate carry output for arithmetic shift right of -ve value
+                comb += self.carry_out_o.eq(rs & ~ml)
+
+        return m
+
diff --git a/src/soc/fu/shift_rot/rotl.py b/src/soc/fu/shift_rot/rotl.py
new file mode 100644 (file)
index 0000000..d2ebfcf
--- /dev/null
@@ -0,0 +1,24 @@
+from nmigen import (Elaboratable, Signal, Module)
+import math
+
+class ROTL(Elaboratable):
+    def __init__(self, width):
+        self.width = width
+        self.shiftwidth = math.ceil(math.log2(width))
+        self.a = Signal(width, reset_less=True)
+        self.b = Signal(self.shiftwidth, reset_less=True)
+
+        self.o = Signal(width, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        shl = Signal.like(self.a)
+        shr = Signal.like(self.a)
+
+        comb += shl.eq(self.a << self.b)
+        comb += shr.eq(self.a >> (self.width - self.b))
+
+        comb += self.o.eq(shl | shr)
+        return m
diff --git a/src/soc/fu/shift_rot/test/test_maskgen.py b/src/soc/fu/shift_rot/test/test_maskgen.py
new file mode 100644 (file)
index 0000000..1a4d34e
--- /dev/null
@@ -0,0 +1,48 @@
+from nmigen import Signal, Module
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+from soc.alu.maskgen import MaskGen
+from soc.decoder.helpers import MASK
+import random
+import unittest
+
+class MaskGenTestCase(FHDLTestCase):
+    def test_maskgen(self):
+        m = Module()
+        comb = m.d.comb
+        m.submodules.dut = dut = MaskGen(64)
+        mb = Signal.like(dut.mb)
+        me = Signal.like(dut.me)
+        o = Signal.like(dut.o)
+
+        comb += [
+            dut.mb.eq(mb),
+            dut.me.eq(me),
+            o.eq(dut.o)]
+
+        sim = Simulator(m)
+
+        def process():
+            for x in range(0, 64):
+                for y in range(0, 64):
+                    yield mb.eq(x)
+                    yield me.eq(y)
+                    yield Delay(1e-6)
+
+                    expected = MASK(x, y)
+                    result = yield o
+                    self.assertEqual(expected, result)
+
+        sim.add_process(process) # or sim.add_sync_process(process), see below
+        with sim.write_vcd("maskgen.vcd", "maskgen.gtkw", traces=dut.ports()):
+            sim.run()
+
+    def test_ilang(self):
+        dut = MaskGen(64)
+        vl = rtlil.convert(dut, ports=dut.ports())
+        with open("maskgen.il", "w") as f:
+            f.write(vl)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/soc/fu/shift_rot/test/test_pipe_caller.py b/src/soc/fu/shift_rot/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..dbd4092
--- /dev/null
@@ -0,0 +1,279 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.shift_rot.pipeline import ShiftRotBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+class TestCase:
+    def __init__(self, program, regs, sprs, name):
+        self.program = program
+        self.regs = regs
+        self.sprs = sprs
+        self.name = name
+
+def get_rec_width(rec):
+    recwidth = 0
+    # Setup random inputs for dut.op
+    for p in rec.ports():
+        width = p.width
+        recwidth += width
+    return recwidth
+
+def set_alu_inputs(alu, dec2, sim):
+    inputs = []
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    reg3_ok = yield dec2.e.read_reg3.ok
+    if reg3_ok:
+        reg3_sel = yield dec2.e.read_reg3.data
+        data3 = sim.gpr(reg3_sel).value
+    else:
+        data3 = 0
+    reg1_ok = yield dec2.e.read_reg1.ok
+    if reg1_ok:
+        reg1_sel = yield dec2.e.read_reg1.data
+        data1 = sim.gpr(reg1_sel).value
+    else:
+        data1 = 0
+    reg2_ok = yield dec2.e.read_reg2.ok
+    imm_ok = yield dec2.e.imm_data.ok
+    if reg2_ok:
+        reg2_sel = yield dec2.e.read_reg2.data
+        data2 = sim.gpr(reg2_sel).value
+    elif imm_ok:
+        data2 = yield dec2.e.imm_data.imm
+    else:
+        data2 = 0
+
+    yield alu.p.data_i.ra.eq(data1)
+    yield alu.p.data_i.rb.eq(data2)
+    yield alu.p.data_i.rs.eq(data3)
+
+
+def set_extra_alu_inputs(alu, dec2, sim):
+    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
+    yield alu.p.data_i.carry_in.eq(carry)
+    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+    yield alu.p.data_i.so.eq(so)
+    
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class ALUTestCase(FHDLTestCase):
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
+        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+        test_data.append(tc)
+
+
+    def test_shift(self):
+        insns = ["slw", "sld", "srw", "srd", "sraw", "srad"]
+        for i in range(20):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, 63)
+            print(initial_regs[1], initial_regs[2])
+            self.run_tst_program(Program(lst), initial_regs)
+
+
+    def test_shift_arith(self):
+        lst = ["sraw 3, 1, 2"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[2] = random.randint(0, 63)
+        print(initial_regs[1], initial_regs[2])
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_shift_once(self):
+        lst = ["slw 3, 1, 4",
+               "slw 3, 1, 2"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0x80000000
+        initial_regs[2] = 0x40
+        initial_regs[4] = 0x00
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlwinm(self):
+        for i in range(10):
+            mb = random.randint(0,31)
+            me = random.randint(0,31)
+            sh = random.randint(0,31)
+            lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlwimi(self):
+        lst = ["rlwimi 3, 1, 5, 20, 6"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0xdeadbeef
+        initial_regs[3] = 0x12345678
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlwnm(self):
+        lst = ["rlwnm 3, 1, 2, 20, 6"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[2] = random.randint(0, 63)
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rldicl(self):
+        lst = ["rldicl 3, 1, 5, 20"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rldicr(self):
+        lst = ["rldicr 3, 1, 5, 20"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlc(self):
+        insns = ["rldic", "rldicl", "rldicr"]
+        for i in range(20):
+            choice = random.choice(insns)
+            sh = random.randint(0, 63)
+            m = random.randint(0, 63)
+            lst = [f"{choice} 3, 1, {sh}, {m}"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_ilang(self):
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = ShiftRotBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.alu = alu = ShiftRotBasePipe(pspec)
+
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                index = simulator.pc.CIA.value//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("0x{:X}".format(ins & 0xffffffff))
+                    print(code)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.SHIFT_ROT.value)
+                    yield from set_alu_inputs(alu, pdecode2, simulator)
+                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+                    yield 
+                    opname = code.split(' ')[0]
+                    yield from simulator.call(opname)
+                    index = simulator.pc.CIA.value//4
+
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+                    alu_out = yield alu.n.data_o.o
+                    out_reg_valid = yield pdecode2.e.write_reg.ok
+                    if out_reg_valid:
+                        write_reg_idx = yield pdecode2.e.write_reg.data
+                        expected = simulator.gpr(write_reg_idx).value
+                        msg = f"expected {expected:x}, actual: {alu_out:x}"
+                        self.assertEqual(expected, alu_out, msg)
+                    yield from self.check_extra_alu_outputs(alu, pdecode2,
+                                                            simulator)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+    def check_extra_alu_outputs(self, alu, dec2, sim):
+        rc = yield dec2.e.rc.data
+        if rc:
+            cr_expected = sim.crl[0].get_range().value
+            cr_actual = yield alu.n.data_o.cr0
+            self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
diff --git a/src/soc/pipe/__init__.py b/src/soc/pipe/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/soc/pipe/alu/alu_input_record.py b/src/soc/pipe/alu/alu_input_record.py
deleted file mode 100644 (file)
index 41a40eb..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-from nmigen.hdl.rec import Record, Layout
-
-from soc.decoder.power_enums import InternalOp, Function, CryIn
-
-
-class CompALUOpSubset(Record):
-    """CompALUOpSubset
-
-    a copy of the relevant subset information from Decode2Execute1Type
-    needed for ALU operations.  use with eq_from_execute1 (below) to
-    grab subsets.
-    """
-    def __init__(self, name=None):
-        layout = (('insn_type', InternalOp),
-                  ('fn_unit', Function),
-                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
-                    #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
-                    #'xerc = XerBits() # NO: this is from the XER SPR
-                  ('lk', 1),
-                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
-                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
-                  ('invert_a', 1),
-                  ('invert_out', 1),
-                  ('input_carry', CryIn),
-                  ('output_carry', 1),
-                  ('input_cr', 1),
-                  ('output_cr', 1),
-                  ('is_32bit', 1),
-                  ('is_signed', 1),
-                  ('data_len', 4), # TODO: should be in separate CompLDSTSubset
-                  ('insn', 32),
-                  ('byte_reverse', 1),
-                  ('sign_extend', 1))
-
-        Record.__init__(self, Layout(layout), name=name)
-
-        # grrr.  Record does not have kwargs
-        self.insn_type.reset_less = True
-        self.fn_unit.reset_less = True
-        #self.cr = Signal(32, reset_less = True
-        #self.xerc = XerBits(
-        self.lk.reset_less = True
-        self.invert_a.reset_less = True
-        self.invert_out.reset_less = True
-        self.input_carry.reset_less = True
-        self.output_carry.reset_less = True
-        self.input_cr.reset_less = True
-        self.output_cr.reset_less = True
-        self.is_32bit.reset_less = True
-        self.is_signed.reset_less = True
-        self.data_len.reset_less = True
-        self.byte_reverse.reset_less = True
-        self.sign_extend.reset_less = True
-
-    def eq_from_execute1(self, other):
-        """ use this to copy in from Decode2Execute1Type
-        """
-        res = []
-        for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
-            res.append(sig.eq(eqfrom))
-        return res
-
-    def ports(self):
-        return [self.insn_type,
-                #self.cr,
-                #self.xerc,
-                self.lk,
-                self.invert_a,
-                self.invert_out,
-                self.input_carry,
-                self.output_carry,
-                self.input_cr,
-                self.output_cr,
-                self.is_32bit,
-                self.is_signed,
-                self.data_len,
-                self.byte_reverse,
-                self.sign_extend,
-        ]
diff --git a/src/soc/pipe/alu/formal/.gitignore b/src/soc/pipe/alu/formal/.gitignore
deleted file mode 100644 (file)
index 150f68c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-*/*
diff --git a/src/soc/pipe/alu/formal/proof_input_stage.py b/src/soc/pipe/alu/formal/proof_input_stage.py
deleted file mode 100644 (file)
index 347ab7d..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = ALUInputStage(pspec)
-
-        a = Signal(64)
-        b = Signal(64)
-        comb += [dut.i.a.eq(a),
-                 dut.i.b.eq(b),
-                 a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        # Assert that op gets copied from the input to output
-        for p in rec.ports():
-            name = p.name
-            rec_sig = p
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        with m.If(rec.invert_a):
-            comb += Assert(dut.o.a == ~a)
-        with m.Else():
-            comb += Assert(dut.o.a == a)
-
-        comb += Assert(dut.o.b == b)
-
-        return m
-
-
-class GTCombinerTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=4)
-        self.assertFormal(module, mode="cover", depth=4)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("input_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/alu/formal/proof_main_stage.py b/src/soc/pipe/alu/formal/proof_main_stage.py
deleted file mode 100644 (file)
index f102fc2..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
-                    signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.main_stage import ALUMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = ALUMainStage(pspec)
-
-        # convenience variables
-        a = dut.i.a
-        b = dut.i.b
-        carry_in = dut.i.carry_in
-        so_in = dut.i.so
-        carry_out = dut.o.carry_out
-        o = dut.o.o
-
-        # setup random inputs
-        comb += [a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64)),
-                 carry_in.eq(AnyConst(1)),
-                 so_in.eq(AnyConst(1))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        # Assert that op gets copied from the input to output
-        for rec_sig in rec.ports():
-            name = rec_sig.name
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        # signed and signed/32 versions of input a
-        a_signed = Signal(signed(64))
-        a_signed_32 = Signal(signed(32))
-        comb += a_signed.eq(a)
-        comb += a_signed_32.eq(a[0:32])
-
-        # main assertion of arithmetic operations
-        with m.Switch(rec.insn_type):
-            with m.Case(InternalOp.OP_ADD):
-                comb += Assert(Cat(o, carry_out) == (a + b + carry_in))
-
-        return m
-
-
-class ALUTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=2)
-        self.assertFormal(module, mode="cover", depth=2)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("main_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/alu/formal/proof_output_stage.py b/src/soc/pipe/alu/formal/proof_output_stage.py
deleted file mode 100644 (file)
index 288da07..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux, Cat, signed
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.output_stage import ALUOutputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = ALUOutputStage(pspec)
-
-        o = Signal(64)
-        carry_out = Signal()
-        carry_out32 = Signal()
-        ov = Signal()
-        ov32 = Signal()
-        cr0 = Signal(4)
-        so = Signal()
-        comb += [dut.i.o.eq(o),
-                 dut.i.carry_out.eq(carry_out),
-                 dut.i.so.eq(so),
-                 dut.i.carry_out32.eq(carry_out32),
-                 dut.i.cr0.eq(cr0),
-                 dut.i.ov.eq(ov),
-                 dut.i.ov32.eq(ov32),
-                 o.eq(AnyConst(64)),
-                 carry_out.eq(AnyConst(1)),
-                 carry_out32.eq(AnyConst(1)),
-                 ov.eq(AnyConst(1)),
-                 ov32.eq(AnyConst(1)),
-                 cr0.eq(AnyConst(4)),
-                 so.eq(AnyConst(1))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        with m.If(dut.i.ctx.op.invert_out):
-            comb += Assert(dut.o.o == ~o)
-        with m.Else():
-            comb += Assert(dut.o.o == o)
-
-        cr_out = Signal.like(cr0)
-        comb += cr_out.eq(dut.o.cr0)
-
-        o_signed = Signal(signed(64))
-        comb += o_signed.eq(dut.o.o)
-        # Assert only one of the comparison bits is set
-        comb += Assert(cr_out[3] + cr_out[2] + cr_out[1] == 1)
-        with m.If(o_signed == 0):
-            comb += Assert(cr_out[1] == 1)
-        with m.Elif(o_signed > 0):
-            # sigh.  see https://bugs.libre-soc.org/show_bug.cgi?id=305#c61
-            # for OP_CMP we do b-a rather than a-b (just like ADD) and
-            # then invert the *test condition*.
-            with m.If(rec.insn_type == InternalOp.OP_CMP):
-                comb += Assert(cr_out[3] == 1)
-            with m.Else():
-                comb += Assert(cr_out[2] == 1)
-        with m.Elif(o_signed < 0):
-            # ditto as above
-            with m.If(rec.insn_type == InternalOp.OP_CMP):
-                comb += Assert(cr_out[2] == 1)
-            with m.Else():
-                comb += Assert(cr_out[3] == 1)
-
-
-        # Assert that op gets copied from the input to output
-        for p in rec.ports():
-            name = p.name
-            rec_sig = p
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-
-        return m
-
-class GTCombinerTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=4)
-        self.assertFormal(module, mode="cover", depth=4)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("output_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/alu/input_stage.py b/src/soc/pipe/alu/input_stage.py
deleted file mode 100644 (file)
index 7520732..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-# This stage is intended to adjust the input data before sending it to
-# the acutal ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
-                    unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.alu.pipe_data import ALUInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ALUInputStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "input")
-
-    def ispec(self):
-        return ALUInputData(self.pspec)
-
-    def ospec(self):
-        return ALUInputData(self.pspec)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        ctx = self.i.ctx
-
-        ##### operand A #####
-
-        # operand a to be as-is or inverted
-        a = Signal.like(self.i.a)
-
-        with m.If(ctx.op.invert_a):
-            comb += a.eq(~self.i.a)
-        with m.Else():
-            comb += a.eq(self.i.a)
-
-        comb += self.o.a.eq(a)
-        comb += self.o.b.eq(self.i.b)
-
-        ##### carry-in #####
-
-        # either copy incoming carry or set to 1/0 as defined by op
-        with m.Switch(ctx.op.input_carry):
-            with m.Case(CryIn.ZERO):
-                comb += self.o.carry_in.eq(0)
-            with m.Case(CryIn.ONE):
-                comb += self.o.carry_in.eq(1)
-            with m.Case(CryIn.CA):
-                comb += self.o.carry_in.eq(self.i.carry_in)
-
-        ##### sticky overflow and context (both pass-through) #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(ctx)
-
-        return m
diff --git a/src/soc/pipe/alu/main_stage.py b/src/soc/pipe/alu/main_stage.py
deleted file mode 100644 (file)
index 5100166..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-# This stage is intended to do most of the work of executing the Arithmetic
-# instructions. This would be like the additions, compares, and sign-extension
-# as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
-from nmutil.pipemodbase import PipeModBase
-from soc.alu.pipe_data import ALUInputData, ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-
-
-class ALUMainStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "main")
-
-    def ispec(self):
-        return ALUInputData(self.pspec)
-
-    def ospec(self):
-        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        carry_out, o = self.o.carry_out, self.o.o
-
-        # check if op is 32-bit, and get sign bit from operand a
-        is_32bit = Signal(reset_less=True)
-        sign_bit = Signal(reset_less=True)
-        comb += is_32bit.eq(self.i.ctx.op.is_32bit)
-        comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63]))
-
-        # little trick: do the add using only one add (not 2)
-        add_a = Signal(self.i.a.width + 2, reset_less=True)
-        add_b = Signal(self.i.a.width + 2, reset_less=True)
-        add_output = Signal(self.i.a.width + 2, reset_less=True)
-        with m.If((self.i.ctx.op.insn_type == InternalOp.OP_ADD) |
-                  (self.i.ctx.op.insn_type == InternalOp.OP_CMP)):
-            # in bit 0, 1+carry_in creates carry into bit 1 and above
-            comb += add_a.eq(Cat(self.i.carry_in, self.i.a, Const(0, 1)))
-            comb += add_b.eq(Cat(Const(1, 1), self.i.b, Const(0, 1)))
-            comb += add_output.eq(add_a + add_b)
-
-        ##########################
-        # main switch-statement for handling arithmetic operations
-
-        with m.Switch(self.i.ctx.op.insn_type):
-            #### CMP, CMPL ####
-            with m.Case(InternalOp.OP_CMP):
-                # this is supposed to be inverted (b-a, not a-b)
-                # however we have a trick: instead of adding either 2x 64-bit
-                # MUXes to invert a and b, or messing with a 64-bit output,
-                # swap +ve and -ve test in the *output* stage using an XOR gate
-                comb += o.eq(add_output[1:-1])
-
-            #### add ####
-            with m.Case(InternalOp.OP_ADD):
-                # bit 0 is not part of the result, top bit is the carry-out
-                comb += o.eq(add_output[1:-1])
-                comb += carry_out.eq(add_output[-1])
-
-            #### exts (sign-extend) ####
-            with m.Case(InternalOp.OP_EXTS):
-                with m.If(self.i.ctx.op.data_len == 1):
-                    comb += o.eq(Cat(self.i.a[0:8], Repl(self.i.a[7], 64-8)))
-                with m.If(self.i.ctx.op.data_len == 2):
-                    comb += o.eq(Cat(self.i.a[0:16], Repl(self.i.a[15], 64-16)))
-                with m.If(self.i.ctx.op.data_len == 4):
-                    comb += o.eq(Cat(self.i.a[0:32], Repl(self.i.a[31], 64-32)))
-            with m.Case(InternalOp.OP_CMPEQB):
-                eqs = Signal(8, reset_less=True)
-                src1 = Signal(8, reset_less=True)
-                comb += src1.eq(self.i.a[0:8])
-                for i in range(8):
-                    comb += eqs[i].eq(src1 == self.i.b[8*i:8*(i+1)])
-                comb += self.o.cr0.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
-
-        ###### sticky overflow and context, both pass-through #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/alu/output_stage.py b/src/soc/pipe/alu/output_stage.py
deleted file mode 100644 (file)
index 1253795..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-# This stage is intended to handle the gating of carry and overflow
-# out, summary overflow generation, and updating the condition
-# register
-from nmigen import (Module, Signal, Cat, Repl)
-from nmutil.pipemodbase import PipeModBase
-from soc.alu.pipe_data import ALUInputData, ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-
-
-class ALUOutputStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "output")
-
-    def ispec(self):
-        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
-    def ospec(self):
-        return ALUOutputData(self.pspec)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        # op requests inversion of the output
-        o = Signal.like(self.i.o)
-        with m.If(self.i.ctx.op.invert_out):
-            comb += o.eq(~self.i.o)
-        with m.Else():
-            comb += o.eq(self.i.o)
-
-        # create condition register cr0 and sticky-overflow
-        is_zero = Signal(reset_less=True)
-        is_positive = Signal(reset_less=True)
-        is_negative = Signal(reset_less=True)
-        msb_test = Signal(reset_less=True) # set equal to MSB, invert if OP=CMP
-        is_cmp = Signal(reset_less=True)   # true if OP=CMP
-        so = Signal(reset_less=True)
-
-        # TODO: if o[63] is XORed with "operand == OP_CMP"
-        # that can be used as a test
-        # see https://bugs.libre-soc.org/show_bug.cgi?id=305#c60
-
-        comb += is_cmp.eq(self.i.ctx.op.insn_type == InternalOp.OP_CMP)
-        comb += msb_test.eq(o[-1] ^ is_cmp)
-        comb += is_zero.eq(o == 0)
-        comb += is_positive.eq(~is_zero & ~msb_test)
-        comb += is_negative.eq(~is_zero & msb_test)
-        comb += so.eq(self.i.so | self.i.ov)
-
-        comb += self.o.o.eq(o)
-        with m.If(self.i.ctx.op.insn_type != InternalOp.OP_CMPEQB):
-            comb += self.o.cr0.eq(Cat(so, is_zero, is_positive, is_negative))
-        with m.Else():
-            comb += self.o.cr0.eq(self.i.cr0)
-            
-        comb += self.o.so.eq(so)
-
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/alu/pipe_data.py b/src/soc/pipe/alu/pipe_data.py
deleted file mode 100644 (file)
index c386397..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-from nmigen import Signal, Const
-from nmutil.dynamicpipe import SimpleHandshakeRedir
-from soc.alu.alu_input_record import CompALUOpSubset
-from ieee754.fpcommon.getop import FPPipeContext
-
-
-class IntegerData:
-
-    def __init__(self, pspec):
-        self.ctx = FPPipeContext(pspec)
-        self.muxid = self.ctx.muxid
-
-    def __iter__(self):
-        yield from self.ctx
-
-    def eq(self, i):
-        return [self.ctx.eq(i.ctx)]
-
-    def ports(self):
-        return self.ctx.ports()
-
-
-class ALUInputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.a = Signal(64, reset_less=True) # RA
-        self.b = Signal(64, reset_less=True) # RB/immediate
-        self.so = Signal(reset_less=True)
-        self.carry_in = Signal(reset_less=True)
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.a
-        yield self.b
-        yield self.carry_in
-        yield self.so
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.a.eq(i.a), self.b.eq(i.b),
-                      self.carry_in.eq(i.carry_in),
-                      self.so.eq(i.so)]
-
-# TODO: ALUIntermediateData which does not have
-# cr0, ov, ov32 in it (because they are generated as outputs by
-# the final output stage, not by the intermediate stage)
-# https://bugs.libre-soc.org/show_bug.cgi?id=305#c19
-
-class ALUOutputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.o = Signal(64, reset_less=True, name="stage_o")
-        self.carry_out = Signal(reset_less=True)
-        self.carry_out32 = Signal(reset_less=True)
-        self.cr0 = Signal(4, reset_less=True)
-        self.ov = Signal(reset_less=True)
-        self.ov32 = Signal(reset_less=True)
-        self.so = Signal(reset_less=True)
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.o
-        yield self.carry_out
-        yield self.carry_out32
-        yield self.cr0
-        yield self.ov
-        yield self.ov32
-        yield self.so
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.o.eq(i.o),
-                      self.carry_out.eq(i.carry_out),
-                      self.carry_out32.eq(i.carry_out32),
-                      self.cr0.eq(i.cr0), self.ov.eq(i.ov),
-                      self.ov32.eq(i.ov32), self.so.eq(i.so)]
-
-
-class IntPipeSpec:
-    def __init__(self, id_wid=2, op_wid=1):
-        self.id_wid = id_wid
-        self.op_wid = op_wid
-        self.opkls = lambda _: CompALUOpSubset(name="op")
-        self.stage = None
-
-
-class ALUPipeSpec(IntPipeSpec):
-    def __init__(self, id_wid, op_wid):
-        super().__init__(id_wid, op_wid)
-        self.pipekls = SimpleHandshakeRedir
diff --git a/src/soc/pipe/alu/pipeline.py b/src/soc/pipe/alu/pipeline.py
deleted file mode 100644 (file)
index e8dd199..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.main_stage import ALUMainStage
-from soc.alu.output_stage import ALUOutputStage
-
-class ALUStages(PipeModBaseChain):
-    def get_chain(self):
-        inp = ALUInputStage(self.pspec)
-        main = ALUMainStage(self.pspec)
-        out = ALUOutputStage(self.pspec)
-        return [inp, main, out]
-
-
-class ALUBasePipe(ControlBase):
-    def __init__(self, pspec):
-        ControlBase.__init__(self)
-        self.pipe1 = ALUStages(pspec)
-        self._eqs = self.connect([self.pipe1])
-
-    def elaborate(self, platform):
-        m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
-        m.d.comb += self._eqs
-        return m
diff --git a/src/soc/pipe/alu/test/test_pipe_caller.py b/src/soc/pipe/alu/test/test_pipe_caller.py
deleted file mode 100644 (file)
index f42112e..0000000
+++ /dev/null
@@ -1,270 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function, InternalOp)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.alu.pipeline import ALUBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-class TestCase:
-    def __init__(self, program, regs, sprs, name):
-        self.program = program
-        self.regs = regs
-        self.sprs = sprs
-        self.name = name
-
-def get_rec_width(rec):
-    recwidth = 0
-    # Setup random inputs for dut.op
-    for p in rec.ports():
-        width = p.width
-        recwidth += width
-    return recwidth
-
-def set_alu_inputs(alu, dec2, sim):
-    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
-    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
-    # and place it into data_i.b
-
-    reg3_ok = yield dec2.e.read_reg3.ok
-    reg1_ok = yield dec2.e.read_reg1.ok
-    assert reg3_ok != reg1_ok
-    if reg3_ok:
-        data1 = yield dec2.e.read_reg3.data
-        data1 = sim.gpr(data1).value
-    elif reg1_ok:
-        data1 = yield dec2.e.read_reg1.data
-        data1 = sim.gpr(data1).value
-    else:
-        data1 = 0
-
-    yield alu.p.data_i.a.eq(data1)
-
-    # If there's an immediate, set the B operand to that
-    reg2_ok = yield dec2.e.read_reg2.ok
-    imm_ok = yield dec2.e.imm_data.imm_ok
-    if imm_ok:
-        data2 = yield dec2.e.imm_data.imm
-    elif reg2_ok:
-        data2 = yield dec2.e.read_reg2.data
-        data2 = sim.gpr(data2).value
-    else:
-        data2 = 0
-    yield alu.p.data_i.b.eq(data2)
-
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
-    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
-    yield alu.p.data_i.carry_in.eq(carry)
-    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
-    yield alu.p.data_i.so.eq(so)
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class ALUTestCase(FHDLTestCase):
-    def __init__(self, name):
-        super().__init__(name)
-        self.test_name = name
-    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
-        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
-        test_data.append(tc)
-
-    def test_rand(self):
-        insns = ["add", "add.", "subf"]
-        for i in range(40):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1, 2"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rand_imm(self):
-        insns = ["addi", "addis", "subfic"]
-        for i in range(10):
-            choice = random.choice(insns)
-            imm = random.randint(-(1<<15), (1<<15)-1)
-            lst = [f"{choice} 3, 1, {imm}"]
-            print(lst)
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_adde(self):
-        lst = ["adde. 5, 6, 7"]
-        initial_regs = [0] * 32
-        initial_regs[6] = random.randint(0, (1<<64)-1)
-        initial_regs[7] = random.randint(0, (1<<64)-1)
-        initial_sprs = {}
-        xer = SelectableInt(0, 64)
-        xer[XER_bits['CA']] = 1
-        initial_sprs[special_sprs['XER']] = xer
-        self.run_tst_program(Program(lst), initial_regs, initial_sprs)
-
-    def test_cmp(self):
-        lst = ["subf. 1, 6, 7",
-               "cmp cr2, 1, 6, 7"]
-        initial_regs = [0] * 32
-        initial_regs[6] = 0x10
-        initial_regs[7] = 0x05
-        self.run_tst_program(Program(lst), initial_regs, {})
-
-    def test_extsb(self):
-        insns = ["extsb", "extsh", "extsw"]
-        for i in range(10):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1"]
-            print(lst)
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_cmpeqb(self):
-        lst = ["cmpeqb cr0, 1, 2"]
-        for i in range(20):
-            initial_regs = [0] * 32
-            initial_regs[1] = i
-            initial_regs[2] = 0x01030507090b0d0f11
-            self.run_tst_program(Program(lst), initial_regs, {})
-
-    def test_ilang(self):
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        alu = ALUBasePipe(pspec)
-        vl = rtlil.convert(alu, ports=alu.ports())
-        with open("pipeline.il", "w") as f:
-            f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
-    def __init__(self, test_data):
-        super().__init__("run_all")
-        self.test_data = test_data
-
-    def run_all(self):
-        m = Module()
-        comb = m.d.comb
-        instruction = Signal(32)
-
-        pdecode = create_pdecode()
-
-        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        m.submodules.alu = alu = ALUBasePipe(pspec)
-
-        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
-        comb += alu.p.valid_i.eq(1)
-        comb += alu.n.ready_i.eq(1)
-        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
-        sim = Simulator(m)
-
-        sim.add_clock(1e-6)
-        def process():
-            for test in self.test_data:
-                print(test.name)
-                program = test.program
-                self.subTest(test.name)
-                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
-                gen = program.generate_instructions()
-                instructions = list(zip(gen, program.assembly.splitlines()))
-
-                index = simulator.pc.CIA.value//4
-                while index < len(instructions):
-                    ins, code = instructions[index]
-
-                    print("0x{:X}".format(ins & 0xffffffff))
-                    print(code)
-
-                    # ask the decoder to decode this binary data (endian'd)
-                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
-                    yield instruction.eq(ins)          # raw binary instr.
-                    yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
-                    self.assertEqual(fn_unit, Function.ALU.value)
-                    yield from set_alu_inputs(alu, pdecode2, simulator)
-                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
-                    yield
-                    opname = code.split(' ')[0]
-                    yield from simulator.call(opname)
-                    index = simulator.pc.CIA.value//4
-
-                    vld = yield alu.n.valid_o
-                    while not vld:
-                        yield
-                        vld = yield alu.n.valid_o
-                    yield
-                    alu_out = yield alu.n.data_o.o
-                    out_reg_valid = yield pdecode2.e.write_reg.ok
-                    if out_reg_valid:
-                        write_reg_idx = yield pdecode2.e.write_reg.data
-                        expected = simulator.gpr(write_reg_idx).value
-                        print(f"expected {expected:x}, actual: {alu_out:x}")
-                        self.assertEqual(expected, alu_out)
-                    yield from self.check_extra_alu_outputs(alu, pdecode2,
-                                                            simulator, code)
-
-        sim.add_sync_process(process)
-        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                            traces=[]):
-            sim.run()
-
-    def check_extra_alu_outputs(self, alu, dec2, sim, code):
-        rc = yield dec2.e.rc.data
-        if rc:
-            cr_expected = sim.crl[0].get_range().value
-            cr_actual = yield alu.n.data_o.cr0
-            self.assertEqual(cr_expected, cr_actual, code)
-
-        op = yield dec2.e.insn_type
-        if op == InternalOp.OP_CMP.value or \
-           op == InternalOp.OP_CMPEQB.value:
-            bf = yield dec2.dec.BF
-            cr_actual = yield alu.n.data_o.cr0
-            cr_expected = sim.crl[bf].get_range().value
-            self.assertEqual(cr_expected, cr_actual, code)
-
-
-
-if __name__ == "__main__":
-    unittest.main(exit=False)
-    suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
-
-    runner = unittest.TextTestRunner()
-    runner.run(suite)
diff --git a/src/soc/pipe/branch/__init__.py b/src/soc/pipe/branch/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/soc/pipe/branch/br_input_record.py b/src/soc/pipe/branch/br_input_record.py
deleted file mode 100644 (file)
index d4f039c..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-from nmigen.hdl.rec import Record, Layout
-
-from soc.decoder.power_enums import InternalOp, Function, CryIn
-
-
-class CompBROpSubset(Record):
-    """CompBROpSubset
-
-    TODO: remove anything not needed by the Branch pipeline (determine this
-    after all branch operations have been written.  see
-    https://bugs.libre-soc.org/show_bug.cgi?id=313#c3)
-
-    a copy of the relevant subset information from Decode2Execute1Type
-    needed for Branch operations.  use with eq_from_execute1 (below) to
-    grab subsets.
-    """
-    def __init__(self, name=None):
-        layout = (('insn_type', InternalOp),
-                  ('fn_unit', Function),
-                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
-                   #'cr = Signal(32) # NO: this is from the CR SPR
-                    #'xerc = XerBits() # NO: this is from the XER SPR
-                  ('lk', 1),
-                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
-                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
-                  ('invert_a', 1),
-                  ('invert_out', 1),
-                  ('input_carry', CryIn),
-                  ('output_carry', 1),
-                  ('input_cr', 1),
-                  ('output_cr', 1),
-                  ('is_32bit', 1),
-                  ('is_signed', 1),
-                  ('insn', 32),
-                  ('byte_reverse', 1),
-                  ('sign_extend', 1))
-
-        Record.__init__(self, Layout(layout), name=name)
-
-        # grrr.  Record does not have kwargs
-        self.insn_type.reset_less = True
-        self.fn_unit.reset_less = True
-        #self.cr = Signal(32, reset_less = True
-        #self.xerc = XerBits(
-        self.lk.reset_less = True
-        self.invert_a.reset_less = True
-        self.invert_out.reset_less = True
-        self.input_carry.reset_less = True
-        self.output_carry.reset_less = True
-        self.input_cr.reset_less = True
-        self.output_cr.reset_less = True
-        self.is_32bit.reset_less = True
-        self.is_signed.reset_less = True
-        self.byte_reverse.reset_less = True
-        self.sign_extend.reset_less = True
-
-    def eq_from_execute1(self, other):
-        """ use this to copy in from Decode2Execute1Type
-        """
-        res = []
-        for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
-            res.append(sig.eq(eqfrom))
-        return res
-
-    def ports(self):
-        return [self.insn_type,
-                #self.cr,
-                #self.xerc,
-                self.lk,
-                self.invert_a,
-                self.invert_out,
-                self.input_carry,
-                self.output_carry,
-                self.input_cr,
-                self.output_cr,
-                self.is_32bit,
-                self.is_signed,
-                self.byte_reverse,
-                self.sign_extend,
-        ]
diff --git a/src/soc/pipe/branch/formal/proof_input_stage.py b/src/soc/pipe/branch/formal/proof_input_stage.py
deleted file mode 100644 (file)
index fb097c8..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.branch.br_input_record import CompBROpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompBROpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = ALUInputStage(pspec)
-
-        a = Signal(64)
-        b = Signal(64)
-        comb += [dut.i.a.eq(a),
-                 dut.i.b.eq(b),
-                 a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        # Assert that op gets copied from the input to output
-        for p in rec.ports():
-            name = p.name
-            rec_sig = p
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        with m.If(rec.invert_a):
-            comb += Assert(dut.o.a == ~a)
-        with m.Else():
-            comb += Assert(dut.o.a == a)
-
-        with m.If(rec.imm_data.imm_ok &
-                  ~(rec.insn_type == InternalOp.OP_RLC)):
-            comb += Assert(dut.o.b == rec.imm_data.imm)
-        with m.Else():
-            comb += Assert(dut.o.b == b)
-
-        return m
-
-class GTCombinerTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=4)
-        self.assertFormal(module, mode="cover", depth=4)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("input_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/branch/formal/proof_main_stage.py b/src/soc/pipe/branch/formal/proof_main_stage.py
deleted file mode 100644 (file)
index 5ca9481..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
-                    signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = LogicalMainStage(pspec)
-
-        # convenience variables
-        a = dut.i.a
-        b = dut.i.b
-        carry_in = dut.i.carry_in
-        so_in = dut.i.so
-        carry_out = dut.o.carry_out
-        o = dut.o.o
-
-        # setup random inputs
-        comb += [a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64)),
-                 carry_in.eq(AnyConst(1)),
-                 so_in.eq(AnyConst(1))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        # Assert that op gets copied from the input to output
-        for rec_sig in rec.ports():
-            name = rec_sig.name
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        # signed and signed/32 versions of input a
-        a_signed = Signal(signed(64))
-        a_signed_32 = Signal(signed(32))
-        comb += a_signed.eq(a)
-        comb += a_signed_32.eq(a[0:32])
-
-        # main assertion of arithmetic operations
-        with m.Switch(rec.insn_type):
-            with m.Case(InternalOp.OP_AND):
-                comb += Assert(dut.o.o == a & b)
-            with m.Case(InternalOp.OP_OR):
-                comb += Assert(dut.o.o == a | b)
-            with m.Case(InternalOp.OP_XOR):
-                comb += Assert(dut.o.o == a ^ b)
-
-        return m
-
-
-class LogicalTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=2)
-        self.assertFormal(module, mode="cover", depth=2)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("main_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/branch/input_stage.py b/src/soc/pipe/branch/input_stage.py
deleted file mode 100644 (file)
index e6ab48e..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-# This stage is intended to adjust the input data before sending it to
-# the acutal ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
-                    unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.alu.pipe_data import ALUInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ALUInputStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "input")
-
-    def ispec(self):
-        return ALUInputData(self.pspec)
-
-    def ospec(self):
-        return ALUInputData(self.pspec)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        ##### operand A #####
-
-        # operand a to be as-is or inverted
-        a = Signal.like(self.i.a)
-
-        with m.If(self.i.ctx.op.invert_a):
-            comb += a.eq(~self.i.a)
-        with m.Else():
-            comb += a.eq(self.i.a)
-
-        comb += self.o.a.eq(a)
-
-        ##### operand B #####
-
-        # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
-        # remove this, just do self.o.b.eq(self.i.b) and move the
-        # immediate-detection into set_alu_inputs in the unit test
-        # If there's an immediate, set the B operand to that
-        comb += self.o.b.eq(self.i.b)
-
-        ##### carry-in #####
-
-        # either copy incoming carry or set to 1/0 as defined by op
-        with m.Switch(self.i.ctx.op.input_carry):
-            with m.Case(CryIn.ZERO):
-                comb += self.o.carry_in.eq(0)
-            with m.Case(CryIn.ONE):
-                comb += self.o.carry_in.eq(1)
-            with m.Case(CryIn.CA):
-                comb += self.o.carry_in.eq(self.i.carry_in)
-
-        ##### sticky overflow and context (both pass-through) #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/branch/main_stage.py b/src/soc/pipe/branch/main_stage.py
deleted file mode 100644 (file)
index 6f6d488..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-# This stage is intended to do most of the work of executing Logical
-# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ
-# however input and output stages also perform bit-negation on input(s)
-# and output, as well as carry and overflow generation.
-# This module however should not gate the carry or overflow, that's up
-# to the output stage
-
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
-from nmutil.pipemodbase import PipeModBase
-from soc.branch.pipe_data import BranchInputData, BranchOutputData
-from soc.decoder.power_enums import InternalOp
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-def br_ext(bd):
-    return Cat(Const(0, 2), bd, Repl(bd[-1], 64-(bd.shape().width + 2)))
-
-"""
-Notes on BO Field:
-
-BO    Description
-0000z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=0
-0001z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=0
-001at Branch if CR[BI]=0
-0100z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=1
-0101z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=1
-011at Branch if CR[BI]=1
-1a00t Decrement the CTR, then branch if decremented CTR[M:63]!=0
-1a01t Decrement the CTR, then branch if decremented CTR[M:63]=0
-1z1zz Branch always
-"""
-
-class BranchMainStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
-
-    def ispec(self):
-        return BranchInputData(self.pspec)
-
-    def ospec(self):
-        return BranchOutputData(self.pspec) # TODO: ALUIntermediateData
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        op = self.i.ctx.op
-        lk = op.lk # see PowerDecode2 as to why this is done
-        nia_o, lr_o = self.o.nia, self.o.lr
-
-        # obtain relevant instruction fields
-        i_fields = self.fields.FormI
-        aa = Signal(i_fields.AA[0:-1].shape())
-        comb += aa.eq(i_fields.AA[0:-1])
-
-        br_imm_addr = Signal(64, reset_less=True)
-        br_addr = Signal(64, reset_less=True)
-        br_taken = Signal(reset_less=True)
-
-        # Handle absolute or relative branches
-        with m.If(aa):
-            comb += br_addr.eq(br_imm_addr)
-        with m.Else():
-            comb += br_addr.eq(br_imm_addr + self.i.cia)
-
-        # fields for conditional branches (BO and BI are same for BC and BCREG)
-        # NOTE: here, BO and BI we would like be treated as CR regfile
-        # selectors (similar to RA, RB, RS, RT).  see comment here:
-        # https://bugs.libre-soc.org/show_bug.cgi?id=313#c2
-        b_fields = self.fields.FormB
-        BO = b_fields.BO[0:-1]
-        BI = b_fields.BI[0:-1]
-
-        # The bit of CR selected by BI
-        cr_bit = Signal(reset_less=True)
-        comb += cr_bit.eq((self.i.cr & (1<<(31-BI))) != 0)
-
-        # Whether the conditional branch should be taken
-        bc_taken = Signal(reset_less=True)
-        with m.If(BO[2]):
-            comb += bc_taken.eq((cr_bit == BO[3]) | BO[4])
-        with m.Else():
-            # decrement the counter and place into output
-            ctr = Signal(64, reset_less=True)
-            comb += ctr.eq(self.i.ctr - 1)
-            comb += self.o.ctr.data.eq(ctr)
-            comb += self.o.ctr.ok.eq(1)
-            # take either all 64 bits or only 32 of post-incremented counter
-            ctr_m = Signal(64, reset_less=True)
-            with m.If((op.is_32bit):
-                comb += ctr_m.eq(ctr[:32])
-            with m.Else():
-                comb += ctr_m.eq(ctr)
-            # check CTR zero/non-zero against BO[1]
-            ctr_zero_bo1 = Signal(reset_less=True) # BO[1] == (ctr==0)
-            comb += ctr_zero_bo1.eq(BO[1] ^ ctr_m.any())
-            with m.If(BO[3:5] == 0b00):
-                comb += bc_taken.eq(ctr_zero_bo1 & ~cr_bit)
-            with m.Elif(BO[3:5] == 0b01):
-                comb += bc_taken.eq(ctr_zero_bo1 & cr_bit)
-            with m.Elif(BO[4] == 1):
-                comb += bc_taken.eq(ctr_zero_bo1)
-
-        ### Main Switch Statement ###
-        with m.Switch(op.insn_type):
-            #### branch ####
-            with m.Case(InternalOp.OP_B):
-                LI = i_fields.LI[0:-1]
-                comb += br_imm_addr.eq(br_ext(LI))
-                comb += br_taken.eq(1)
-            #### branch conditional ####
-            with m.Case(InternalOp.OP_BC):
-                BD = b_fields.BD[0:-1]
-                comb += br_imm_addr.eq(br_ext(BD))
-                comb += br_taken.eq(bc_taken)
-            #### branch conditional reg ####
-            with m.Case(InternalOp.OP_BCREG):
-                comb += br_imm_addr.eq(self.i.spr1) # SPR1 is set by decode unit
-                comb += br_taken.eq(bc_taken)
-
-        ###### output next instruction address #####
-
-        comb += nia_o.data.eq(br_addr)
-        comb += nia_o.ok.eq(br_taken)
-
-        ###### link register - only activate on operations marked as "lk" #####
-
-        with m.If(lk):
-            # ctx.op.lk is the AND of the insn LK field *and* whether the
-            # op is to "listen" to the link field
-            comb += lr_o.data.eq(self.i.cia + 4)
-            comb += lr_o.ok.eq(1)
-
-        ###### and context #####
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/branch/pipe_data.py b/src/soc/pipe/branch/pipe_data.py
deleted file mode 100644 (file)
index 0ef4f00..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-    Optional Register allocation listed below.  mandatory input
-    (CompBROpSubset, CIA) not included.
-
-    * CR is Condition Register (not an SPR)
-    * SPR1, SPR2 and SPR3 are all from the SPR regfile.  3 ports are needed
-
-    insn       CR  SPR1  SPR2    SPR3
-    ----       --  ----  ----    ----
-    op_b       xx  xx     xx     xx
-    op_ba      xx  xx     xx     xx
-    op_bl      xx  xx     xx     xx
-    op_bla     xx  xx     xx     xx
-    op_bc      CR, xx,    CTR    xx
-    op_bca     CR, xx,    CTR    xx
-    op_bcl     CR, xx,    CTR    xx
-    op_bcla    CR, xx,    CTR    xx
-    op_bclr    CR, LR,    CTR    xx
-    op_bclrl   CR, LR,    CTR    xx
-    op_bcctr   CR, xx,    CTR    xx
-    op_bcctrl  CR, xx,    CTR    xx
-    op_bctar   CR, TAR,   CTR,   xx
-    op_bctarl  CR, TAR,   CTR,   xx
-
-    op_sc      xx  xx     xx     MSR
-    op_scv     xx  LR,    SRR1,  MSR
-    op_rfscv   xx  LR,    CTR,   MSR
-    op_rfid    xx  SRR0,  SRR1,  MSR
-    op_hrfid   xx  HSRR0, HSRR1, MSR
-"""
-
-from nmigen import Signal, Const
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.decoder.power_decoder2 import Data
-from soc.alu.pipe_data import IntegerData
-
-
-class BranchInputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR
-        # this involves the *decode* unit selecting the register, based
-        # on detecting the operand being bcctr, bclr or bctar
-
-        self.spr1 = Signal(64, reset_less=True) # see table above, SPR1
-        self.spr2 = Signal(64, reset_less=True) # see table above, SPR2
-        self.spr3 = Signal(64, reset_less=True) # see table above, SPR3
-        self.cr = Signal(32, reset_less=True)   # Condition Register(s) CR0-7
-        self.cia = Signal(64, reset_less=True)  # Current Instruction Address
-
-        # convenience variables.  not all of these are used at once
-        self.ctr = self.srr0 = self.hsrr0 = self.spr2
-        self.lr = self.tar = self.srr1 = self.hsrr1 = self.spr1
-        self.msr = self.spr3
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.spr1
-        yield self.spr2
-        yield self.spr3
-        yield self.cr
-        yield self.cia
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.spr1.eq(i.spr1), self.spr2.eq(i.spr2),
-                      self.spr3.eq(i.spr3),
-                      self.cr.eq(i.cr), self.cia.eq(i.cia)]
-
-
-class BranchOutputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.lr = Data(64, name="lr")
-        self.spr = Data(64, name="spr")
-        self.nia = Data(64, name="nia")
-
-        # convenience variables.
-        self.ctr = self.spr
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield from self.lr
-        yield from self.spr
-        yield from self.nia
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr),
-                      self.nia.eq(i.nia)]
diff --git a/src/soc/pipe/branch/pipeline.py b/src/soc/pipe/branch/pipeline.py
deleted file mode 100644 (file)
index ac132f7..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.branch.main_stage import BranchMainStage
-
-class BranchStages(PipeModBaseChain):
-    def get_chain(self):
-        main = BranchMainStage(self.pspec)
-        return [main]
-
-
-class BranchBasePipe(ControlBase):
-    def __init__(self, pspec):
-        ControlBase.__init__(self)
-        self.pipe1 = BranchStages(pspec)
-        self._eqs = self.connect([self.pipe1])
-
-    def elaborate(self, platform):
-        m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
-        m.d.comb += self._eqs
-        return m
diff --git a/src/soc/pipe/branch/test/test_pipe_caller.py b/src/soc/pipe/branch/test/test_pipe_caller.py
deleted file mode 100644 (file)
index 10d2bba..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.branch.pipeline import BranchBasePipe
-from soc.branch.br_input_record import CompBROpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-
-class TestCase:
-    def __init__(self, program, regs, sprs, cr, name):
-        self.program = program
-        self.regs = regs
-        self.sprs = sprs
-        self.name = name
-        self.cr = cr
-
-def get_rec_width(rec):
-    recwidth = 0
-    # Setup random inputs for dut.op
-    for p in rec.ports():
-        width = p.width
-        recwidth += width
-    return recwidth
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class BranchTestCase(FHDLTestCase):
-    def __init__(self, name):
-        super().__init__(name)
-        self.test_name = name
-    def run_tst_program(self, prog, initial_regs=[0] * 32,
-                        initial_sprs={}, initial_cr=0):
-        tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
-                      self.test_name)
-        test_data.append(tc)
-
-    def test_unconditional(self):
-        choices = ["b", "ba", "bl", "bla"]
-        for i in range(20):
-            choice = random.choice(choices)
-            imm = random.randrange(-1<<23, (1<<23)-1) * 4
-            lst = [f"{choice} {imm}"]
-            initial_regs = [0] * 32
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_bc_cr(self):
-        for i in range(20):
-            bc = random.randrange(-1<<13, (1<<13)-1) * 4
-            bo = random.choice([0b01100, 0b00100, 0b10100])
-            bi = random.randrange(0, 31)
-            cr = random.randrange(0, (1<<32)-1)
-            lst = [f"bc {bo}, {bi}, {bc}"]
-            initial_regs = [0] * 32
-            self.run_tst_program(Program(lst), initial_cr=cr)
-
-    def test_bc_ctr(self):
-        for i in range(20):
-            bc = random.randrange(-1<<13, (1<<13)-1) * 4
-            bo = random.choice([0, 2, 8, 10, 16, 18])
-            bi = random.randrange(0, 31)
-            cr = random.randrange(0, (1<<32)-1)
-            ctr = random.randint(0, (1<<32)-1)
-            lst = [f"bc {bo}, {bi}, {bc}"]
-            initial_sprs={9: SelectableInt(ctr, 64)}
-            self.run_tst_program(Program(lst),
-                                 initial_sprs=initial_sprs,
-                                 initial_cr=cr)
-
-    def test_ilang(self):
-        rec = CompBROpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        alu = BranchBasePipe(pspec)
-        vl = rtlil.convert(alu, ports=alu.ports())
-        with open("logical_pipeline.il", "w") as f:
-            f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
-    def __init__(self, test_data):
-        super().__init__("run_all")
-        self.test_data = test_data
-
-    def run_all(self):
-        m = Module()
-        comb = m.d.comb
-        instruction = Signal(32)
-
-        pdecode = create_pdecode()
-
-        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
-        rec = CompBROpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        m.submodules.branch = branch = BranchBasePipe(pspec)
-
-        comb += branch.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
-        comb += branch.p.valid_i.eq(1)
-        comb += branch.n.ready_i.eq(1)
-        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
-        sim = Simulator(m)
-
-        sim.add_clock(1e-6)
-        def process():
-            for test in self.test_data:
-                print(test.name)
-                program = test.program
-                self.subTest(test.name)
-                simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
-                initial_cia = 0x2000
-                simulator.set_pc(initial_cia)
-                gen = program.generate_instructions()
-                instructions = list(zip(gen, program.assembly.splitlines()))
-
-                index = (simulator.pc.CIA.value - initial_cia)//4
-                while index < len(instructions) and index >= 0:
-                    print(index)
-                    ins, code = instructions[index]
-
-                    print("0x{:X}".format(ins & 0xffffffff))
-                    print(code)
-
-                    # ask the decoder to decode this binary data (endian'd)
-                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
-                    yield instruction.eq(ins)          # raw binary instr.
-                    yield branch.p.data_i.cia.eq(simulator.pc.CIA.value)
-                    yield branch.p.data_i.cr.eq(simulator.cr.get_range().value)
-                    # note, here, the op will need further decoding in order
-                    # to set the correct SPRs on SPR1/2/3.  op_bc* require
-                    # spr2 to be set to CTR, op_bctar require spr1 to be
-                    # set to TAR, op_bclr* require spr1 to be set to LR.
-                    # if op_sc*, op_rf* and op_hrfid are to be added here
-                    # then additional op-decoding is required, accordingly
-                    yield branch.p.data_i.spr2.eq(simulator.spr['CTR'].value)
-                    print(f"cr0: {simulator.crl[0].get_range()}")
-                    yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
-                    self.assertEqual(fn_unit, Function.BRANCH.value, code)
-                    yield
-                    yield
-                    opname = code.split(' ')[0]
-                    prev_nia = simulator.pc.NIA.value
-                    yield from simulator.call(opname)
-                    index = (simulator.pc.CIA.value - initial_cia)//4
-
-                    yield from self.assert_outputs(branch, pdecode2,
-                                                   simulator, prev_nia, code)
-
-
-        sim.add_sync_process(process)
-        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                            traces=[]):
-            sim.run()
-
-    def assert_outputs(self, branch, dec2, sim, prev_nia, code):
-        branch_taken = yield branch.n.data_o.nia.ok
-        sim_branch_taken = prev_nia != sim.pc.CIA
-        self.assertEqual(branch_taken, sim_branch_taken, code)
-        if branch_taken:
-            branch_addr = yield branch.n.data_o.nia.data
-            self.assertEqual(branch_addr, sim.pc.CIA.value, code)
-
-        lk = yield dec2.e.lk
-        branch_lk = yield branch.n.data_o.lr.ok
-        self.assertEqual(lk, branch_lk, code)
-        if lk:
-            branch_lr = yield branch.n.data_o.lr.data
-            self.assertEqual(sim.spr['LR'], branch_lr, code)
-
-
-if __name__ == "__main__":
-    unittest.main(exit=False)
-    suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
-
-    runner = unittest.TextTestRunner()
-    runner.run(suite)
diff --git a/src/soc/pipe/countzero/countzero.py b/src/soc/pipe/countzero/countzero.py
deleted file mode 100644 (file)
index bd61f57..0000000
+++ /dev/null
@@ -1,136 +0,0 @@
-# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl
-from nmigen import Memory, Module, Signal, Cat, Elaboratable
-from nmigen.hdl.rec import Record, Layout
-from nmigen.cli import main
-
-
-def or4(a, b, c, d):
-    return Cat(a.any(), b.any(), c.any(), d.any())
-
-
-class IntermediateResult(Record):
-    def __init__(self, name=None):
-        layout = (('v16', 15),
-                  ('sel_hi', 2),
-                  ('is_32bit', 1),
-                  ('count_right', 1))
-        Record.__init__(self, Layout(layout), name=name)
-
-
-class ZeroCounter(Elaboratable):
-    def __init__(self):
-        self.rs_i = Signal(64, reset_less=True)
-        self.count_right_i = Signal(1, reset_less=True)
-        self.is_32bit_i = Signal(1, reset_less=True)
-        self.result_o = Signal(64, reset_less=True)
-
-    def ports(self):
-        return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o]
-
-    def elaborate(self, platform):
-        m = Module()
-
-        # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4)
-        # m.submodule.pe2 = PriorityEncoder(4)
-        # m.submodule.pe3 = PriorityEncoder(4)
-        # etc.
-        # and where right will assign input to v and !right will assign v[::-1]
-        # so as to reverse the order of the input bits.
-
-        def encoder(v, right):
-            """
-            Return the index of the leftmost or rightmost 1 in a set of 4 bits.
-            Assumes v is not "0000"; if it is, return (right ? "11" : "00").
-            """
-            ret = Signal(2, reset_less=True)
-            with m.If(right):
-                with m.If(v[0]):
-                    m.d.comb += ret.eq(0)
-                with m.Elif(v[1]):
-                    m.d.comb += ret.eq(1)
-                with m.Elif(v[2]):
-                    m.d.comb += ret.eq(2)
-                with m.Else():
-                    m.d.comb += ret.eq(3)
-            with m.Else():
-                with m.If(v[3]):
-                    m.d.comb += ret.eq(3)
-                with m.Elif(v[2]):
-                    m.d.comb += ret.eq(2)
-                with m.Elif(v[1]):
-                    m.d.comb += ret.eq(1)
-                with m.Else():
-                    m.d.comb += ret.eq(0)
-            return ret
-
-        r = IntermediateResult()
-        r_in = IntermediateResult()
-
-        m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now
-
-        v = IntermediateResult()
-        y = Signal(4, reset_less=True)
-        z = Signal(4, reset_less=True)
-        sel = Signal(6, reset_less=True)
-        v4 = Signal(4, reset_less=True)
-
-        # Test 4 groups of 16 bits each.
-        # The top 2 groups are considered to be zero in 32-bit mode.
-        m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32],
-                             self.rs_i[32:48], self.rs_i[48:64]))
-        with m.If(self.is_32bit_i):
-            m.d.comb += v.sel_hi[1].eq(0)
-            with m.If(self.count_right_i):
-                m.d.comb += v.sel_hi[0].eq(~z[0])
-            with m.Else():
-                m.d.comb += v.sel_hi[0].eq(z[1])
-        with m.Else():
-            m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i))
-
-        # Select the leftmost/rightmost non-zero group of 16 bits
-        with m.Switch(v.sel_hi):
-            with m.Case(0):
-                m.d.comb += v.v16.eq(self.rs_i[0:16])
-            with m.Case(1):
-                m.d.comb += v.v16.eq(self.rs_i[16:32])
-            with m.Case(2):
-                m.d.comb += v.v16.eq(self.rs_i[32:48])
-            with m.Case(3):
-                m.d.comb += v.v16.eq(self.rs_i[48:64])
-
-        # Latch this and do the rest in the next cycle, for the sake of timing
-        m.d.comb += v.is_32bit.eq(self.is_32bit_i)
-        m.d.comb += v.count_right.eq(self.count_right_i)
-        m.d.comb += r_in.eq(v)
-        m.d.comb += sel[4:6].eq(r.sel_hi)
-
-        # Test 4 groups of 4 bits
-        m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8],
-                             r.v16[8:12], r.v16[12:16]))
-        m.d.comb += sel[2:4].eq(encoder(y, r.count_right))
-
-        # Select the leftmost/rightmost non-zero group of 4 bits
-        with m.Switch(sel[2:4]):
-            with m.Case(0):
-                m.d.comb += v4.eq(r.v16[0:4])
-            with m.Case(1):
-                m.d.comb += v4.eq(r.v16[4:8])
-            with m.Case(2):
-                m.d.comb += v4.eq(r.v16[8:12])
-            with m.Case(3):
-                m.d.comb += v4.eq(r.v16[12:16])
-
-        m.d.comb += sel[0:2].eq(encoder(v4, r.count_right))
-
-        # sel is now the index of the leftmost/rightmost 1 bit in rs
-        o = self.result_o
-        with m.If(v4 == 0):
-            # operand is zero, return 32 for 32-bit, else 64
-            m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit))
-        with m.Elif(r.count_right):
-            # return (63 - sel), trimmed to 5 bits in 32-bit mode
-            m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit)))
-        with m.Else():
-            m.d.comb += o.eq(sel)
-
-        return m
diff --git a/src/soc/pipe/countzero/test/test_countzero.py b/src/soc/pipe/countzero/test/test_countzero.py
deleted file mode 100644 (file)
index 6018519..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl
-from nmigen import Module, Signal
-from nmigen.cli import rtlil
-from nmigen.back.pysim import Simulator, Delay
-from nmigen.test.utils import FHDLTestCase
-import unittest
-from soc.countzero.countzero import ZeroCounter
-
-
-class ZeroCounterTestCase(FHDLTestCase):
-    def test_zerocounter(self):
-        m = Module()
-        comb = m.d.comb
-        m.submodules.dut = dut = ZeroCounter()
-
-        sim = Simulator(m)
-        # sim.add_clock(1e-6)
-
-        def process():
-            print("test zero input")
-            yield dut.rs_i.eq(0)
-            yield dut.is_32bit_i.eq(0)
-            yield dut.count_right_i.eq(0)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 0x40
-            # report "bad cntlzd 0 = " & to_hstring(result);
-            assert(result == 0x40)
-            yield dut.count_right_i.eq(1)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            # report "bad cntlzd 0 = " & to_hstring(result);
-            assert(result == 0x40)
-            yield dut.is_32bit_i.eq(1)
-            yield dut.count_right_i.eq(0)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            # report "bad cntlzw 0 = " & to_hstring(result);
-            assert(result == 0x20)
-            yield dut.count_right_i.eq(1)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            # report "bad cntlzw 0 = " & to_hstring(result);
-            assert(result == 0x20)
-            # TODO next tests
-
-            yield dut.rs_i.eq(0b00010000)
-            yield dut.is_32bit_i.eq(0)
-            yield dut.count_right_i.eq(0)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 4, "result %d" % result
-
-            yield dut.count_right_i.eq(1)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 59, "result %d" % result
-
-            yield dut.is_32bit_i.eq(1)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 27, "result %d" % result
-
-            yield dut.rs_i.eq(0b1100000100000000)
-            yield dut.is_32bit_i.eq(0)
-            yield dut.count_right_i.eq(0)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 14, "result %d" % result
-
-            yield dut.count_right_i.eq(1)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 55, "result %d" % result
-
-            yield dut.is_32bit_i.eq(1)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 23, "result %d" % result
-
-            yield dut.count_right_i.eq(0)
-            yield Delay(1e-6)
-            result = yield dut.result_o
-            assert result == 14, "result %d" % result
-
-
-        sim.add_process(process)  # or sim.add_sync_process(process), see below
-
-        # run test and write vcd
-        fn = "genullnau"
-        with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()):
-            sim.run()
-
-    # cntlzd_w
-    # cnttzd_w
-
-
-if __name__ == "__main__":
-
-    dut = ZeroCounter()
-    vl = rtlil.convert(dut, ports=dut.ports())
-    with open("countzero.il", "w") as f:
-        f.write(vl)
-
-    unittest.main()
diff --git a/src/soc/pipe/cr/main_stage.py b/src/soc/pipe/cr/main_stage.py
deleted file mode 100644 (file)
index 67bd78e..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-# This stage is intended to do Condition Register instructions
-# and output, as well as carry and overflow generation.
-# NOTE: with the exception of mtcrf and mfcr, we really should be doing
-# the field decoding which
-# selects which bits of CR are to be read / written, back in the
-# decoder / insn-isue, have both self.i.cr and self.o.cr
-# be broken down into 4-bit-wide "registers", with their
-# own "Register File" (indexed by bt, ba and bb),
-# exactly how INT regs are done (by RA, RB, RS and RT)
-# however we are pushed for time so do it as *one* register.
-
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
-from nmutil.pipemodbase import PipeModBase
-from soc.cr.pipe_data import CRInputData, CROutputData
-from soc.decoder.power_enums import InternalOp
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-
-class CRMainStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
-
-    def ispec(self):
-        return CRInputData(self.pspec)
-
-    def ospec(self):
-        return CROutputData(self.pspec)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        op = self.i.ctx.op
-        xl_fields = self.fields.FormXL
-        xfx_fields = self.fields.FormXFX
-        # default: cr_o remains same as cr input unless modified, below
-        cr_o = Signal.like(self.i.cr)
-        comb += cr_o.eq(self.i.cr)
-
-        ##### prepare inputs / temp #####
-
-        # Generate array for cr input so bits can be selected
-        cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)])
-        for i in range(32):
-            comb += cr_arr[i].eq(self.i.cr[31-i])
-
-        # Generate array for cr output so the bit to write to can be
-        # selected by a signal
-        cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)])
-        for i in range(32):
-            comb += cr_o[31-i].eq(cr_out_arr[i])
-            comb += cr_out_arr[i].eq(cr_arr[i])
-
-        # Generate the mask for mtcrf, mtocrf, and mfocrf
-        # replicate every fxm field in the insn to 4-bit, as a mask
-        FXM = xfx_fields.FXM[0:-1]
-        mask = Signal(32, reset_less=True)
-        comb += mask.eq(Cat(*[Repl(FXM[i], 4) for i in range(8)]))
-
-        #################################
-        ##### main switch statement #####
-
-        with m.Switch(op.insn_type):
-            ##### mcrf #####
-            with m.Case(InternalOp.OP_MCRF):
-                # MCRF copies the 4 bits of crA to crB (for instance
-                # copying cr2 to cr1)
-                BF = xl_fields.BF[0:-1]   # destination CR
-                BFA = xl_fields.BFA[0:-1] # source CR
-
-                for i in range(4):
-                    comb += cr_out_arr[BF*4 + i].eq(cr_arr[BFA*4 + i])
-
-            ##### crand, cror, crnor etc. #####
-            with m.Case(InternalOp.OP_CROP):
-                # crand/cror and friends get decoded to the same opcode, but
-                # one of the fields inside the instruction is a 4 bit lookup
-                # table. This lookup table gets indexed by bits a and b from
-                # the CR to determine what the resulting bit should be.
-
-                # Grab the lookup table for cr_op type instructions
-                lut = Array([Signal(name=f"lut{i}") for i in range(4)])
-                # There's no field, just have to grab it directly from the insn
-                for i in range(4):
-                    comb += lut[i].eq(self.i.ctx.op.insn[6+i])
-
-                # Get the bit selector fields from the instruction
-                BT = xl_fields.BT[0:-1]
-                BA = xl_fields.BA[0:-1]
-                BB = xl_fields.BB[0:-1]
-
-                # Use the two input bits to look up the result in the LUT
-                comb += cr_out_arr[BT].eq(lut[Cat(cr_arr[BB], cr_arr[BA])])
-
-            ##### mtcrf #####
-            with m.Case(InternalOp.OP_MTCRF):
-                # mtocrf and mtcrf are essentially identical
-                # put input (RA) - mask-selected - into output CR, leave
-                # rest of CR alone.
-                comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask))
-
-            ##### mfcr #####
-            with m.Case(InternalOp.OP_MFCR):
-                # Ugh. mtocrf and mtcrf have one random bit differentiating
-                # them. This bit is not in any particular field, so this
-                # extracts that bit from the instruction
-                move_one = Signal(reset_less=True)
-                comb += move_one.eq(self.i.ctx.op.insn[20])
-
-                # mfocrf
-                with m.If(move_one):
-                    comb += self.o.o.eq(self.i.cr & mask)
-                # mfcrf
-                with m.Else():
-                    comb += self.o.o.eq(self.i.cr)
-
-        # output and context
-        comb += self.o.cr.eq(cr_o)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/cr/pipe_data.py b/src/soc/pipe/cr/pipe_data.py
deleted file mode 100644 (file)
index d56c8f3..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-from nmigen import Signal, Const
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.alu.pipe_data import IntegerData
-
-
-class CRInputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.a = Signal(64, reset_less=True) # RA
-        self.cr = Signal(64, reset_less=True) # CR in
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.a
-        yield self.cr
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.a.eq(i.a),
-                      self.cr.eq(i.cr)]
-
-class CROutputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.o = Signal(64, reset_less=True) # RA
-        self.cr = Signal(64, reset_less=True) # CR in
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.o
-        yield self.cr
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.o.eq(i.o),
-                      self.cr.eq(i.cr)]
diff --git a/src/soc/pipe/cr/pipeline.py b/src/soc/pipe/cr/pipeline.py
deleted file mode 100644 (file)
index 121cdf8..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.cr.main_stage import CRMainStage
-
-class CRStages(PipeModBaseChain):
-    def get_chain(self):
-        main = CRMainStage(self.pspec)
-        return [main]
-
-
-class CRBasePipe(ControlBase):
-    def __init__(self, pspec):
-        ControlBase.__init__(self)
-        self.pipe1 = CRStages(pspec)
-        self._eqs = self.connect([self.pipe1])
-
-    def elaborate(self, platform):
-        m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
-        m.d.comb += self._eqs
-        return m
diff --git a/src/soc/pipe/cr/test/test_pipe_caller.py b/src/soc/pipe/cr/test/test_pipe_caller.py
deleted file mode 100644 (file)
index fa08fb6..0000000
+++ /dev/null
@@ -1,232 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.cr.pipeline import CRBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-
-class TestCase:
-    def __init__(self, program, regs, sprs, cr, name):
-        self.program = program
-        self.regs = regs
-        self.sprs = sprs
-        self.name = name
-        self.cr = cr
-
-def get_rec_width(rec):
-    recwidth = 0
-    # Setup random inputs for dut.op
-    for p in rec.ports():
-        width = p.width
-        recwidth += width
-    return recwidth
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class CRTestCase(FHDLTestCase):
-    def __init__(self, name):
-        super().__init__(name)
-        self.test_name = name
-    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={},
-                        initial_cr=0):
-        tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
-                      self.test_name)
-        test_data.append(tc)
-
-    def test_crop(self):
-        insns = ["crand", "cror", "crnand", "crnor", "crxor", "creqv",
-                 "crandc", "crorc"]
-        for i in range(40):
-            choice = random.choice(insns)
-            ba = random.randint(0, 31)
-            bb = random.randint(0, 31)
-            bt = random.randint(0, 31)
-            lst = [f"{choice} {ba}, {bb}, {bt}"]
-            cr = random.randint(0, 7)
-            self.run_tst_program(Program(lst), initial_cr=cr)
-
-    def test_mcrf(self):
-        lst = ["mcrf 0, 5"]
-        cr = 0xffff0000
-        self.run_tst_program(Program(lst), initial_cr=cr)
-
-    def test_mtcrf(self):
-        for i in range(20):
-            mask = random.randint(0, 255)
-            lst = [f"mtcrf {mask}, 2"]
-            cr = random.randint(0, (1<<32)-1)
-            initial_regs = [0] * 32
-            initial_regs[2] = random.randint(0, (1<<32)-1)
-            self.run_tst_program(Program(lst), initial_regs=initial_regs,
-                                 initial_cr=cr)
-    def test_mtocrf(self):
-        for i in range(20):
-            mask = 1<<random.randint(0, 7)
-            lst = [f"mtocrf {mask}, 2"]
-            cr = random.randint(0, (1<<32)-1)
-            initial_regs = [0] * 32
-            initial_regs[2] = random.randint(0, (1<<32)-1)
-            self.run_tst_program(Program(lst), initial_regs=initial_regs,
-                                 initial_cr=cr)
-
-    def test_mfcr(self):
-        for i in range(5):
-            lst = ["mfcr 2"]
-            cr = random.randint(0, (1<<32)-1)
-            self.run_tst_program(Program(lst), initial_cr=cr)
-
-    def test_mfocrf(self):
-        for i in range(20):
-            mask = 1<<random.randint(0, 7)
-            lst = [f"mfocrf 2, {mask}"]
-            cr = random.randint(0, (1<<32)-1)
-            self.run_tst_program(Program(lst), initial_cr=cr)
-        
-
-    def test_ilang(self):
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        alu = CRBasePipe(pspec)
-        ports = alu.ports()
-        vl = rtlil.convert(alu, ports=alu.ports())
-        with open("logical_pipeline.il", "w") as f:
-            f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
-    def __init__(self, test_data):
-        super().__init__("run_all")
-        self.test_data = test_data
-
-    def set_inputs(self, alu, dec2, simulator):
-        yield alu.p.data_i.cr.eq(simulator.cr.get_range().value)
-
-        reg3_ok = yield dec2.e.read_reg3.ok
-        if reg3_ok:
-            reg3_sel = yield dec2.e.read_reg3.data
-            reg3 = simulator.gpr(reg3_sel).value
-            yield alu.p.data_i.a.eq(reg3)
-
-    def run_all(self):
-        m = Module()
-        comb = m.d.comb
-        instruction = Signal(32)
-
-        pdecode = create_pdecode()
-
-        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        m.submodules.alu = alu = CRBasePipe(pspec)
-
-        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
-        comb += alu.p.valid_i.eq(1)
-        comb += alu.n.ready_i.eq(1)
-        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
-        sim = Simulator(m)
-
-        sim.add_clock(1e-6)
-        def process():
-            for test in self.test_data:
-                print(test.name)
-                program = test.program
-                self.subTest(test.name)
-                simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
-                gen = program.generate_instructions()
-                instructions = list(zip(gen, program.assembly.splitlines()))
-
-                index = simulator.pc.CIA.value//4
-                while index < len(instructions):
-                    ins, code = instructions[index]
-
-                    print("0x{:X}".format(ins & 0xffffffff))
-                    print(code)
-
-                    # ask the decoder to decode this binary data (endian'd)
-                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
-                    yield instruction.eq(ins)          # raw binary instr.
-                    yield Settle()
-                    yield from self.set_inputs(alu, pdecode2, simulator)
-                    fn_unit = yield pdecode2.e.fn_unit
-                    self.assertEqual(fn_unit, Function.CR.value, code)
-                    yield 
-                    opname = code.split(' ')[0]
-                    yield from simulator.call(opname)
-                    index = simulator.pc.CIA.value//4
-
-                    vld = yield alu.n.valid_o
-                    while not vld:
-                        yield
-                        vld = yield alu.n.valid_o
-                    yield
-                    cr_out = yield pdecode2.e.output_cr
-                    if cr_out:
-                        cr_expected = simulator.cr.get_range().value
-                        cr_real = yield alu.n.data_o.cr
-                        msg = f"real: {cr_expected:x}, actual: {cr_real:x}"
-                        msg += " code: %s" % code
-                        self.assertEqual(cr_expected, cr_real, msg)
-
-                    reg_out = yield pdecode2.e.write_reg.ok
-                    if reg_out:
-                        reg_sel = yield pdecode2.e.write_reg.data
-                        reg_data = simulator.gpr(reg_sel).value
-                        output = yield alu.n.data_o.o
-                        msg = f"real: {reg_data:x}, actual: {output:x}"
-                        self.assertEqual(reg_data, output)
-
-        sim.add_sync_process(process)
-        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                            traces=[]):
-            sim.run()
-    def check_extra_alu_outputs(self, alu, dec2, sim):
-        rc = yield dec2.e.rc.data
-        if rc:
-            cr_expected = sim.crl[0].get_range().value
-            cr_actual = yield alu.n.data_o.cr0
-            self.assertEqual(cr_expected, cr_actual)
-
-
-if __name__ == "__main__":
-    unittest.main(exit=False)
-    suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
-
-    runner = unittest.TextTestRunner()
-    runner.run(suite)
diff --git a/src/soc/pipe/logical/__init__.py b/src/soc/pipe/logical/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/soc/pipe/logical/bperm.py b/src/soc/pipe/logical/bperm.py
deleted file mode 100644 (file)
index 674555b..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-from nmigen import Elaboratable, Signal, Module, Repl, Cat, Const, Array
-from nmigen.cli import main
-
-
-class Bpermd(Elaboratable):
-    """This class does a Bit Permute on a Doubleword
-
-   X-form bpermd RA,RS,RB]
-
-   Eight permuted bits are produced. For each permuted bit i where i ranges
-   from 0 to 7 and for each byte i of RS, do the following. If byte i of RS
-   is less than 64, permuted bit i is setto the bit of RB specified by byte
-   i of RS; otherwise permuted bit i is set to 0. The  permuted  bits are
-   placed in the least-significantbyte of RA, and the remaining bits are
-   filled with 0s.
-   Special Registers Altered: None
-
-   Programming note:
-   The fact that the permuted bit is 0 if the corresponding index value
-   exceeds 63 permits the permuted bits to be selected from a 128-bit
-   quantity, using a single index register. For example, assume that the
-   128-bit quantity Q, from which the permuted bits are to be selected, is
-   in registers r2(high-order 64 bits of Q) and r3 (low-order 64 bits of Q),
-   that the index values are in register r1, with each byte of r1 containing
-   a value in the range 0:127, and that each byte of register r4 contains
-   the value 64. The following code sequence selects eight permuted bits
-   from Q and places them into the low-order byte of r6.
-    """
-
-    def __init__(self, width):
-        self.width = width
-        self.rs = Signal(width, reset_less=True)
-        self.ra = Signal(width, reset_less=True)
-        self.rb = Signal(width, reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-        perm = Signal(self.width, reset_less=True)
-        rb64 = [Signal(1, reset_less=True, name=f"rb64_{i}") for i in range(64)]
-        for i in range(64):
-            m.d.comb += rb64[i].eq(self.rb[i])
-        rb64 = Array(rb64)
-        for i in range(8):
-            index = self.rs[8*i:8*i+8]
-            idx = Signal(8, name=f"idx_{i}", reset_less=True)
-            m.d.comb += idx.eq(index)
-            with m.If(idx < 64):
-                m.d.comb += perm[i].eq(rb64[idx])
-        m.d.comb += self.ra[0:8].eq(perm)
-        return m
-
-
-if __name__ == "__main__":
-    bperm = Bpermd(width=64)
-    main(bperm, ports=[bperm.rs, bperm.ra, bperm.rb])
diff --git a/src/soc/pipe/logical/formal/.gitignore b/src/soc/pipe/logical/formal/.gitignore
deleted file mode 100644 (file)
index 150f68c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-*/*
diff --git a/src/soc/pipe/logical/formal/proof_bperm.py b/src/soc/pipe/logical/formal/proof_bperm.py
deleted file mode 100644 (file)
index da19894..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-# Proof of correctness for bit permute module
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
-                    signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.logical.bperm import Bpermd
-
-import unittest
-
-
-# So formal verification is a little different than writing a test
-# case, as you're actually generating logic around your module to
-# check that it behaves a certain way. So here, I'm going to create a
-# module to put my formal assertions in
-class Driver(Elaboratable):
-    def __init__(self):
-        # We don't need any inputs and outputs here, so I won't
-        # declare any
-        pass
-
-    def elaborate(self, platform):
-        # standard stuff
-        m = Module()
-        comb = m.d.comb
-
-        # instantiate the device under test as a submodule
-        m.submodules.bperm = bperm = Bpermd(64)
-
-        # Grab the inputs and outputs of the DUT to make them more
-        # convenient to access
-        rs = bperm.rs
-        rb = bperm.rb
-        ra = bperm.ra
-
-        # Before we prove any properties about the DUT, we need to set
-        # up its inputs. There's a couple ways to do this, you could
-        # define some inputs and outputs for the driver module and
-        # wire them up to the DUT, but that's kind of a pain. The
-        # other option is to use AnyConst/AnySeq, which tells yosys
-        # that those inputs can take on any value.
-
-        # AnyConst should be used when the input should take on a
-        # random value, but that value should be constant throughout
-        # the test.
-        # AnySeq should be used when the input can change on every
-        # cycle
-
-        # Since this is a combinatorial circuit, it really doesn't
-        # matter which one you choose, so I chose AnyConst. If this
-        # was a sequential circuit, (especially a state machine) you'd
-        # want to use AnySeq
-        comb += [rs.eq(AnyConst(64)),
-                 rb.eq(AnyConst(64))]
-
-
-        # The pseudocode in the Power ISA manual (v3.1) is as follows:
-        # do i = 0 to 7
-        #    index <- RS[8*i:8*i+8]
-        #    if index < 64:
-        #        perm[i] <- RB[index]
-        #    else:
-        #        perm[i] <- 0
-        # RA <- 56'b0 || perm[0:8]  # big endian though
-
-        # Looking at this, I can identify 3 properties that the bperm
-        # module should keep:
-        #   1. RA[8:64] should always equal 0
-        #   2. If RB[i*8:i*8+8] >= 64 then RA[i] should equal 0
-        #   3. If RB[i*8:i*8+8] < 64 then RA[i] should RS[index]
-
-        # Now we need to Assert that the properties above hold:
-
-        # Property 1: RA[8:64] should always equal 0
-        comb += Assert(ra[8:] == 0)
-        # Notice how we're adding Assert to comb like it's a circuit?
-        # That's because it kind of is. If you run this proof and have
-        # yosys graph the ilang, you'll be able to see an equals
-        # comparison cell feeding into an assert cell
-
-        # Now we need to prove property #2. I'm going to leave this to
-        # you Cole. I'd start by writing a for loop and extracting the
-        # 8 indices into signals. Then I'd write an if statement
-        # checking if the index is >= 64 (it's hardware, so use an
-        # m.If()). Finally, I'd add an assert that checks whether
-        # ra[i] is equal to 0
-
-
-
-        return m
-
-
-class TestCase(FHDLTestCase):
-    # This bit here is actually in charge of running the formal
-    # proof. It has nmigen spit out the ilang, and feeds it to
-    # SymbiYosys to run the proof. If the proof fails, yosys will
-    # generate a .vcd file showing how it was able to violate your
-    # assertions in proof_bperm_formal/engine_0/trace.vcd. From that
-    # you should be able to figure out what went wrong, and either
-    # correct the assertion or fix the DUT
-    def test_formal(self):
-        module = Driver()
-        # This runs a Bounded Model Check on the driver module
-        # above. What that does is it starts at some initial state,
-        # and steps it through `depth` cycles, checking that the
-        # assertions hold at every cycle. Since this is a
-        # combinatorial module, it only needs 1 cycle to prove
-        # everything. 
-        self.assertFormal(module, mode="bmc", depth=2)
-        self.assertFormal(module, mode="cover", depth=2)
-
-    # As mentioned above, you can look at the graph in yosys and see
-    # all the assertion cells
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("bperm.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/logical/formal/proof_input_stage.py b/src/soc/pipe/logical/formal/proof_input_stage.py
deleted file mode 100644 (file)
index bb62fb6..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = ALUInputStage(pspec)
-
-        a = Signal(64)
-        b = Signal(64)
-        comb += [dut.i.a.eq(a),
-                 dut.i.b.eq(b),
-                 a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64))]
-                      
-
-        comb += dut.i.ctx.op.eq(rec)
-
-
-        # Assert that op gets copied from the input to output
-        for p in rec.ports():
-            name = p.name
-            rec_sig = p
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        with m.If(rec.invert_a):
-            comb += Assert(dut.o.a == ~a)
-        with m.Else():
-            comb += Assert(dut.o.a == a)
-
-        with m.If(rec.imm_data.imm_ok &
-                  ~(rec.insn_type == InternalOp.OP_RLC)):
-            comb += Assert(dut.o.b == rec.imm_data.imm)
-        with m.Else():
-            comb += Assert(dut.o.b == b)
-
-
-
-
-        return m
-
-class GTCombinerTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=4)
-        self.assertFormal(module, mode="cover", depth=4)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("input_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/logical/formal/proof_main_stage.py b/src/soc/pipe/logical/formal/proof_main_stage.py
deleted file mode 100644 (file)
index 5ca9481..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
-                    signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = LogicalMainStage(pspec)
-
-        # convenience variables
-        a = dut.i.a
-        b = dut.i.b
-        carry_in = dut.i.carry_in
-        so_in = dut.i.so
-        carry_out = dut.o.carry_out
-        o = dut.o.o
-
-        # setup random inputs
-        comb += [a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64)),
-                 carry_in.eq(AnyConst(1)),
-                 so_in.eq(AnyConst(1))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        # Assert that op gets copied from the input to output
-        for rec_sig in rec.ports():
-            name = rec_sig.name
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        # signed and signed/32 versions of input a
-        a_signed = Signal(signed(64))
-        a_signed_32 = Signal(signed(32))
-        comb += a_signed.eq(a)
-        comb += a_signed_32.eq(a[0:32])
-
-        # main assertion of arithmetic operations
-        with m.Switch(rec.insn_type):
-            with m.Case(InternalOp.OP_AND):
-                comb += Assert(dut.o.o == a & b)
-            with m.Case(InternalOp.OP_OR):
-                comb += Assert(dut.o.o == a | b)
-            with m.Case(InternalOp.OP_XOR):
-                comb += Assert(dut.o.o == a ^ b)
-
-        return m
-
-
-class LogicalTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=2)
-        self.assertFormal(module, mode="cover", depth=2)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("main_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/logical/input_stage.py b/src/soc/pipe/logical/input_stage.py
deleted file mode 100644 (file)
index e6ab48e..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-# This stage is intended to adjust the input data before sending it to
-# the acutal ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
-                    unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.alu.pipe_data import ALUInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ALUInputStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "input")
-
-    def ispec(self):
-        return ALUInputData(self.pspec)
-
-    def ospec(self):
-        return ALUInputData(self.pspec)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        ##### operand A #####
-
-        # operand a to be as-is or inverted
-        a = Signal.like(self.i.a)
-
-        with m.If(self.i.ctx.op.invert_a):
-            comb += a.eq(~self.i.a)
-        with m.Else():
-            comb += a.eq(self.i.a)
-
-        comb += self.o.a.eq(a)
-
-        ##### operand B #####
-
-        # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
-        # remove this, just do self.o.b.eq(self.i.b) and move the
-        # immediate-detection into set_alu_inputs in the unit test
-        # If there's an immediate, set the B operand to that
-        comb += self.o.b.eq(self.i.b)
-
-        ##### carry-in #####
-
-        # either copy incoming carry or set to 1/0 as defined by op
-        with m.Switch(self.i.ctx.op.input_carry):
-            with m.Case(CryIn.ZERO):
-                comb += self.o.carry_in.eq(0)
-            with m.Case(CryIn.ONE):
-                comb += self.o.carry_in.eq(1)
-            with m.Case(CryIn.CA):
-                comb += self.o.carry_in.eq(self.i.carry_in)
-
-        ##### sticky overflow and context (both pass-through) #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/logical/main_stage.py b/src/soc/pipe/logical/main_stage.py
deleted file mode 100644 (file)
index e740d07..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-# This stage is intended to do most of the work of executing Logical
-# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ
-# however input and output stages also perform bit-negation on input(s)
-# and output, as well as carry and overflow generation.
-# This module however should not gate the carry or overflow, that's up
-# to the output stage
-
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
-from nmutil.pipemodbase import PipeModBase
-from soc.logical.pipe_data import ALUInputData
-from soc.alu.pipe_data import ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.countzero.countzero import ZeroCounter
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-
-def array_of(count, bitwidth):
-    res = []
-    for i in range(count):
-        res.append(Signal(bitwidth, reset_less=True))
-    return res
-
-
-class LogicalMainStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
-
-    def ispec(self):
-        return ALUInputData(self.pspec)
-
-    def ospec(self):
-        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o
-
-        ##########################
-        # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount
-
-        with m.Switch(op.insn_type):
-
-            ###### AND, OR, XOR #######
-            with m.Case(InternalOp.OP_AND):
-                comb += o.eq(a & b)
-            with m.Case(InternalOp.OP_OR):
-                comb += o.eq(a | b)
-            with m.Case(InternalOp.OP_XOR):
-                comb += o.eq(a ^ b)
-
-            ###### cmpb #######
-            with m.Case(InternalOp.OP_CMPB):
-                l = []
-                for i in range(8):
-                    slc = slice(i*8, (i+1)*8)
-                    l.append(Repl(a[slc] == b[slc], 8))
-                comb += o.eq(Cat(*l))
-
-            ###### popcount #######
-            with m.Case(InternalOp.OP_POPCNT):
-                # starting from a, perform successive addition-reductions
-                # creating arrays big enough to store the sum, each time
-                pc = [a]
-                # QTY32 2-bit (to take 2x 1-bit sums) etc.
-                work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
-                for l, b in work:
-                    pc.append(array_of(l, b))
-                pc8 = pc[3]     # array of 8 8-bit counts (popcntb)
-                pc32 = pc[5]    # array of 2 32-bit counts (popcntw)
-                popcnt = pc[-1] # array of 1 64-bit count (popcntd)
-                # cascade-tree of adds
-                for idx, (l, b) in enumerate(work):
-                    for i in range(l):
-                        stt, end = i*2, i*2+1
-                        src, dst = pc[idx], pc[idx+1]
-                        comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
-                                          Cat(src[end], Const(0, 1)))
-                # decode operation length
-                with m.If(op.data_len[2:4] == 0b00):
-                    # popcntb - pack 8x 4-bit answers into output
-                    for i in range(8):
-                        comb += o[i*8:i*8+4].eq(pc8[i])
-                with m.Elif(op.data_len[3] == 0):
-                    # popcntw - pack 2x 5-bit answers into output
-                    for i in range(2):
-                        comb += o[i*32:i*32+5].eq(pc32[i])
-                with m.Else():
-                    # popcntd - put 1x 6-bit answer into output
-                    comb += o.eq(popcnt[0])
-
-            ###### parity #######
-            with m.Case(InternalOp.OP_PRTY):
-                # strange instruction which XORs together the LSBs of each byte
-                par0 = Signal(reset_less=True)
-                par1 = Signal(reset_less=True)
-                comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
-                comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
-                with m.If(op.data_len[3] == 1):
-                    comb += o.eq(par0 ^ par1)
-                with m.Else():
-                    comb += o[0].eq(par0)
-                    comb += o[32].eq(par1)
-
-            ###### cntlz #######
-            with m.Case(InternalOp.OP_CNTZ):
-                XO = self.fields.FormX.XO[0:-1]
-                m.submodules.countz = countz = ZeroCounter()
-                comb += countz.rs_i.eq(a)
-                comb += countz.is_32bit_i.eq(op.is_32bit)
-                comb += countz.count_right_i.eq(XO[-1])
-                comb += o.eq(countz.result_o)
-
-            ###### bpermd #######
-            # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
-
-        ###### sticky overflow and context, both pass-through #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/logical/pipe_data.py b/src/soc/pipe/logical/pipe_data.py
deleted file mode 100644 (file)
index 4bf064f..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-from nmigen import Signal, Const
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.alu.pipe_data import IntegerData
-
-
-class ALUInputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.a = Signal(64, reset_less=True) # RA
-        self.b = Signal(64, reset_less=True) # RB/immediate
-        self.so = Signal(reset_less=True)
-        self.carry_in = Signal(reset_less=True)
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.a
-        yield self.b
-        yield self.carry_in
-        yield self.so
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.a.eq(i.a), self.b.eq(i.b),
-                      self.carry_in.eq(i.carry_in),
-                      self.so.eq(i.so)]
diff --git a/src/soc/pipe/logical/pipeline.py b/src/soc/pipe/logical/pipeline.py
deleted file mode 100644 (file)
index f3c8327..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.alu.input_stage import ALUInputStage
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.output_stage import ALUOutputStage
-
-class LogicalStages(PipeModBaseChain):
-    def get_chain(self):
-        inp = ALUInputStage(self.pspec)
-        main = LogicalMainStage(self.pspec)
-        out = ALUOutputStage(self.pspec)
-        return [inp, main, out]
-
-
-class LogicalBasePipe(ControlBase):
-    def __init__(self, pspec):
-        ControlBase.__init__(self)
-        self.pipe1 = LogicalStages(pspec)
-        self._eqs = self.connect([self.pipe1])
-
-    def elaborate(self, platform):
-        m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
-        m.d.comb += self._eqs
-        return m
diff --git a/src/soc/pipe/logical/test/test_bperm.py b/src/soc/pipe/logical/test/test_bperm.py
deleted file mode 100644 (file)
index 7a742b0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-'''Empty until I write the unit test'''
diff --git a/src/soc/pipe/logical/test/test_pipe_caller.py b/src/soc/pipe/logical/test/test_pipe_caller.py
deleted file mode 100644 (file)
index 79c1e29..0000000
+++ /dev/null
@@ -1,262 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.logical.pipeline import LogicalBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-
-class TestCase:
-    def __init__(self, program, regs, sprs, name):
-        self.program = program
-        self.regs = regs
-        self.sprs = sprs
-        self.name = name
-
-def get_rec_width(rec):
-    recwidth = 0
-    # Setup random inputs for dut.op
-    for p in rec.ports():
-        width = p.width
-        recwidth += width
-    return recwidth
-
-def set_alu_inputs(alu, dec2, sim):
-    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
-    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
-    # and place it into data_i.b
-
-    reg3_ok = yield dec2.e.read_reg3.ok
-    reg1_ok = yield dec2.e.read_reg1.ok
-    assert reg3_ok != reg1_ok
-    if reg3_ok:
-        data1 = yield dec2.e.read_reg3.data
-        data1 = sim.gpr(data1).value
-    elif reg1_ok:
-        data1 = yield dec2.e.read_reg1.data
-        data1 = sim.gpr(data1).value
-    else:
-        data1 = 0
-
-    yield alu.p.data_i.a.eq(data1)
-
-    # If there's an immediate, set the B operand to that
-    reg2_ok = yield dec2.e.read_reg2.ok
-    imm_ok = yield dec2.e.imm_data.imm_ok
-    if imm_ok:
-        data2 = yield dec2.e.imm_data.imm
-    elif reg2_ok:
-        data2 = yield dec2.e.read_reg2.data
-        data2 = sim.gpr(data2).value
-    else:
-        data2 = 0
-    yield alu.p.data_i.b.eq(data2)
-
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
-    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
-    yield alu.p.data_i.carry_in.eq(carry)
-    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
-    yield alu.p.data_i.so.eq(so)
-    
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class LogicalTestCase(FHDLTestCase):
-    def __init__(self, name):
-        super().__init__(name)
-        self.test_name = name
-    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
-        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
-        test_data.append(tc)
-
-    def test_rand(self):
-        insns = ["and", "or", "xor"]
-        for i in range(40):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1, 2"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rand_imm_logical(self):
-        insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
-        for i in range(10):
-            choice = random.choice(insns)
-            imm = random.randint(0, (1<<16)-1)
-            lst = [f"{choice} 3, 1, {imm}"]
-            print(lst)
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    @unittest.skip("broken")
-    def test_cntz(self):
-        insns = ["cntlzd", "cnttzd"]
-        for i in range(10):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1"]
-            print(lst)
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_parity(self):
-        insns = ["prtyw", "prtyd"]
-        for i in range(10):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1"]
-            print(lst)
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_popcnt(self):
-        insns = ["popcntb", "popcntw", "popcntd"]
-        for i in range(10):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1"]
-            print(lst)
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_cmpb(self):
-        lst = ["cmpb 3, 1, 2"]
-        initial_regs = [0] * 32
-        initial_regs[1] = 0xdeadbeefcafec0de
-        initial_regs[2] = 0xd0adb0000afec1de
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_ilang(self):
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        alu = LogicalBasePipe(pspec)
-        vl = rtlil.convert(alu, ports=alu.ports())
-        with open("logical_pipeline.il", "w") as f:
-            f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
-    def __init__(self, test_data):
-        super().__init__("run_all")
-        self.test_data = test_data
-
-    def run_all(self):
-        m = Module()
-        comb = m.d.comb
-        instruction = Signal(32)
-
-        pdecode = create_pdecode()
-
-        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        m.submodules.alu = alu = LogicalBasePipe(pspec)
-
-        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
-        comb += alu.p.valid_i.eq(1)
-        comb += alu.n.ready_i.eq(1)
-        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
-        sim = Simulator(m)
-
-        sim.add_clock(1e-6)
-        def process():
-            for test in self.test_data:
-                print(test.name)
-                program = test.program
-                self.subTest(test.name)
-                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
-                gen = program.generate_instructions()
-                instructions = list(zip(gen, program.assembly.splitlines()))
-
-                index = simulator.pc.CIA.value//4
-                while index < len(instructions):
-                    ins, code = instructions[index]
-
-                    print("0x{:X}".format(ins & 0xffffffff))
-                    print(code)
-
-                    # ask the decoder to decode this binary data (endian'd)
-                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
-                    yield instruction.eq(ins)          # raw binary instr.
-                    yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
-                    self.assertEqual(fn_unit, Function.LOGICAL.value, code)
-                    yield from set_alu_inputs(alu, pdecode2, simulator)
-                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
-                    yield 
-                    opname = code.split(' ')[0]
-                    yield from simulator.call(opname)
-                    index = simulator.pc.CIA.value//4
-
-                    vld = yield alu.n.valid_o
-                    while not vld:
-                        yield
-                        vld = yield alu.n.valid_o
-                    yield
-                    alu_out = yield alu.n.data_o.o
-                    out_reg_valid = yield pdecode2.e.write_reg.ok
-                    if out_reg_valid:
-                        write_reg_idx = yield pdecode2.e.write_reg.data
-                        expected = simulator.gpr(write_reg_idx).value
-                        print(f"expected {expected:x}, actual: {alu_out:x}")
-                        self.assertEqual(expected, alu_out, code)
-                    yield from self.check_extra_alu_outputs(alu, pdecode2,
-                                                            simulator)
-
-        sim.add_sync_process(process)
-        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                            traces=[]):
-            sim.run()
-    def check_extra_alu_outputs(self, alu, dec2, sim):
-        rc = yield dec2.e.rc.data
-        if rc:
-            cr_expected = sim.crl[0].get_range().value
-            cr_actual = yield alu.n.data_o.cr0
-            self.assertEqual(cr_expected, cr_actual)
-
-
-if __name__ == "__main__":
-    unittest.main(exit=False)
-    suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
-
-    runner = unittest.TextTestRunner()
-    runner.run(suite)
diff --git a/src/soc/pipe/shift_rot/formal/.gitignore b/src/soc/pipe/shift_rot/formal/.gitignore
deleted file mode 100644 (file)
index 150f68c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-*/*
diff --git a/src/soc/pipe/shift_rot/formal/proof_main_stage.py b/src/soc/pipe/shift_rot/formal/proof_main_stage.py
deleted file mode 100644 (file)
index 50264d5..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
-                    signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.shift_rot.main_stage import ShiftRotMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
-    def __init__(self):
-        # inputs and outputs
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        rec = CompALUOpSubset()
-        recwidth = 0
-        # Setup random inputs for dut.op
-        for p in rec.ports():
-            width = p.width
-            recwidth += width
-            comb += p.eq(AnyConst(width))
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
-        m.submodules.dut = dut = ShiftRotMainStage(pspec)
-
-        # convenience variables
-        a = dut.i.rs
-        b = dut.i.rb
-        ra = dut.i.ra
-        carry_in = dut.i.carry_in
-        so_in = dut.i.so
-        carry_out = dut.o.carry_out
-        o = dut.o.o
-
-        # setup random inputs
-        comb += [a.eq(AnyConst(64)),
-                 b.eq(AnyConst(64)),
-                 carry_in.eq(AnyConst(1)),
-                 so_in.eq(AnyConst(1))]
-
-        comb += dut.i.ctx.op.eq(rec)
-
-        # Assert that op gets copied from the input to output
-        for rec_sig in rec.ports():
-            name = rec_sig.name
-            dut_sig = getattr(dut.o.ctx.op, name)
-            comb += Assert(dut_sig == rec_sig)
-
-        # signed and signed/32 versions of input a
-        a_signed = Signal(signed(64))
-        a_signed_32 = Signal(signed(32))
-        comb += a_signed.eq(a)
-        comb += a_signed_32.eq(a[0:32])
-
-        # main assertion of arithmetic operations
-        with m.Switch(rec.insn_type):
-            with m.Case(InternalOp.OP_SHL):
-                comb += Assume(ra == 0)
-                with m.If(rec.is_32bit):
-                    comb += Assert(o[0:32] == ((a << b[0:6]) & 0xffffffff))
-                    comb += Assert(o[32:64] == 0)
-                with m.Else():
-                    comb += Assert(o == ((a << b[0:7]) & ((1 << 64)-1)))
-            with m.Case(InternalOp.OP_SHR):
-                comb += Assume(ra == 0)
-                with m.If(~rec.is_signed):
-                    with m.If(rec.is_32bit):
-                        comb += Assert(o[0:32] == (a[0:32] >> b[0:6]))
-                        comb += Assert(o[32:64] == 0)
-                    with m.Else():
-                        comb += Assert(o == (a >> b[0:7]))
-                with m.Else():
-                    with m.If(rec.is_32bit):
-                        comb += Assert(o[0:32] == (a_signed_32 >> b[0:6]))
-                        comb += Assert(o[32:64] == Repl(a[31], 32))
-                    with m.Else():
-                        comb += Assert(o == (a_signed >> b[0:7]))
-
-        return m
-
-
-class ALUTestCase(FHDLTestCase):
-    def test_formal(self):
-        module = Driver()
-        self.assertFormal(module, mode="bmc", depth=2)
-        self.assertFormal(module, mode="cover", depth=2)
-    def test_ilang(self):
-        dut = Driver()
-        vl = rtlil.convert(dut, ports=[])
-        with open("main_stage.il", "w") as f:
-            f.write(vl)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/src/soc/pipe/shift_rot/input_stage.py b/src/soc/pipe/shift_rot/input_stage.py
deleted file mode 100644 (file)
index 72e4c92..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-# This stage is intended to adjust the input data before sending it to
-# the acutal ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
-                    unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.shift_rot.pipe_data import ShiftRotInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ShiftRotInputStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "input")
-
-    def ispec(self):
-        return ShiftRotInputData(self.pspec)
-
-    def ospec(self):
-        return ShiftRotInputData(self.pspec)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        ##### operand A #####
-
-        # operand a to be as-is or inverted
-        a = Signal.like(self.i.ra)
-
-        with m.If(self.i.ctx.op.invert_a):
-            comb += a.eq(~self.i.ra)
-        with m.Else():
-            comb += a.eq(self.i.ra)
-
-        comb += self.o.ra.eq(a)
-        comb += self.o.rb.eq(self.i.rb)
-        comb += self.o.rs.eq(self.i.rs)
-
-
-        ##### carry-in #####
-
-        # either copy incoming carry or set to 1/0 as defined by op
-        with m.Switch(self.i.ctx.op.input_carry):
-            with m.Case(CryIn.ZERO):
-                comb += self.o.carry_in.eq(0)
-            with m.Case(CryIn.ONE):
-                comb += self.o.carry_in.eq(1)
-            with m.Case(CryIn.CA):
-                comb += self.o.carry_in.eq(self.i.carry_in)
-
-        ##### sticky overflow and context (both pass-through) #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/shift_rot/main_stage.py b/src/soc/pipe/shift_rot/main_stage.py
deleted file mode 100644 (file)
index f237528..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-# This stage is intended to do most of the work of executing shift
-# instructions, as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
-from nmutil.pipemodbase import PipeModBase
-from soc.alu.pipe_data import ALUOutputData
-from soc.shift_rot.pipe_data import ShiftRotInputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.shift_rot.rotator import Rotator
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-
-class ShiftRotMainStage(PipeModBase):
-    def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
-
-    def ispec(self):
-        return ShiftRotInputData(self.pspec)
-
-    def ospec(self):
-        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        # obtain me and mb fields from instruction.
-        m_fields = self.fields.instrs['M']
-        md_fields = self.fields.instrs['MD']
-        mb = Signal(m_fields['MB'][0:-1].shape())
-        me = Signal(m_fields['ME'][0:-1].shape())
-        mb_extra = Signal(1, reset_less=True)
-        comb += mb.eq(m_fields['MB'][0:-1])
-        comb += me.eq(m_fields['ME'][0:-1])
-        comb += mb_extra.eq(md_fields['mb'][0:-1][0])
-
-        # set up microwatt rotator module
-        m.submodules.rotator = rotator = Rotator()
-        comb += [
-            rotator.me.eq(me),
-            rotator.mb.eq(mb),
-            rotator.mb_extra.eq(mb_extra),
-            rotator.rs.eq(self.i.rs),
-            rotator.ra.eq(self.i.ra),
-            rotator.shift.eq(self.i.rb),
-            rotator.is_32bit.eq(self.i.ctx.op.is_32bit),
-            rotator.arith.eq(self.i.ctx.op.is_signed),
-        ]
-
-        # instruction rotate type
-        mode = Signal(3, reset_less=True)
-        with m.Switch(self.i.ctx.op.insn_type):
-            with m.Case(InternalOp.OP_SHL):  comb += mode.eq(0b000)
-            with m.Case(InternalOp.OP_SHR):  comb += mode.eq(0b001) # R-shift
-            with m.Case(InternalOp.OP_RLC):  comb += mode.eq(0b110) # clear LR
-            with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L
-            with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R
-
-        comb += Cat(rotator.right_shift,
-                    rotator.clear_left,
-                    rotator.clear_right).eq(mode)
-                
-        # outputs from the microwatt rotator module
-        comb += [self.o.o.eq(rotator.result_o),
-                 self.o.carry_out.eq(rotator.carry_out_o)]
-
-        ###### sticky overflow and context, both pass-through #####
-
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
diff --git a/src/soc/pipe/shift_rot/maskgen.py b/src/soc/pipe/shift_rot/maskgen.py
deleted file mode 100644 (file)
index 89246e0..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-from nmigen import (Elaboratable, Signal, Module)
-import math
-
-class MaskGen(Elaboratable):
-    """MaskGen - create a diff mask
-
-    example: x=5 --> a=0b11111
-             y=3 --> b=0b00111
-             o:        0b11000
-             x=2 --> a=0b00011
-             y=4 --> b=0b01111
-             o:        0b10011
-    """
-    def __init__(self, width):
-        self.width = width
-        self.shiftwidth = math.ceil(math.log2(width))
-        self.mb = Signal(self.shiftwidth, reset_less=True)
-        self.me = Signal(self.shiftwidth, reset_less=True)
-
-        self.o = Signal(width, reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        x = Signal.like(self.mb)
-        y = Signal.like(self.mb)
-
-        comb += x.eq(64 - self.mb)
-        comb += y.eq(63 - self.me)
-
-        mask_a = Signal.like(self.o)
-        mask_b = Signal.like(self.o)
-
-        comb += mask_a.eq((1<<x) - 1)
-        comb += mask_b.eq((1<<y) - 1)
-
-        with m.If(x > y):
-            comb += self.o.eq(mask_a ^ mask_b)
-        with m.Else():
-            comb += self.o.eq(mask_a ^ ~mask_b)
-            
-
-        return m
-
-    def ports(self):
-        return [self.mb, self.me, self.o]
diff --git a/src/soc/pipe/shift_rot/pipe_data.py b/src/soc/pipe/shift_rot/pipe_data.py
deleted file mode 100644 (file)
index 7f98d16..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-from nmigen import Signal, Const
-from nmutil.dynamicpipe import SimpleHandshakeRedir
-from soc.alu.alu_input_record import CompALUOpSubset
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.alu.pipe_data import IntegerData
-
-
-class ShiftRotInputData(IntegerData):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.ra = Signal(64, reset_less=True) # RA
-        self.rs = Signal(64, reset_less=True) # RS
-        self.rb = Signal(64, reset_less=True) # RB/immediate
-        self.so = Signal(reset_less=True)
-        self.carry_in = Signal(reset_less=True)
-
-    def __iter__(self):
-        yield from super().__iter__()
-        yield self.ra
-        yield self.rs
-        yield self.rb
-        yield self.carry_in
-        yield self.so
-
-    def eq(self, i):
-        lst = super().eq(i)
-        return lst + [self.rs.eq(i.rs), self.ra.eq(i.ra),
-                      self.rb.eq(i.rb),
-                      self.carry_in.eq(i.carry_in),
-                      self.so.eq(i.so)]
diff --git a/src/soc/pipe/shift_rot/pipeline.py b/src/soc/pipe/shift_rot/pipeline.py
deleted file mode 100644 (file)
index 1080aa8..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.shift_rot.input_stage import ShiftRotInputStage
-from soc.shift_rot.main_stage import ShiftRotMainStage
-from soc.alu.output_stage import ALUOutputStage
-
-class ShiftRotStages(PipeModBaseChain):
-    def get_chain(self):
-        inp = ShiftRotInputStage(self.pspec)
-        main = ShiftRotMainStage(self.pspec)
-        out = ALUOutputStage(self.pspec)
-        return [inp, main, out]
-
-
-class ShiftRotBasePipe(ControlBase):
-    def __init__(self, pspec):
-        ControlBase.__init__(self)
-        self.pipe1 = ShiftRotStages(pspec)
-        self._eqs = self.connect([self.pipe1])
-
-    def elaborate(self, platform):
-        m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
-        m.d.comb += self._eqs
-        return m
diff --git a/src/soc/pipe/shift_rot/rotator.py b/src/soc/pipe/shift_rot/rotator.py
deleted file mode 100644 (file)
index 23aa0e4..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-# Manual translation and adaptation of rotator.vhdl from microwatt into nmigen
-#
-
-from nmigen import (Elaboratable, Signal, Module, Const, Cat,
-                    unsigned, signed)
-from soc.shift_rot.rotl import ROTL
-
-# note BE bit numbering
-def right_mask(m, mask_begin):
-    ret = Signal(64, name="right_mask", reset_less=True)
-    with m.If(mask_begin <= 64):
-        m.d.comb += ret.eq((1<<(64-mask_begin)) - 1)
-    return ret
-
-def left_mask(m, mask_end):
-    ret = Signal(64, name="left_mask", reset_less=True)
-    m.d.comb += ret.eq(~((1<<(63-mask_end)) - 1))
-    return ret
-
-
-class Rotator(Elaboratable):
-    """Rotator: covers multiple POWER9 rotate functions
-
-        supported modes:
-
-        * sl[wd]
-        * rlw*, rldic, rldicr, rldimi
-        * rldicl, sr[wd]
-        * sra[wd][i]
-
-        use as follows:
-
-        * shift = RB[0:7]
-        * arith = 1 when is_signed
-        * right_shift = 1 when insn_type is OP_SHR
-        * clear_left = 1 when insn_type is OP_RLC or OP_RLCL
-        * clear_right = 1 when insn_type is OP_RLC or OP_RLCR
-    """
-    def __init__(self):
-        # input
-     &n