--- /dev/null
+# Proof of correctness for the logical pipeline main stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+"""
+Links:
+ * https://bugs.libre-soc.org/show_bug.cgi?id=331
+ * https://libre-soc.org/openpower/isa/fixedlogical/
+"""
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+ signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.lib.coding import PriorityEncoder
+from nmigen.cli import rtlil
+
+from soc.fu.logical.main_stage import LogicalMainStage
+from soc.fu.alu.pipe_data import ALUPipeSpec
+from soc.fu.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
class Driver(Elaboratable):
    """Formal-verification harness around ``LogicalMainStage``.

    ``elaborate`` instantiates the stage as submodule ``dut``, drives its
    operation record and data inputs with unconstrained constants
    (``AnyConst``), and adds ``Assert`` statements that compare the stage
    output against reference expressions for each instruction type it
    switches on (AND/OR/XOR, POPCNT, PRTY, CNTZ).
    """

    def __init__(self):
        # inputs and outputs: none, everything is created in elaborate()
        pass

    def popcount(self, sig, width):
        """Return an expression adding up bits ``sig[0]`` .. ``sig[width-1]``.

        The sum is built as a chain of additions starting from the integer 0.
        """
        result = 0
        for i in range(width):
            result = result + sig[i]
        return result

    def elaborate(self, platform):
        """Build the proof module: DUT, free inputs and assertions."""
        m = Module()
        comb = m.d.comb

        rec = CompALUOpSubset()
        recwidth = 0
        # Setup random inputs for dut.op, adding up the field widths so the
        # pipeline spec can be told how wide the operation record is
        for p in rec.ports():
            width = p.width
            recwidth += width
            comb += p.eq(AnyConst(width))

        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
        m.submodules.dut = dut = LogicalMainStage(pspec)

        # convenience variables
        a = dut.i.a
        b = dut.i.b
        carry_in = dut.i.xer_ca[0]
        carry_in32 = dut.i.xer_ca[1]
        so_in = dut.i.xer_so
        o = dut.o.o

        # setup random inputs.  AnyConst takes a *shape*, so each single-bit
        # carry gets its own 1-bit constant (the previous AnyConst(0b11)
        # created a 3-bit value truncated into carry_in and left carry_in32
        # undriven).
        comb += [a.eq(AnyConst(64)),
                 b.eq(AnyConst(64)),
                 carry_in.eq(AnyConst(1)),
                 carry_in32.eq(AnyConst(1)),
                 so_in.eq(AnyConst(1))]

        comb += dut.i.ctx.op.eq(rec)

        # Assert that op gets copied from the input to output
        for rec_sig in rec.ports():
            name = rec_sig.name
            dut_sig = getattr(dut.o.ctx.op, name)
            comb += Assert(dut_sig == rec_sig)

        # signed and signed/32 versions of input a
        # (not referenced by any assertion below)
        a_signed = Signal(signed(64))
        a_signed_32 = Signal(signed(32))
        comb += a_signed.eq(a)
        comb += a_signed_32.eq(a[0:32])

        # main assertion of arithmetic operations
        with m.Switch(rec.insn_type):
            with m.Case(InternalOp.OP_AND):
                comb += Assert(o == a & b)
            with m.Case(InternalOp.OP_OR):
                comb += Assert(o == a | b)
            with m.Case(InternalOp.OP_XOR):
                comb += Assert(o == a ^ b)

            with m.Case(InternalOp.OP_POPCNT):
                # data_len selects the lane size: one 64-bit count, two
                # 32-bit counts, or eight 8-bit counts
                with m.If(rec.data_len == 8):
                    comb += Assert(o == self.popcount(a, 64))
                with m.If(rec.data_len == 4):
                    for i in range(2):
                        comb += Assert(o[i*32:(i+1)*32] ==
                                       self.popcount(a[i*32:(i+1)*32], 32))
                with m.If(rec.data_len == 1):
                    for i in range(8):
                        comb += Assert(o[i*8:(i+1)*8] ==
                                       self.popcount(a[i*8:(i+1)*8], 8))

            with m.Case(InternalOp.OP_PRTY):
                # parity is the XOR of the lowest bit of each byte
                with m.If(rec.data_len == 8):
                    result = 0
                    for i in range(8):
                        result = result ^ a[i*8]
                    comb += Assert(o == result)
                with m.If(rec.data_len == 4):
                    result_low = 0
                    result_high = 0
                    for i in range(4):
                        result_low = result_low ^ a[i*8]
                        result_high = result_high ^ a[i*8 + 32]
                    comb += Assert(o[0:32] == result_low)
                    comb += Assert(o[32:64] == result_high)

            with m.Case(InternalOp.OP_CNTZ):
                # XO[-1] distinguishes trailing-zero from leading-zero counts
                XO = dut.fields.FormX.XO[0:-1]
                with m.If(rec.is_32bit):
                    m.submodules.pe32 = pe32 = PriorityEncoder(32)
                    peo = Signal(range(0, 32+1))
                    # encoder flags "no bit set": the count is the full width
                    with m.If(pe32.n):
                        comb += peo.eq(32)
                    with m.Else():
                        comb += peo.eq(pe32.o)
                    with m.If(XO[-1]):  # cnttzw
                        comb += pe32.i.eq(a[0:32])
                        comb += Assert(o == peo)
                    with m.Else():  # cntlzw
                        # bit-reverse so the encoder scans from the top bit
                        comb += pe32.i.eq(a[0:32][::-1])
                        comb += Assert(o == peo)
                with m.Else():
                    m.submodules.pe64 = pe64 = PriorityEncoder(64)
                    peo64 = Signal(7)
                    with m.If(pe64.n):
                        comb += peo64.eq(64)
                    with m.Else():
                        comb += peo64.eq(pe64.o)
                    with m.If(XO[-1]):  # cnttzd
                        comb += pe64.i.eq(a[0:64])
                        comb += Assert(o == peo64)
                    with m.Else():  # cntlzd
                        comb += pe64.i.eq(a[0:64][::-1])
                        comb += Assert(o == peo64)

        return m
+
+
class LogicalTestCase(FHDLTestCase):
    """Runs the formal proof in ``Driver`` and dumps its RTLIL."""

    def test_formal(self):
        """Check the driver in bounded-model-check mode, then in cover mode."""
        driver = Driver()
        for mode in ("bmc", "cover"):
            self.assertFormal(driver, mode=mode, depth=2)

    def test_ilang(self):
        """Convert a fresh driver to RTLIL and write it to ``main_stage.il``."""
        rtlil_text = rtlil.convert(Driver(), ports=[])
        with open("main_stage.il", "w") as out:
            out.write(rtlil_text)
+
+
# Run the unittest command-line runner when executed as a script.
if __name__ == '__main__':
    unittest.main()
--- /dev/null
+# This stage is intended to do most of the work of executing DIV
+# This module however should not gate the carry or overflow, that's up
+# to the output stage
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.logical.pipe_data import LogicalInputData
+from soc.fu.alu.pipe_data import ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
class DivMainStage(PipeModBase):
    """Main pipeline stage for the DIV function unit.

    As written, the instruction switch only handles AND, OR, XOR and
    BPERM; the sticky-overflow input and the context are passed straight
    through to the output.  Per the header comment, gating of carry and
    overflow is left to the output stage.
    """

    def __init__(self, pspec):
        super().__init__(pspec, "main")
        # instruction-field decoder over the raw instruction in the context
        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
        self.fields.create_specs()

    def ispec(self):
        """Return the input data specification for this stage."""
        return LogicalInputData(self.pspec)

    def ospec(self):
        """Return the output data specification for this stage."""
        return ALUOutputData(self.pspec)

    def elaborate(self, platform):
        """Build the combinatorial logic selecting the result by insn_type."""
        # NOTE(review): Bpermd was used below without being imported anywhere
        # in this module, which raises NameError during elaboration.  The
        # module path here is assumed (sibling logical unit) -- confirm.
        from soc.fu.logical.bpermd import Bpermd

        m = Module()
        comb = m.d.comb
        op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o

        ##########################
        # main switch for DIV

        with m.Switch(op.insn_type):

            ###### AND, OR, XOR #######
            with m.Case(InternalOp.OP_AND):
                comb += o.eq(a & b)
            with m.Case(InternalOp.OP_OR):
                comb += o.eq(a | b)
            with m.Case(InternalOp.OP_XOR):
                comb += o.eq(a ^ b)

            ###### bpermd #######
            with m.Case(InternalOp.OP_BPERM):
                m.submodules.bpermd = bpermd = Bpermd(64)
                comb += bpermd.rs.eq(a)
                comb += bpermd.rb.eq(b)
                comb += o.eq(bpermd.ra)

        ###### sticky overflow and context, both pass-through #####

        comb += self.o.xer_so.data.eq(self.i.xer_so)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+from soc.fu.logical.pipeline import LogicalBasePipe
+from soc.fu.logical.pipe_data import LogicalPipeSpec
+import random
+
+
class TestCase:
    """Plain record describing one queued test.

    Holds the program to run, the initial register list, the initial
    SPR mapping and the name of the test that queued it.
    """

    def __init__(self, program, regs, sprs, name):
        self.program, self.regs = program, regs
        self.sprs, self.name = sprs, name
+
+
def set_alu_inputs(alu, dec2, sim):
    """Simulation generator that loads operands A and B into the ALU input.

    Operand A is the GPR named by decoder read port 3 or read port 1
    (exactly one of the two must be flagged ok).  Operand B is the
    decoded immediate when one is present, otherwise the GPR named by
    read port 2, otherwise zero.  Register values come from ``sim.gpr``.
    """
    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
    # and place it into data_i.b

    decoded = dec2.e

    use_reg3 = yield decoded.read_reg3.ok
    use_reg1 = yield decoded.read_reg1.ok
    assert use_reg3 != use_reg1

    # port 3 takes priority over port 1; fall back to zero if neither
    operand_a = 0
    candidates = ((use_reg3, decoded.read_reg3),
                  (use_reg1, decoded.read_reg1))
    for selected, port in candidates:
        if selected:
            regnum = yield port.data
            operand_a = sim.gpr(regnum).value
            break

    yield alu.p.data_i.a.eq(operand_a)

    # If there's an immediate, set the B operand to that
    has_reg2 = yield decoded.read_reg2.ok
    has_imm = yield decoded.imm_data.imm_ok
    if has_imm:
        operand_b = yield decoded.imm_data.imm
    elif has_reg2:
        regnum = yield decoded.read_reg2.data
        operand_b = sim.gpr(regnum).value
    else:
        operand_b = 0
    yield alu.p.data_i.b.eq(operand_b)
+
+
def set_extra_alu_inputs(alu, dec2, sim):
    """Simulation generator copying XER flags from ``sim`` into the ALU input.

    The CA and CA32 bits go to ``xer_ca[0]`` and ``xer_ca[1]``, and the SO
    bit goes to ``xer_so``; each is normalised to the integer 0 or 1.
    ``dec2`` is accepted but not used.
    """
    def xer_flag(bit_name):
        # look the flag up afresh on every call, then reduce it to 0/1
        return 1 if sim.spr['XER'][XER_bits[bit_name]] else 0

    inputs = alu.p.data_i
    carry = xer_flag('CA')
    carry32 = xer_flag('CA32')
    yield inputs.xer_ca[0].eq(carry)
    yield inputs.xer_ca[1].eq(carry32)
    yield inputs.xer_so.eq(xer_flag('SO'))
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in LogicalTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
# Module-level queue of TestCase records: appended to by run_tst_program
# and handed to TestRunner in the __main__ block.
test_data = []
+
+
class LogicalTestCase(FHDLTestCase):
    """Queues logical-pipeline test programs for later execution.

    The ``test_*`` methods do not simulate anything themselves (apart from
    ``test_ilang``, which only writes RTLIL).  Each one builds one or more
    programs plus initial register values and appends them, via
    ``run_tst_program``, to the module-level ``test_data`` list.
    """

    def __init__(self, name):
        super().__init__(name)
        self.test_name = name

    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
        """Append a ``TestCase`` for ``prog`` to ``test_data``.

        ``initial_regs`` defaults to 32 zeroed registers and ``initial_sprs``
        to an empty mapping.  Fresh objects are created on every call so
        queued cases never share (and cannot corrupt) a default container.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_sprs is None:
            initial_sprs = {}
        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
        test_data.append(tc)

    def _queue_random_unary(self, insns, count):
        """Queue ``count`` single-operand programs ``<insn> 3, 1``.

        Each picks a random mnemonic from ``insns`` and a random 64-bit
        value for register 1.
        """
        for _ in range(count):
            choice = random.choice(insns)
            lst = [f"{choice} 3, 1"]
            print(lst)
            initial_regs = [0] * 32
            initial_regs[1] = random.randint(0, (1 << 64)-1)
            self.run_tst_program(Program(lst), initial_regs)

    def test_rand(self):
        insns = ["and", "or", "xor"]
        for _ in range(40):
            choice = random.choice(insns)
            lst = [f"{choice} 3, 1, 2"]
            initial_regs = [0] * 32
            initial_regs[1] = random.randint(0, (1 << 64)-1)
            initial_regs[2] = random.randint(0, (1 << 64)-1)
            self.run_tst_program(Program(lst), initial_regs)

    def test_rand_imm_logical(self):
        insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
        for _ in range(10):
            choice = random.choice(insns)
            imm = random.randint(0, (1 << 16)-1)
            lst = [f"{choice} 3, 1, {imm}"]
            print(lst)
            initial_regs = [0] * 32
            initial_regs[1] = random.randint(0, (1 << 64)-1)
            self.run_tst_program(Program(lst), initial_regs)

    def test_cntz(self):
        self._queue_random_unary(["cntlzd", "cnttzd", "cntlzw", "cnttzw"],
                                 100)

    def test_parity(self):
        self._queue_random_unary(["prtyw", "prtyd"], 10)

    def test_popcnt(self):
        self._queue_random_unary(["popcntb", "popcntw", "popcntd"], 10)

    def test_popcnt_edge(self):
        insns = ["popcntb", "popcntw", "popcntd"]
        for choice in insns:
            lst = [f"{choice} 3, 1"]
            initial_regs = [0] * 32
            # all 64 bits set, written as an explicit unsigned value rather
            # than relying on -1 being masked to register width downstream
            initial_regs[1] = (1 << 64) - 1
            self.run_tst_program(Program(lst), initial_regs)

    def test_cmpb(self):
        lst = ["cmpb 3, 1, 2"]
        initial_regs = [0] * 32
        initial_regs[1] = 0xdeadbeefcafec0de
        initial_regs[2] = 0xd0adb0000afec1de
        self.run_tst_program(Program(lst), initial_regs)

    def test_bpermd(self):
        lst = ["bpermd 3, 1, 2"]
        for _ in range(20):
            initial_regs = [0] * 32
            # a single randomly-placed set bit in register 1
            initial_regs[1] = 1 << random.randint(0, 63)
            initial_regs[2] = 0xdeadbeefcafec0de
            self.run_tst_program(Program(lst), initial_regs)

    def test_ilang(self):
        """Write the pipeline's RTLIL to ``logical_pipeline.il``."""
        pspec = LogicalPipeSpec(id_wid=2)
        alu = LogicalBasePipe(pspec)
        vl = rtlil.convert(alu, ports=alu.ports())
        with open("logical_pipeline.il", "w") as f:
            f.write(vl)
+
+
class TestRunner(FHDLTestCase):
    """Runs every queued ``TestCase`` through one shared DUT and simulator.

    ``run_all`` builds the decoder and logical pipeline once, then feeds
    each instruction of each queued program to both the hardware pipeline
    and the ISA simulator, asserting that their results agree.
    """

    def __init__(self, test_data):
        super().__init__("run_all")
        self.test_data = test_data

    def run_all(self):
        """Build the test bench and simulate all cases in ``self.test_data``."""
        m = Module()
        comb = m.d.comb
        instruction = Signal(32)

        pdecode = create_pdecode()

        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)

        pspec = LogicalPipeSpec(id_wid=2)
        m.submodules.alu = alu = LogicalBasePipe(pspec)

        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
        # pipeline input always valid, output always accepted
        comb += alu.p.valid_i.eq(1)
        comb += alu.n.ready_i.eq(1)
        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
        sim = Simulator(m)

        sim.add_clock(1e-6)

        def process():
            for test in self.test_data:
                print(test.name)
                # subTest only takes effect as a context manager; used this
                # way a failure is attributed to the named case.
                with self.subTest(test.name):
                    yield from self._run_case(alu, instruction,
                                              pdecode2, test)

        sim.add_sync_process(process)
        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
                           traces=[]):
            sim.run()

    def _run_case(self, alu, instruction, pdecode2, test):
        """Simulation generator executing one queued case and checking it."""
        program = test.program
        simulator = ISA(pdecode2, test.regs, test.sprs, 0)
        gen = program.generate_instructions()
        instructions = list(zip(gen, program.assembly.splitlines()))

        # the ISA simulator's program counter selects the next instruction
        index = simulator.pc.CIA.value//4
        while index < len(instructions):
            ins, code = instructions[index]

            print("0x{:X}".format(ins & 0xffffffff))
            print(code)

            # ask the decoder to decode this binary data (endian'd)
            yield pdecode2.dec.bigendian.eq(0)  # little / big?
            yield instruction.eq(ins)          # raw binary instr.
            yield Settle()
            fn_unit = yield pdecode2.e.fn_unit
            self.assertEqual(fn_unit, Function.LOGICAL.value, code)
            yield from set_alu_inputs(alu, pdecode2, simulator)
            yield from set_extra_alu_inputs(alu, pdecode2, simulator)
            yield
            # run the same instruction on the ISA simulator
            opname = code.split(' ')[0]
            yield from simulator.call(opname)
            index = simulator.pc.CIA.value//4

            # wait for the pipeline to flag a valid output
            vld = yield alu.n.valid_o
            while not vld:
                yield
                vld = yield alu.n.valid_o
            yield
            alu_out = yield alu.n.data_o.o
            out_reg_valid = yield pdecode2.e.write_reg.ok
            if out_reg_valid:
                write_reg_idx = yield pdecode2.e.write_reg.data
                expected = simulator.gpr(write_reg_idx).value
                print(f"expected {expected:x}, actual: {alu_out:x}")
                self.assertEqual(expected, alu_out, code)
            yield from self.check_extra_alu_outputs(alu, pdecode2,
                                                    simulator, code)

    def check_extra_alu_outputs(self, alu, dec2, sim, code):
        """Compare CR0 against the simulator when the decoded rc is set."""
        rc = yield dec2.e.rc.data
        if rc:
            cr_expected = sim.crl[0].get_range().value
            cr_actual = yield alu.n.data_o.cr0.data
            self.assertEqual(cr_expected, cr_actual, code)
+
+
if __name__ == "__main__":
    # First pass: run the regular test cases, which only fill test_data
    # (exit=False keeps the interpreter alive for the second pass).
    unittest.main(exit=False)

    # Second pass: run everything that was queued through a single TestRunner.
    queued_suite = unittest.TestSuite()
    queued_suite.addTest(TestRunner(test_data))
    unittest.TextTestRunner().run(queued_suite)