From: Luke Kenneth Casson Leighton Date: Fri, 22 May 2020 10:10:35 +0000 (+0100) Subject: cookie-cut start on div pipe X-Git-Tag: div_pipeline~950 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=41f1b32672558e7725e0dd63e7a6bdf5e1752080;p=soc.git cookie-cut start on div pipe --- diff --git a/src/soc/fu/div/__init__.py b/src/soc/fu/div/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/soc/fu/div/formal/proof_main_stage.py b/src/soc/fu/div/formal/proof_main_stage.py new file mode 100644 index 00000000..456ff815 --- /dev/null +++ b/src/soc/fu/div/formal/proof_main_stage.py @@ -0,0 +1,161 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan +""" +Links: + * https://bugs.libre-soc.org/show_bug.cgi?id=331 + * https://libre-soc.org/openpower/isa/fixedlogical/ +""" + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.lib.coding import PriorityEncoder +from nmigen.cli import rtlil + +from soc.fu.logical.main_stage import LogicalMainStage +from soc.fu.alu.pipe_data import ALUPipeSpec +from soc.fu.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def popcount(self, sig, width): + result = 0 + for i in range(width): + result = result + sig[i] + return result + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = LogicalMainStage(pspec) + + # convenience 
variables + a = dut.i.a + b = dut.i.b + carry_in = dut.i.xer_ca[0] + carry_in32 = dut.i.xer_ca[1] + so_in = dut.i.xer_so + o = dut.o.o + + # setup random inputs + comb += [a.eq(AnyConst(64)), + b.eq(AnyConst(64)), + carry_in.eq(AnyConst(0b11)), + so_in.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for rec_sig in rec.ports(): + name = rec_sig.name + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + # signed and signed/32 versions of input a + a_signed = Signal(signed(64)) + a_signed_32 = Signal(signed(32)) + comb += a_signed.eq(a) + comb += a_signed_32.eq(a[0:32]) + + # main assertion of arithmetic operations + with m.Switch(rec.insn_type): + with m.Case(InternalOp.OP_AND): + comb += Assert(dut.o.o == a & b) + with m.Case(InternalOp.OP_OR): + comb += Assert(dut.o.o == a | b) + with m.Case(InternalOp.OP_XOR): + comb += Assert(dut.o.o == a ^ b) + + with m.Case(InternalOp.OP_POPCNT): + with m.If(rec.data_len == 8): + comb += Assert(dut.o.o == self.popcount(a, 64)) + with m.If(rec.data_len == 4): + + for i in range(2): + comb += Assert(dut.o.o[i*32:(i+1)*32] == + self.popcount(a[i*32:(i+1)*32], 32)) + with m.If(rec.data_len == 1): + for i in range(8): + comb += Assert(dut.o.o[i*8:(i+1)*8] == + self.popcount(a[i*8:(i+1)*8], 8)) + + with m.Case(InternalOp.OP_PRTY): + with m.If(rec.data_len == 8): + result = 0 + for i in range(8): + result = result ^ a[i*8] + comb += Assert(dut.o.o == result) + with m.If(rec.data_len == 4): + result_low = 0 + result_high = 0 + for i in range(4): + result_low = result_low ^ a[i*8] + result_high = result_high ^ a[i*8 + 32] + comb += Assert(dut.o.o[0:32] == result_low) + comb += Assert(dut.o.o[32:64] == result_high) + with m.Case(InternalOp.OP_CNTZ): + XO = dut.fields.FormX.XO[0:-1] + with m.If(rec.is_32bit): + m.submodules.pe32 = pe32 = PriorityEncoder(32) + peo = Signal(range(0, 32+1)) + with m.If(pe32.n): + comb += peo.eq(32) + with m.Else(): + 
class DivMainStage(PipeModBase):
    """Main combinatorial stage of the DIV pipeline.

    As written, this stage is a copy of the logical-pipeline main stage:
    it switches on ``op.insn_type`` and implements AND, OR, XOR and
    bpermd.  It does not gate carry or overflow; the sticky-overflow bit
    and the pipeline context are passed straight through to the output.

    Parameters
    ----------
    pspec :
        Pipeline specification, handed to ``PipeModBase`` and to the
        input/output data constructors.
    """

    def __init__(self, pspec):
        super().__init__(pspec, "main")
        # instruction-field decoder built on the incoming instruction word
        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
        self.fields.create_specs()

    def ispec(self):
        """Return a fresh input-data record for this stage."""
        return LogicalInputData(self.pspec)

    def ospec(self):
        """Return a fresh output-data record for this stage."""
        return ALUOutputData(self.pspec)

    def elaborate(self, platform):
        """Build and return the nmigen ``Module`` for this stage."""
        # Bpermd is used below but was never imported by this file, so
        # elaboration failed with NameError.  Imported locally to keep the
        # fix self-contained.
        # NOTE(review): path assumed to be soc.fu.logical.bpermd (where the
        # logical pipeline this file was copied from keeps it) -- confirm.
        from soc.fu.logical.bpermd import Bpermd

        m = Module()
        comb = m.d.comb
        op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o

        ##########################
        # main switch for DIV

        with m.Switch(op.insn_type):

            ###### AND, OR, XOR #######
            with m.Case(InternalOp.OP_AND):
                comb += o.eq(a & b)
            with m.Case(InternalOp.OP_OR):
                comb += o.eq(a | b)
            with m.Case(InternalOp.OP_XOR):
                comb += o.eq(a ^ b)

            ###### bpermd #######
            with m.Case(InternalOp.OP_BPERM):
                m.submodules.bpermd = bpermd = Bpermd(64)
                comb += bpermd.rs.eq(a)
                comb += bpermd.rb.eq(b)
                comb += o.eq(bpermd.ra)

        ###### sticky overflow and context, both pass-through #####

        comb += self.o.xer_so.data.eq(self.i.xer_so)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
def set_alu_inputs(alu, dec2, sim):
    """Generator that drives operands A and B onto ``alu.p.data_i``.

    Each ``yield`` of a decoder attribute hands that object to whoever is
    driving the generator and receives its current value back; each
    ``yield`` of an ``.eq(...)`` result hands back an assignment
    (presumably this runs as an nmigen pysim process -- confirm against
    the caller).

    Operand A comes from the register named by ``read_reg3`` or
    ``read_reg1``; exactly one of the two must be flagged ok.
    Operand B is the immediate when ``imm_ok`` is set, otherwise the
    register named by ``read_reg2`` when that is ok, otherwise 0.
    Register contents are looked up with ``sim.gpr(n).value``.
    """
    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
    # and place it into data_i.b
    decoded = dec2.e

    # ---- operand A ----
    from_reg3 = yield decoded.read_reg3.ok
    from_reg1 = yield decoded.read_reg1.ok
    assert from_reg3 != from_reg1
    operand_a = 0
    if from_reg3:
        regnum = yield decoded.read_reg3.data
        operand_a = sim.gpr(regnum).value
    elif from_reg1:
        regnum = yield decoded.read_reg1.data
        operand_a = sim.gpr(regnum).value
    yield alu.p.data_i.a.eq(operand_a)

    # ---- operand B: an immediate takes priority over a register ----
    have_reg2 = yield decoded.read_reg2.ok
    have_imm = yield decoded.imm_data.imm_ok
    operand_b = 0
    if have_imm:
        operand_b = yield decoded.imm_data.imm
    elif have_reg2:
        regnum = yield decoded.read_reg2.data
        operand_b = sim.gpr(regnum).value
    yield alu.p.data_i.b.eq(operand_b)
Before, it took around 1 minute on my computer, now it +# takes around 3 seconds + +test_data = [] + + +class LogicalTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): + tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) + test_data.append(tc) + + def test_rand(self): + insns = ["and", "or", "xor"] + for i in range(40): + choice = random.choice(insns) + lst = [f"{choice} 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1 << 64)-1) + initial_regs[2] = random.randint(0, (1 << 64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rand_imm_logical(self): + insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"] + for i in range(10): + choice = random.choice(insns) + imm = random.randint(0, (1 << 16)-1) + lst = [f"{choice} 3, 1, {imm}"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1 << 64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_cntz(self): + insns = ["cntlzd", "cnttzd", "cntlzw", "cnttzw"] + for i in range(100): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1 << 64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_parity(self): + insns = ["prtyw", "prtyd"] + for i in range(10): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1 << 64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_popcnt(self): + insns = ["popcntb", "popcntw", "popcntd"] + for i in range(10): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1 << 64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_popcnt_edge(self): + insns = ["popcntb", 
"popcntw", "popcntd"] + for choice in insns: + lst = [f"{choice} 3, 1"] + initial_regs = [0] * 32 + initial_regs[1] = -1 + self.run_tst_program(Program(lst), initial_regs) + + def test_cmpb(self): + lst = ["cmpb 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = 0xdeadbeefcafec0de + initial_regs[2] = 0xd0adb0000afec1de + self.run_tst_program(Program(lst), initial_regs) + + def test_bpermd(self): + lst = ["bpermd 3, 1, 2"] + for i in range(20): + initial_regs = [0] * 32 + initial_regs[1] = 1<