From: Luke Kenneth Casson Leighton Date: Mon, 6 Jul 2020 16:02:09 +0000 (+0100) Subject: first cut at mul test pipeline X-Git-Tag: div_pipeline~162^2~31 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2acd91f1ee5412a0b47609cdad2356211987516e;p=soc.git first cut at mul test pipeline --- diff --git a/src/soc/fu/div/setup_stage.py b/src/soc/fu/div/setup_stage.py index a0ea42ed..9b0455be 100644 --- a/src/soc/fu/div/setup_stage.py +++ b/src/soc/fu/div/setup_stage.py @@ -12,10 +12,7 @@ from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange from soc.fu.div.pipe_data import CoreInputData from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation - -def eq32(is_32bit, dest, src): - return [dest[0:32].eq(src[0:32]), - dest[32:64].eq(Mux(is_32bit, 0, src[32:64]))] +from nmutil.util import eq32 class DivSetupStage(PipeModBase): diff --git a/src/soc/fu/mul/main_stage.py b/src/soc/fu/mul/main_stage.py index 97ba81d7..ccdd0d35 100644 --- a/src/soc/fu/mul/main_stage.py +++ b/src/soc/fu/mul/main_stage.py @@ -28,9 +28,10 @@ class MulMainStage2(PipeModBase): ###### xer and context, all pass-through ##### - comb += self.o.xer_ca.data.eq(self.i.xer_ca) - comb += self.o.neg_res.data.eq(self.i.neg_res) - comb += self.o.xer_so.data.eq(self.i.xer_so) + comb += self.o.xer_ca.eq(self.i.xer_ca) + comb += self.o.neg_res.eq(self.i.neg_res) + comb += self.o.neg_res32.eq(self.i.neg_res32) + comb += self.o.xer_so.eq(self.i.xer_so) comb += self.o.ctx.eq(self.i.ctx) return m diff --git a/src/soc/fu/mul/pipe_data.py b/src/soc/fu/mul/pipe_data.py index 429be008..1d047bb8 100644 --- a/src/soc/fu/mul/pipe_data.py +++ b/src/soc/fu/mul/pipe_data.py @@ -1,14 +1,17 @@ from soc.fu.alu.alu_input_record import CompALUOpSubset from soc.fu.pipe_data import IntegerData, CommonPipeSpec from soc.fu.alu.pipe_data import ALUOutputData, ALUInputData +from nmigen import Signal class MulIntermediateData(ALUInputData): def __init__(self, pspec): super().__init__(pspec) - neg_result = Signal(reset_less=True) - self.data.append(neg_result) + self.neg_res = Signal(reset_less=True) + self.neg_res32 = Signal(reset_less=True) + self.data.append(self.neg_res) + self.data.append(self.neg_res32) class MulOutputData(IntegerData): @@ -18,8 +21,10 @@ class MulOutputData(IntegerData): def __init__(self, pspec): super().__init__(pspec, False) - neg_result = Signal(reset_less=True) - self.data.append(neg_result) + self.neg_res = Signal(reset_less=True) + self.neg_res32 = Signal(reset_less=True) + self.data.append(self.neg_res) + self.data.append(self.neg_res32) class MulPipeSpec(CommonPipeSpec): diff --git a/src/soc/fu/mul/pipeline.py b/src/soc/fu/mul/pipeline.py index d32d7529..a557c90e 100644 --- a/src/soc/fu/mul/pipeline.py +++ b/src/soc/fu/mul/pipeline.py @@ -1,9 +1,10 @@ from nmutil.singlepipe import ControlBase from nmutil.pipemodbase import PipeModBaseChain -from soc.fu.shift_rot.input_stage import ShiftRotInputStage -from soc.fu.shift_rot.main_stage import ShiftRotMainStage +from soc.fu.alu.input_stage import ALUInputStage from soc.fu.alu.output_stage import ALUOutputStage -from soc.fu.mul.main_stage import MulMainStage1, MulMainStage2, MulMainStage3 +from soc.fu.mul.pre_stage import MulMainStage1 +from soc.fu.mul.main_stage import MulMainStage2 +from soc.fu.mul.post_stage import MulMainStage3 class MulStages1(PipeModBaseChain): @@ -26,17 +27,19 @@ class MulStages3(PipeModBaseChain): return [main3, out] -class ShiftRotBasePipe(ControlBase): +class MulBasePipe(ControlBase): def __init__(self, pspec): ControlBase.__init__(self) self.pspec = pspec self.pipe1 = MulStages1(pspec) self.pipe2 = MulStages2(pspec) - self.pipe2 = MulStages3(pspec) - self._eqs = self.connect([self.pipe1, self.pipe2]) + self.pipe3 = MulStages3(pspec) + self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3]) def elaborate(self, platform): m = ControlBase.elaborate(self, platform) - m.submodules.pipe = self.pipe1 + m.submodules.mul_pipe1 = self.pipe1 + m.submodules.mul_pipe2 = self.pipe2 + m.submodules.mul_pipe3 = self.pipe3 m.d.comb += self._eqs return m diff --git a/src/soc/fu/mul/post_stage.py b/src/soc/fu/mul/post_stage.py index 501b4ed5..f2464085 100644 --- a/src/soc/fu/mul/post_stage.py +++ b/src/soc/fu/mul/post_stage.py @@ -32,8 +32,8 @@ class MulMainStage3(PipeModBase): comb += is_32bit.eq(op.is_32bit) # check negate: select signed/unsigned - o_s = Signal(signed(o.width * 2), reset_less=True) - mul_o = Signal(o.width * 2, reset_less=True) + o_s = Signal(signed(o.data.width * 2), reset_less=True) + mul_o = Signal(o.data.width * 2, reset_less=True) comb += o_s.eq(-o_i) comb += mul_o.eq(Mux(self.i.neg_res, o_s, o_i)) comb += o.ok.eq(1) @@ -67,8 +67,8 @@ class MulMainStage3(PipeModBase): # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5 ca = Signal(2, reset_less=True) - comb += ca[0].eq(add_o[-1]) # XER.CA - comb += ca[1].eq(add_o[33] ^ (a[32] ^ b[32])) # XER.CA32 + comb += ca[0].eq(mul_o[-1]) # XER.CA + comb += ca[1].eq(mul_o[33] ^ (self.i.neg_res32)) # XER.CA32 comb += cry_o.data.eq(ca) comb += cry_o.ok.eq(1) diff --git a/src/soc/fu/mul/pre_stage.py b/src/soc/fu/mul/pre_stage.py index ff1e3220..3ce2f933 100644 --- a/src/soc/fu/mul/pre_stage.py +++ b/src/soc/fu/mul/pre_stage.py @@ -4,7 +4,7 @@ from nmutil.pipemodbase import PipeModBase from soc.fu.alu.pipe_data import ALUInputData from soc.fu.mul.pipe_data import MulIntermediateData from ieee754.part.partsig import PartitionedSignal - +from nmutil.util import eq32 class MulMainStage1(PipeModBase): def __init__(self, pspec): @@ -21,32 +21,44 @@ class MulMainStage1(PipeModBase): comb = m.d.comb # convenience variables - a, b = self.i.a, self.i.b + a, b, op = self.i.a, self.i.b, self.i.ctx.op a_o, b_o, neg_res_o = self.o.a, self.o.b, self.o.neg_res + neg_res_o, neg_res32_o = self.o.neg_res, self.o.neg_res32 # check if op is 32-bit, and get sign bit from operand a is_32bit = Signal(reset_less=True) sign_a = Signal(reset_less=True) sign_b = Signal(reset_less=True) + sign32_a = Signal(reset_less=True) + sign32_b = Signal(reset_less=True) comb += is_32bit.eq(op.is_32bit) # work out if a/b are negative (check 32-bit / signed) comb += sign_a.eq(Mux(op.is_32bit, a[31], a[63]) & op.is_signed) comb += sign_b.eq(Mux(op.is_32bit, b[31], b[63]) & op.is_signed) + comb += sign32_a.eq(a[31] & op.is_signed) + comb += sign32_b.eq(b[31] & op.is_signed) # work out if result is negative sign comb += neg_res_o.eq(sign_a ^ sign_b) + comb += neg_res32_o.eq(sign32_a ^ sign32_b) # pass through for OV32 # negation of a 64-bit value produces the same lower 32-bit # result as negation of just the lower 32-bits, so we don't # need to do anything special before negating - comb += a_o.eq(Mux(sign_a, -a, a)) - comb += b_o.eq(Mux(sign_b, -b, b)) + abs_a = Signal(64, reset_less=True) + abs_b = Signal(64, reset_less=True) + comb += abs_a.eq(Mux(sign_a, -a, a)) + comb += abs_b.eq(Mux(sign_b, -b, b)) + + # set up 32/64 bit inputs + comb += eq32(is_32bit, a_o, abs_a) + comb += eq32(is_32bit, b_o, abs_b) ###### XER and context, both pass-through ##### - comb += self.o.xer_ca.data.eq(self.i.xer_ca) - comb += self.o.xer_so.data.eq(self.i.xer_so) + comb += self.o.xer_ca.eq(self.i.xer_ca) + comb += self.o.xer_so.eq(self.i.xer_so) comb += self.o.ctx.eq(self.i.ctx) return m diff --git a/src/soc/fu/mul/test/test_pipe_caller.py b/src/soc/fu/mul/test/test_pipe_caller.py index 88ac5499..5fa0779d 100644 --- a/src/soc/fu/mul/test/test_pipe_caller.py +++ b/src/soc/fu/mul/test/test_pipe_caller.py @@ -6,66 +6,46 @@ import unittest from soc.decoder.isa.caller import ISACaller, special_sprs from soc.decoder.power_decoder import (create_pdecode) from soc.decoder.power_decoder2 import (PowerDecode2) -from soc.decoder.power_enums import (XER_bits, Function) +from soc.decoder.power_enums import (XER_bits, Function, InternalOp, CryIn) from soc.decoder.selectable_int import SelectableInt from soc.simulator.program import Program from soc.decoder.isa.all import ISA + +from soc.fu.test.common import (TestCase, ALUHelpers) from soc.fu.mul.pipeline import MulBasePipe -from soc.fu.alu.alu_input_record import CompALUOpSubset from soc.fu.mul.pipe_data import MulPipeSpec import random -class TestCase: - def __init__(self, program, regs, sprs, name): - self.program = program - self.regs = regs - self.sprs = sprs - self.name = name +def get_cu_inputs(dec2, sim): + """naming (res) must conform to MulFunctionUnit input regspec + """ + res = {} + + yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA + yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB + yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca + yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so + + print ("alu get_cu_inputs", res) + + return res + def set_alu_inputs(alu, dec2, sim): - inputs = [] # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) # and place it into data_i.b - reg3_ok = yield dec2.e.read_reg3.ok - if reg3_ok: - reg3_sel = yield dec2.e.read_reg3.data - data3 = sim.gpr(reg3_sel).value - else: - data3 = 0 - reg1_ok = yield dec2.e.read_reg1.ok - if reg1_ok: - reg1_sel = yield dec2.e.read_reg1.data - data1 = sim.gpr(reg1_sel).value - else: - data1 = 0 - reg2_ok = yield dec2.e.read_reg2.ok - imm_ok = yield dec2.e.imm_data.ok - if reg2_ok: - reg2_sel = yield dec2.e.read_reg2.data - data2 = sim.gpr(reg2_sel).value - elif imm_ok: - data2 = yield dec2.e.imm_data.imm - else: - data2 = 0 - - yield alu.p.data_i.ra.eq(data1) - yield alu.p.data_i.rb.eq(data2) - yield alu.p.data_i.rs.eq(data3) - - -def set_extra_alu_inputs(alu, dec2, sim): - carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 - carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0 - yield alu.p.data_i.xer_ca[0].eq(carry) - yield alu.p.data_i.xer_ca[1].eq(carry32) - so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 - yield alu.p.data_i.xer_so.eq(so) - + inp = yield from get_cu_inputs(dec2, sim) + yield from ALUHelpers.set_int_ra(alu, dec2, inp) + yield from ALUHelpers.set_int_rb(alu, dec2, inp) + + yield from ALUHelpers.set_xer_ca(alu, dec2, inp) + yield from ALUHelpers.set_xer_so(alu, dec2, inp) + # This test bench is a bit different than is usual. Initially when I # was writing it, I had all of the tests call a function to create a @@ -85,92 +65,26 @@ def set_extra_alu_inputs(alu, dec2, sim): # massively. Before, it took around 1 minute on my computer, now it # takes around 3 seconds -test_data = [] - class MulTestCase(FHDLTestCase): + test_data = [] + def __init__(self, name): super().__init__(name) self.test_name = name - def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): - tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) - test_data.append(tc) + def run_tst_program(self, prog, initial_regs=None, initial_sprs=None): + tc = TestCase(prog, self.test_name, initial_regs, initial_sprs) + self.test_data.append(tc) - def test_shift(self): - insns = ["slw", "sld", "srw", "srd", "sraw", "srad"] - for i in range(20): + def test_rand_mullw(self): + insns = ["mullw", "mullw.", "mullwo", "mullwo."] + for i in range(40): choice = random.choice(insns) lst = [f"{choice} 3, 1, 2"] initial_regs = [0] * 32 initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, 63) - print(initial_regs[1], initial_regs[2]) - self.run_tst_program(Program(lst), initial_regs) - - - def test_shift_arith(self): - lst = ["sraw 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, 63) - print(initial_regs[1], initial_regs[2]) - self.run_tst_program(Program(lst), initial_regs) - - def test_shift_once(self): - lst = ["slw 3, 1, 4", - "slw 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = 0x80000000 - initial_regs[2] = 0x40 - initial_regs[4] = 0x00 - self.run_tst_program(Program(lst), initial_regs) - - def test_rlwinm(self): - for i in range(10): - mb = random.randint(0,31) - me = random.randint(0,31) - sh = random.randint(0,31) - lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rlwimi(self): - lst = ["rlwimi 3, 1, 5, 20, 6"] - initial_regs = [0] * 32 - initial_regs[1] = 0xdeadbeef - initial_regs[3] = 0x12345678 - self.run_tst_program(Program(lst), initial_regs) - - def test_rlwnm(self): - lst = ["rlwnm 3, 1, 2, 20, 6"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, 63) - self.run_tst_program(Program(lst), initial_regs) - - def test_rldicl(self): - lst = ["rldicl 3, 1, 5, 20"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rldicr(self): - lst = ["rldicr 3, 1, 5, 20"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rlc(self): - insns = ["rldic", "rldicl", "rldicr"] - for i in range(20): - choice = random.choice(insns) - sh = random.randint(0, 63) - m = random.randint(0, 63) - lst = [f"{choice} 3, 1, {sh}, {m}"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, (1<<64)-1) self.run_tst_program(Program(lst), initial_regs) def test_ilang(self): @@ -210,61 +124,93 @@ class TestRunner(FHDLTestCase): print(test.name) program = test.program self.subTest(test.name) - simulator = ISA(pdecode2, test.regs, test.sprs, 0) + sim = ISA(pdecode2, test.regs, test.sprs, test.cr, + test.mem, test.msr) gen = program.generate_instructions() instructions = list(zip(gen, program.assembly.splitlines())) - index = simulator.pc.CIA.value//4 + index = sim.pc.CIA.value//4 while index < len(instructions): ins, code = instructions[index] - print("0x{:X}".format(ins & 0xffffffff)) + print("instruction: 0x{:X}".format(ins & 0xffffffff)) print(code) + if 'XER' in sim.spr: + so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 + ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0 + ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0 + print ("before: so/ov/32", so, ov, ov32) # ask the decoder to decode this binary data (endian'd) yield pdecode2.dec.bigendian.eq(0) # little / big? yield instruction.eq(ins) # raw binary instr. yield Settle() - fn_unit = yield pdecode2.e.fn_unit - self.assertEqual(fn_unit, Function.SHIFT_ROT.value) - yield from set_alu_inputs(alu, pdecode2, simulator) - yield from set_extra_alu_inputs(alu, pdecode2, simulator) - yield + fn_unit = yield pdecode2.e.do.fn_unit + self.assertEqual(fn_unit, Function.MUL.value) + yield from set_alu_inputs(alu, pdecode2, sim) + yield opname = code.split(' ')[0] - yield from simulator.call(opname) - index = simulator.pc.CIA.value//4 + yield from sim.call(opname) + index = sim.pc.CIA.value//4 vld = yield alu.n.valid_o while not vld: yield vld = yield alu.n.valid_o yield - alu_out = yield alu.n.data_o.o - out_reg_valid = yield pdecode2.e.write_reg.ok - if out_reg_valid: - write_reg_idx = yield pdecode2.e.write_reg.data - expected = simulator.gpr(write_reg_idx).value - msg = f"expected {expected:x}, actual: {alu_out:x}" - self.assertEqual(expected, alu_out, msg) - yield from self.check_extra_alu_outputs(alu, pdecode2, - simulator) + + yield from self.check_alu_outputs(alu, pdecode2, sim, code) sim.add_sync_process(process) - with sim.write_vcd("simulator.vcd", "simulator.gtkw", + with sim.write_vcd("div_simulator.vcd", "div_simulator.gtkw", traces=[]): sim.run() - def check_extra_alu_outputs(self, alu, dec2, sim): - rc = yield dec2.e.rc.data + + def check_alu_outputs(self, alu, dec2, sim, code): + + rc = yield dec2.e.do.rc.data + cridx_ok = yield dec2.e.write_cr.ok + cridx = yield dec2.e.write_cr.data + + print ("check extra output", repr(code), cridx_ok, cridx) if rc: - cr_expected = sim.crl[0].get_range().value - cr_actual = yield alu.n.data_o.cr0 - self.assertEqual(cr_expected, cr_actual) + self.assertEqual(cridx, 0, code) + + oe = yield dec2.e.do.oe.oe + oe_ok = yield dec2.e.do.oe.ok + if not oe or not oe_ok: + # if OE not enabled, XER SO and OV must correspondingly be false + so_ok = yield alu.n.data_o.xer_so.ok + ov_ok = yield alu.n.data_o.xer_ov.ok + self.assertEqual(so_ok, False, code) + self.assertEqual(ov_ok, False, code) + + sim_o = {} + res = {} + + yield from ALUHelpers.get_cr_a(res, alu, dec2) + yield from ALUHelpers.get_xer_ov(res, alu, dec2) + yield from ALUHelpers.get_xer_ca(res, alu, dec2) + yield from ALUHelpers.get_int_o(res, alu, dec2) + yield from ALUHelpers.get_xer_so(res, alu, dec2) + + yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2) + yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2) + yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2) + yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2) + yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2) + + ALUHelpers.check_int_o(self, res, sim_o, code) + ALUHelpers.check_xer_ov(self, res, sim_o, code) + ALUHelpers.check_xer_ca(self, res, sim_o, code) + ALUHelpers.check_xer_so(self, res, sim_o, code) + ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code)) if __name__ == "__main__": unittest.main(exit=False) suite = unittest.TestSuite() - suite.addTest(TestRunner(test_data)) + suite.addTest(TestRunner(MulTestCase.test_data)) runner = unittest.TextTestRunner() runner.run(suite)