From 0ffe4f4ed1a544badb243da322ee8182e965ff05 Mon Sep 17 00:00:00 2001 From: Michael Nolan Date: Fri, 15 May 2020 13:40:08 -0400 Subject: [PATCH] Add rudimentary branch unit test bench --- src/soc/branch/main_stage.py | 88 ++--------------- src/soc/branch/pipe_data.py | 45 ++++++--- src/soc/branch/pipeline.py | 16 ++-- src/soc/branch/test/test_pipe_caller.py | 120 +----------------------- src/soc/decoder/power_enums.py | 1 + 5 files changed, 51 insertions(+), 219 deletions(-) diff --git a/src/soc/branch/main_stage.py b/src/soc/branch/main_stage.py index b50afc27..e4e522b6 100644 --- a/src/soc/branch/main_stage.py +++ b/src/soc/branch/main_stage.py @@ -7,11 +7,8 @@ from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) from nmutil.pipemodbase import PipeModBase -from soc.logical.pipe_data import ALUInputData -from soc.alu.pipe_data import ALUOutputData -from ieee754.part.partsig import PartitionedSignal +from soc.branch.pipe_data import BranchInputData, BranchOutputData from soc.decoder.power_enums import InternalOp -from soc.countzero.countzero import ZeroCounter from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange @@ -24,105 +21,32 @@ def array_of(count, bitwidth): return res -class LogicalMainStage(PipeModBase): +class BranchMainStage(PipeModBase): def __init__(self, pspec): super().__init__(pspec, "main") self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) self.fields.create_specs() def ispec(self): - return ALUInputData(self.pspec) + return BranchInputData(self.pspec) def ospec(self): - return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + return BranchOutputData(self.pspec) # TODO: ALUIntermediateData def elaborate(self, platform): m = Module() comb = m.d.comb - op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o + op = self.i.ctx.op ########################## # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount with m.Switch(op.insn_type): + pass - 
###### AND, OR, XOR ####### - with m.Case(InternalOp.OP_AND): - comb += o.eq(a & b) - with m.Case(InternalOp.OP_OR): - comb += o.eq(a | b) - with m.Case(InternalOp.OP_XOR): - comb += o.eq(a ^ b) - - ###### cmpb ####### - with m.Case(InternalOp.OP_CMPB): - l = [] - for i in range(8): - slc = slice(i*8, (i+1)*8) - l.append(Repl(a[slc] == b[slc], 8)) - comb += o.eq(Cat(*l)) - - ###### popcount ####### - with m.Case(InternalOp.OP_POPCNT): - # starting from a, perform successive addition-reductions - # creating arrays big enough to store the sum, each time - pc = [a] - # QTY32 2-bit (to take 2x 1-bit sums) etc. - work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)] - for l, b in work: - pc.append(array_of(l, b)) - pc8 = pc[3] # array of 8 8-bit counts (popcntb) - pc32 = pc[5] # array of 2 32-bit counts (popcntw) - popcnt = pc[-1] # array of 1 64-bit count (popcntd) - # cascade-tree of adds - for idx, (l, b) in enumerate(work): - for i in range(l): - stt, end = i*2, i*2+1 - src, dst = pc[idx], pc[idx+1] - comb += dst[i].eq(Cat(src[stt], Const(0, 1)) + - Cat(src[end], Const(0, 1))) - # decode operation length - with m.If(op.data_len[2:4] == 0b00): - # popcntb - pack 8x 4-bit answers into output - for i in range(8): - comb += o[i*8:i*8+4].eq(pc8[i]) - with m.Elif(op.data_len[3] == 0): - # popcntw - pack 2x 5-bit answers into output - for i in range(2): - comb += o[i*32:i*32+5].eq(pc32[i]) - with m.Else(): - # popcntd - put 1x 6-bit answer into output - comb += o.eq(popcnt[0]) - - ###### parity ####### - with m.Case(InternalOp.OP_PRTY): - # strange instruction which XORs together the LSBs of each byte - par0 = Signal(reset_less=True) - par1 = Signal(reset_less=True) - comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor()) - comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor()) - with m.If(op.data_len[3] == 1): - comb += o.eq(par0 ^ par1) - with m.Else(): - comb += o[0].eq(par0) - comb += o[32].eq(par1) - - ###### cntlz ####### - with m.Case(InternalOp.OP_CNTZ): - 
x_fields = self.fields.instrs['X'] - XO = Signal(x_fields['XO'][0:-1].shape()) - m.submodules.countz = countz = ZeroCounter() - comb += countz.rs_i.eq(a) - comb += countz.is_32bit_i.eq(op.is_32bit) - comb += countz.count_right_i.eq(XO[-1]) - comb += o.eq(countz.result_o) - - ###### bpermd ####### - # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt ###### sticky overflow and context, both pass-through ##### - comb += self.o.so.eq(self.i.so) comb += self.o.ctx.eq(self.i.ctx) return m diff --git a/src/soc/branch/pipe_data.py b/src/soc/branch/pipe_data.py index 34d9c0ae..26a4aae4 100644 --- a/src/soc/branch/pipe_data.py +++ b/src/soc/branch/pipe_data.py @@ -1,5 +1,6 @@ from nmigen import Signal, Const from ieee754.fpcommon.getop import FPPipeContext +from soc.decoder.power_decoder2 import Data class IntegerData: @@ -15,23 +16,43 @@ class IntegerData: return [self.ctx.eq(i.ctx)] -class ALUInputData(IntegerData): +class BranchInputData(IntegerData): def __init__(self, pspec): super().__init__(pspec) - self.a = Signal(64, reset_less=True) # RA - self.b = Signal(64, reset_less=True) # RB/immediate - self.so = Signal(reset_less=True) - self.carry_in = Signal(reset_less=True) + # We need both lr and spr for bclr and bcctrl. Bclr can read + # from both ctr and lr, and bcctrl can write to both ctr and + # lr. 
+ self.lr = Signal(64, reset_less=True) + self.spr = Signal(64, reset_less=True) + self.cr = Signal(32, reset_less=True) + # NIA not needed, it's already part of ctx def __iter__(self): yield from super().__iter__() - yield self.a - yield self.b - yield self.carry_in - yield self.so + yield self.lr + yield self.spr + yield self.cr def eq(self, i): lst = super().eq(i) - return lst + [self.a.eq(i.a), self.b.eq(i.b), - self.carry_in.eq(i.carry_in), - self.so.eq(i.so)] + return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr), + self.cr.eq(i.cr)] + + +class BranchOutputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.lr = Signal(64, reset_less=True) + self.spr = Signal(64, reset_less=True) + self.nia_out = Data(64, name="nia_out") + + def __iter__(self): + yield from super().__iter__() + yield self.lr + yield self.spr + yield from self.nia_out + + def eq(self, i): + lst = super().eq(i) + return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr), + self.nia_out.eq(i.nia_out)] diff --git a/src/soc/branch/pipeline.py b/src/soc/branch/pipeline.py index f3c83276..ac132f74 100644 --- a/src/soc/branch/pipeline.py +++ b/src/soc/branch/pipeline.py @@ -1,21 +1,17 @@ from nmutil.singlepipe import ControlBase from nmutil.pipemodbase import PipeModBaseChain -from soc.alu.input_stage import ALUInputStage -from soc.logical.main_stage import LogicalMainStage -from soc.alu.output_stage import ALUOutputStage +from soc.branch.main_stage import BranchMainStage -class LogicalStages(PipeModBaseChain): +class BranchStages(PipeModBaseChain): def get_chain(self): - inp = ALUInputStage(self.pspec) - main = LogicalMainStage(self.pspec) - out = ALUOutputStage(self.pspec) - return [inp, main, out] + main = BranchMainStage(self.pspec) + return [main] -class LogicalBasePipe(ControlBase): +class BranchBasePipe(ControlBase): def __init__(self, pspec): ControlBase.__init__(self) - self.pipe1 = LogicalStages(pspec) + self.pipe1 = BranchStages(pspec) self._eqs = 
self.connect([self.pipe1]) def elaborate(self, platform): diff --git a/src/soc/branch/test/test_pipe_caller.py b/src/soc/branch/test/test_pipe_caller.py index d540e7a7..dbc96b11 100644 --- a/src/soc/branch/test/test_pipe_caller.py +++ b/src/soc/branch/test/test_pipe_caller.py @@ -12,7 +12,7 @@ from soc.simulator.program import Program from soc.decoder.isa.all import ISA -from soc.logical.pipeline import LogicalBasePipe +from soc.branch.pipeline import BranchBasePipe from soc.alu.alu_input_record import CompALUOpSubset from soc.alu.pipe_data import ALUPipeSpec import random @@ -33,45 +33,6 @@ def get_rec_width(rec): recwidth += width return recwidth -def set_alu_inputs(alu, dec2, sim): - # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 - # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) - # and place it into data_i.b - - reg3_ok = yield dec2.e.read_reg3.ok - reg1_ok = yield dec2.e.read_reg1.ok - assert reg3_ok != reg1_ok - if reg3_ok: - data1 = yield dec2.e.read_reg3.data - data1 = sim.gpr(data1).value - elif reg1_ok: - data1 = yield dec2.e.read_reg1.data - data1 = sim.gpr(data1).value - else: - data1 = 0 - - yield alu.p.data_i.a.eq(data1) - - # If there's an immediate, set the B operand to that - reg2_ok = yield dec2.e.read_reg2.ok - imm_ok = yield dec2.e.imm_data.imm_ok - if imm_ok: - data2 = yield dec2.e.imm_data.imm - elif reg2_ok: - data2 = yield dec2.e.read_reg2.data - data2 = sim.gpr(data2).value - else: - data2 = 0 - yield alu.p.data_i.b.eq(data2) - - - -def set_extra_alu_inputs(alu, dec2, sim): - carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 - yield alu.p.data_i.carry_in.eq(carry) - so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 - yield alu.p.data_i.so.eq(so) - # This test bench is a bit different than is usual. 
Initially when I # was writing it, I had all of the tests call a function to create a @@ -102,71 +63,16 @@ class LogicalTestCase(FHDLTestCase): tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) test_data.append(tc) - def test_rand(self): - insns = ["and", "or", "xor"] - for i in range(40): - choice = random.choice(insns) - lst = [f"{choice} 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rand_imm_logical(self): - insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"] - for i in range(10): - choice = random.choice(insns) - imm = random.randint(0, (1<<16)-1) - lst = [f"{choice} 3, 1, {imm}"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - @unittest.skip("broken") - def test_cntz(self): - insns = ["cntlzd", "cnttzd"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_parity(self): - insns = ["prtyw", "prtyd"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - @unittest.skip("broken") - def test_popcnt(self): - insns = ["popcntb", "popcntw", "popcntd"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - def test_cmpb(self): - lst = ["cmpb 3, 1, 2"] + lst = ["b 0x1234"] initial_regs = [0] * 32 - initial_regs[1] = 0xdeadbeefcafec0de - initial_regs[2] = 0xd0adb0000afec1de 
self.run_tst_program(Program(lst), initial_regs) def test_ilang(self): rec = CompALUOpSubset() pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - alu = LogicalBasePipe(pspec) + alu = BranchBasePipe(pspec) vl = rtlil.convert(alu, ports=[]) with open("logical_pipeline.il", "w") as f: f.write(vl) @@ -189,7 +95,7 @@ class TestRunner(FHDLTestCase): rec = CompALUOpSubset() pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - m.submodules.alu = alu = LogicalBasePipe(pspec) + m.submodules.alu = alu = BranchBasePipe(pspec) comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) comb += alu.p.valid_i.eq(1) @@ -219,28 +125,12 @@ class TestRunner(FHDLTestCase): yield instruction.eq(ins) # raw binary instr. yield Settle() fn_unit = yield pdecode2.e.fn_unit - self.assertEqual(fn_unit, Function.LOGICAL.value, code) - yield from set_alu_inputs(alu, pdecode2, simulator) - yield from set_extra_alu_inputs(alu, pdecode2, simulator) + self.assertEqual(fn_unit, Function.BRANCH.value, code) yield opname = code.split(' ')[0] yield from simulator.call(opname) index = simulator.pc.CIA.value//4 - vld = yield alu.n.valid_o - while not vld: - yield - vld = yield alu.n.valid_o - yield - alu_out = yield alu.n.data_o.o - out_reg_valid = yield pdecode2.e.write_reg.ok - if out_reg_valid: - write_reg_idx = yield pdecode2.e.write_reg.data - expected = simulator.gpr(write_reg_idx).value - print(f"expected {expected:x}, actual: {alu_out:x}") - self.assertEqual(expected, alu_out, code) - yield from self.check_extra_alu_outputs(alu, pdecode2, - simulator) sim.add_sync_process(process) with sim.write_vcd("simulator.vcd", "simulator.gtkw", diff --git a/src/soc/decoder/power_enums.py b/src/soc/decoder/power_enums.py index 50a91855..b1e9b326 100644 --- a/src/soc/decoder/power_enums.py +++ b/src/soc/decoder/power_enums.py @@ -49,6 +49,7 @@ class Function(Enum): LDST = 2 SHIFT_ROT = 3 LOGICAL = 4 + BRANCH = 5 @unique -- 2.30.2