From 3b0999b246d64716288f6806856510d3f0c5365c Mon Sep 17 00:00:00 2001 From: colepoirier Date: Wed, 20 May 2020 12:19:40 -0700 Subject: [PATCH] Added OP_BPERMD to fu/logical pipeline, with test --- src/soc/fu/logical/main_stage.py | 13 +++++--- src/soc/fu/logical/test/test_pipe_caller.py | 33 ++++++++++++++------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/soc/fu/logical/main_stage.py b/src/soc/fu/logical/main_stage.py index 48857089..8c3703e6 100644 --- a/src/soc/fu/logical/main_stage.py +++ b/src/soc/fu/logical/main_stage.py @@ -9,6 +9,7 @@ from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) from nmutil.pipemodbase import PipeModBase from nmutil.clz import CLZ from soc.fu.logical.pipe_data import LogicalInputData +from soc.fu.logical.bpermd import Bpermd from soc.fu.alu.pipe_data import ALUOutputData from ieee754.part.partsig import PartitionedSignal from soc.decoder.power_enums import InternalOp @@ -35,7 +36,7 @@ class LogicalMainStage(PipeModBase): return LogicalInputData(self.pspec) def ospec(self): - return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + return ALUOutputData(self.pspec) # TODO: ALUIntermediateData def elaborate(self, platform): m = Module() @@ -74,7 +75,7 @@ class LogicalMainStage(PipeModBase): pc.append(array_of(l, b)) pc8 = pc[3] # array of 8 8-bit counts (popcntb) pc32 = pc[5] # array of 2 32-bit counts (popcntw) - popcnt = pc[-1] # array of 1 64-bit count (popcntd) + popcnt = pc[-1] # array of 1 64-bit count (popcntd) # cascade-tree of adds for idx, (l, b) in enumerate(work): for i in range(l): @@ -100,7 +101,7 @@ class LogicalMainStage(PipeModBase): # strange instruction which XORs together the LSBs of each byte par0 = Signal(reset_less=True) par1 = Signal(reset_less=True) - comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor()) + comb += par0.eq(Cat(a[0], a[8], a[16], a[24]).xor()) comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor()) with m.If(op.data_len[3] == 1): comb += o.eq(par0 ^ par1) @@ -128,7 +129,11 @@ class LogicalMainStage(PipeModBase): comb += o.eq(Mux(op.is_32bit, clz.lz-32, clz.lz)) ###### bpermd ####### - # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt + with m.Case(InternalOp.OP_BPERM): + m.submodules.bpermd = bpermd = Bpermd(64) + comb += bpermd.rs.eq(a) + comb += bpermd.rb.eq(b) + comb += o.eq(bpermd.ra) ###### sticky overflow and context, both pass-through ##### diff --git a/src/soc/fu/logical/test/test_pipe_caller.py b/src/soc/fu/logical/test/test_pipe_caller.py index 4a22308c..b3b46408 100644 --- a/src/soc/fu/logical/test/test_pipe_caller.py +++ b/src/soc/fu/logical/test/test_pipe_caller.py @@ -25,6 +25,7 @@ class TestCase: self.sprs = sprs self.name = name + def get_rec_width(rec): recwidth = 0 # Setup random inputs for dut.op @@ -33,6 +34,7 @@ def get_rec_width(rec): recwidth += width return recwidth + def set_alu_inputs(alu, dec2, sim): # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) @@ -65,7 +67,6 @@ def set_alu_inputs(alu, dec2, sim): yield alu.p.data_i.b.eq(data2) - def set_extra_alu_inputs(alu, dec2, sim): carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0 @@ -73,7 +74,7 @@ def set_extra_alu_inputs(alu, dec2, sim): yield alu.p.data_i.xer_ca[1].eq(carry32) so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 yield alu.p.data_i.xer_so.eq(so) - + # This test bench is a bit different than is usual. Initially when I # was writing it, I had all of the tests call a function to create a @@ -100,6 +101,7 @@ class LogicalTestCase(FHDLTestCase): def __init__(self, name): super().__init__(name) self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) test_data.append(tc) @@ -110,19 +112,19 @@ class LogicalTestCase(FHDLTestCase): choice = random.choice(insns) lst = [f"{choice} 3, 1, 2"] initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, (1<<64)-1) + initial_regs[1] = random.randint(0, (1 << 64)-1) + initial_regs[2] = random.randint(0, (1 << 64)-1) self.run_tst_program(Program(lst), initial_regs) def test_rand_imm_logical(self): insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"] for i in range(10): choice = random.choice(insns) - imm = random.randint(0, (1<<16)-1) + imm = random.randint(0, (1 << 16)-1) lst = [f"{choice} 3, 1, {imm}"] print(lst) initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[1] = random.randint(0, (1 << 64)-1) self.run_tst_program(Program(lst), initial_regs) def test_cntz(self): @@ -132,7 +134,7 @@ class LogicalTestCase(FHDLTestCase): lst = [f"{choice} 3, 1"] print(lst) initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[1] = random.randint(0, (1 << 64)-1) self.run_tst_program(Program(lst), initial_regs) def test_parity(self): @@ -142,7 +144,7 @@ class LogicalTestCase(FHDLTestCase): lst = [f"{choice} 3, 1"] print(lst) initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[1] = random.randint(0, (1 << 64)-1) self.run_tst_program(Program(lst), initial_regs) def test_popcnt(self): @@ -152,7 +154,7 @@ class LogicalTestCase(FHDLTestCase): lst = [f"{choice} 3, 1"] print(lst) initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[1] = random.randint(0, (1 << 64)-1) self.run_tst_program(Program(lst), initial_regs) def test_popcnt_edge(self): @@ -170,6 +172,13 @@ class LogicalTestCase(FHDLTestCase): initial_regs[2] = 0xd0adb0000afec1de self.run_tst_program(Program(lst), initial_regs) + def test_bpermd(self): + lst = ["bpermd 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = 0xdeadbeefcafec0de + initial_regs[2] = 0xd0adb0000afec1de + self.run_tst_program(Program(lst), initial_regs) + def test_ilang(self): rec = CompALUOpSubset() @@ -206,6 +215,7 @@ class TestRunner(FHDLTestCase): sim = Simulator(m) sim.add_clock(1e-6) + def process(): for test in self.test_data: print(test.name) @@ -230,7 +240,7 @@ class TestRunner(FHDLTestCase): self.assertEqual(fn_unit, Function.LOGICAL.value, code) yield from set_alu_inputs(alu, pdecode2, simulator) yield from set_extra_alu_inputs(alu, pdecode2, simulator) - yield + yield opname = code.split(' ')[0] yield from simulator.call(opname) index = simulator.pc.CIA.value//4 @@ -252,8 +262,9 @@ class TestRunner(FHDLTestCase): sim.add_sync_process(process) with sim.write_vcd("simulator.vcd", "simulator.gtkw", - traces=[]): + traces=[]): sim.run() + def check_extra_alu_outputs(self, alu, dec2, sim, code): rc = yield dec2.e.rc.data if rc: -- 2.30.2