From 4d4ba18816f412bbac6889533fadf5ac5dff4ef5 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Wed, 13 May 2020 22:35:16 +0100 Subject: [PATCH] split out Logical operations into separate pipeline --- src/soc/logical/__init__.py | 0 src/soc/logical/formal/proof_input_stage.py | 85 ++++++++ src/soc/logical/formal/proof_main_stage.py | 92 ++++++++ src/soc/logical/input_stage.py | 63 ++++++ src/soc/logical/main_stage.py | 49 +++++ src/soc/logical/pipe_data.py | 37 ++++ src/soc/logical/pipeline.py | 25 +++ src/soc/logical/test/test_pipe_caller.py | 224 ++++++++++++++++++++ 8 files changed, 575 insertions(+) create mode 100644 src/soc/logical/__init__.py create mode 100644 src/soc/logical/formal/proof_input_stage.py create mode 100644 src/soc/logical/formal/proof_main_stage.py create mode 100644 src/soc/logical/input_stage.py create mode 100644 src/soc/logical/main_stage.py create mode 100644 src/soc/logical/pipe_data.py create mode 100644 src/soc/logical/pipeline.py create mode 100644 src/soc/logical/test/test_pipe_caller.py diff --git a/src/soc/logical/__init__.py b/src/soc/logical/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/soc/logical/formal/proof_input_stage.py b/src/soc/logical/formal/proof_input_stage.py new file mode 100644 index 00000000..bb62fb67 --- /dev/null +++ b/src/soc/logical/formal/proof_input_stage.py @@ -0,0 +1,85 @@ +# Proof of correctness for the ALU input stage +# Copyright (C) 2020 Michael Nolan + +from nmigen import Module, Signal, Elaboratable, Mux +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.alu.input_stage import ALUInputStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its 
outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ALUInputStage(pspec) + + a = Signal(64) + b = Signal(64) + comb += [dut.i.a.eq(a), + dut.i.b.eq(b), + a.eq(AnyConst(64)), + b.eq(AnyConst(64))] + + + comb += dut.i.ctx.op.eq(rec) + + + # Assert that op gets copied from the input to output + for p in rec.ports(): + name = p.name + rec_sig = p + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + with m.If(rec.invert_a): + comb += Assert(dut.o.a == ~a) + with m.Else(): + comb += Assert(dut.o.a == a) + + with m.If(rec.imm_data.imm_ok & + ~(rec.insn_type == InternalOp.OP_RLC)): + comb += Assert(dut.o.b == rec.imm_data.imm) + with m.Else(): + comb += Assert(dut.o.b == b) + + + + + return m + +class GTCombinerTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=4) + self.assertFormal(module, mode="cover", depth=4) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("input_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/logical/formal/proof_main_stage.py b/src/soc/logical/formal/proof_main_stage.py new file mode 100644 index 00000000..5ca9481d --- /dev/null +++ b/src/soc/logical/formal/proof_main_stage.py @@ -0,0 +1,92 @@ +# Proof of correctness for the logical pipeline main stage +# Copyright (C) 2020 Michael Nolan + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from 
soc.logical.main_stage import LogicalMainStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = LogicalMainStage(pspec) + + # convenience variables + a = dut.i.a + b = dut.i.b + carry_in = dut.i.carry_in + so_in = dut.i.so + carry_out = dut.o.carry_out + o = dut.o.o + + # setup random inputs + comb += [a.eq(AnyConst(64)), + b.eq(AnyConst(64)), + carry_in.eq(AnyConst(1)), + so_in.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for rec_sig in rec.ports(): + name = rec_sig.name + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + # signed and signed/32 versions of input a + a_signed = Signal(signed(64)) + a_signed_32 = Signal(signed(32)) + comb += a_signed.eq(a) + comb += a_signed_32.eq(a[0:32]) + + # main assertion of logical operations + with m.Switch(rec.insn_type): + with m.Case(InternalOp.OP_AND): + comb += Assert(dut.o.o == a & b) + with m.Case(InternalOp.OP_OR): + comb += Assert(dut.o.o == a | b) + with m.Case(InternalOp.OP_XOR): + comb += Assert(dut.o.o == a ^ b) + + return m + + +class LogicalTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=2) + self.assertFormal(module, mode="cover", depth=2) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("main_stage.il", 
"w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/logical/input_stage.py b/src/soc/logical/input_stage.py new file mode 100644 index 00000000..e6ab48ea --- /dev/null +++ b/src/soc/logical/input_stage.py @@ -0,0 +1,63 @@ +# This stage is intended to adjust the input data before sending it to +# the actual ALU. Things like handling inverting the input, carry_in +# generation for subtraction, and handling of immediates should happen +# here +from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, + unsigned) +from nmutil.pipemodbase import PipeModBase +from soc.decoder.power_enums import InternalOp +from soc.alu.pipe_data import ALUInputData +from soc.decoder.power_enums import CryIn + + +class ALUInputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "input") + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUInputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + ##### operand A ##### + + # operand a to be as-is or inverted + a = Signal.like(self.i.a) + + with m.If(self.i.ctx.op.invert_a): + comb += a.eq(~self.i.a) + with m.Else(): + comb += a.eq(self.i.a) + + comb += self.o.a.eq(a) + + ##### operand B ##### + + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # remove this, just do self.o.b.eq(self.i.b) and move the + # immediate-detection into set_alu_inputs in the unit test + # If there's an immediate, set the B operand to that + comb += self.o.b.eq(self.i.b) + + ##### carry-in ##### + + # either copy incoming carry or set to 1/0 as defined by op + with m.Switch(self.i.ctx.op.input_carry): + with m.Case(CryIn.ZERO): + comb += self.o.carry_in.eq(0) + with m.Case(CryIn.ONE): + comb += self.o.carry_in.eq(1) + with m.Case(CryIn.CA): + comb += self.o.carry_in.eq(self.i.carry_in) + + ##### sticky overflow and context (both pass-through) ##### + + comb += self.o.so.eq(self.i.so) + comb += 
self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/logical/main_stage.py b/src/soc/logical/main_stage.py new file mode 100644 index 00000000..b88649d8 --- /dev/null +++ b/src/soc/logical/main_stage.py @@ -0,0 +1,49 @@ +# This stage is intended to do most of the work of executing the ALU +# instructions. This would be like the additions, logical operations, +# and shifting, as well as carry and overflow generation. This module +# however should not gate the carry or overflow, that's up to the +# output stage +from nmigen import (Module, Signal, Cat, Repl, Mux, Const) +from nmutil.pipemodbase import PipeModBase +from soc.logical.pipe_data import ALUInputData +from soc.alu.pipe_data import ALUOutputData +from ieee754.part.partsig import PartitionedSignal +from soc.decoder.power_enums import InternalOp + + +class LogicalMainStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "main") + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + ########################## + # main switch-statement for handling arithmetic and logic operations + + with m.Switch(self.i.ctx.op.insn_type): + #### and #### + with m.Case(InternalOp.OP_AND): + comb += self.o.o.eq(self.i.a & self.i.b) + + #### or #### + with m.Case(InternalOp.OP_OR): + comb += self.o.o.eq(self.i.a | self.i.b) + + #### xor #### + with m.Case(InternalOp.OP_XOR): + comb += self.o.o.eq(self.i.a ^ self.i.b) + + ###### sticky overflow and context, both pass-through ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/logical/pipe_data.py b/src/soc/logical/pipe_data.py new file mode 100644 index 00000000..34d9c0ae --- /dev/null +++ b/src/soc/logical/pipe_data.py @@ -0,0 +1,37 @@ +from nmigen import Signal, Const +from ieee754.fpcommon.getop import FPPipeContext + + +class IntegerData: 
+ + def __init__(self, pspec): + self.ctx = FPPipeContext(pspec) + self.muxid = self.ctx.muxid + + def __iter__(self): + yield from self.ctx + + def eq(self, i): + return [self.ctx.eq(i.ctx)] + + +class ALUInputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.a = Signal(64, reset_less=True) # RA + self.b = Signal(64, reset_less=True) # RB/immediate + self.so = Signal(reset_less=True) + self.carry_in = Signal(reset_less=True) + + def __iter__(self): + yield from super().__iter__() + yield self.a + yield self.b + yield self.carry_in + yield self.so + + def eq(self, i): + lst = super().eq(i) + return lst + [self.a.eq(i.a), self.b.eq(i.b), + self.carry_in.eq(i.carry_in), + self.so.eq(i.so)] diff --git a/src/soc/logical/pipeline.py b/src/soc/logical/pipeline.py new file mode 100644 index 00000000..f3c83276 --- /dev/null +++ b/src/soc/logical/pipeline.py @@ -0,0 +1,25 @@ +from nmutil.singlepipe import ControlBase +from nmutil.pipemodbase import PipeModBaseChain +from soc.alu.input_stage import ALUInputStage +from soc.logical.main_stage import LogicalMainStage +from soc.alu.output_stage import ALUOutputStage + +class LogicalStages(PipeModBaseChain): + def get_chain(self): + inp = ALUInputStage(self.pspec) + main = LogicalMainStage(self.pspec) + out = ALUOutputStage(self.pspec) + return [inp, main, out] + + +class LogicalBasePipe(ControlBase): + def __init__(self, pspec): + ControlBase.__init__(self) + self.pipe1 = LogicalStages(pspec) + self._eqs = self.connect([self.pipe1]) + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + m.submodules.pipe = self.pipe1 + m.d.comb += self._eqs + return m diff --git a/src/soc/logical/test/test_pipe_caller.py b/src/soc/logical/test/test_pipe_caller.py new file mode 100644 index 00000000..2b2b129d --- /dev/null +++ b/src/soc/logical/test/test_pipe_caller.py @@ -0,0 +1,224 @@ +from nmigen import Module, Signal +from nmigen.back.pysim import Simulator, Delay, Settle +from 
nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +import unittest +from soc.decoder.isa.caller import ISACaller, special_sprs +from soc.decoder.power_decoder import (create_pdecode) +from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_enums import (XER_bits, Function) +from soc.decoder.selectable_int import SelectableInt +from soc.simulator.program import Program +from soc.decoder.isa.all import ISA + + +from soc.logical.pipeline import LogicalBasePipe +from soc.alu.alu_input_record import CompALUOpSubset +from soc.alu.pipe_data import ALUPipeSpec +import random + + +class TestCase: + def __init__(self, program, regs, sprs, name): + self.program = program + self.regs = regs + self.sprs = sprs + self.name = name + +def get_rec_width(rec): + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + return recwidth + +def set_alu_inputs(alu, dec2, sim): + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) + # and place it into data_i.b + + reg3_ok = yield dec2.e.read_reg3.ok + reg1_ok = yield dec2.e.read_reg1.ok + assert reg3_ok != reg1_ok + if reg3_ok: + data1 = yield dec2.e.read_reg3.data + data1 = sim.gpr(data1).value + elif reg1_ok: + data1 = yield dec2.e.read_reg1.data + data1 = sim.gpr(data1).value + else: + data1 = 0 + + yield alu.p.data_i.a.eq(data1) + + # If there's an immediate, set the B operand to that + reg2_ok = yield dec2.e.read_reg2.ok + imm_ok = yield dec2.e.imm_data.imm_ok + if imm_ok: + data2 = yield dec2.e.imm_data.imm + elif reg2_ok: + data2 = yield dec2.e.read_reg2.data + data2 = sim.gpr(data2).value + else: + data2 = 0 + yield alu.p.data_i.b.eq(data2) + + + +def set_extra_alu_inputs(alu, dec2, sim): + carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 + yield alu.p.data_i.carry_in.eq(carry) + so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 + yield 
alu.p.data_i.so.eq(so) + + +# This test bench is a bit different than is usual. Initially when I +# was writing it, I had all of the tests call a function to create a +# device under test and simulator, initialize the dut, run the +# simulation for ~2 cycles, and assert that the dut output what it +# should have. However, this was really slow, since it needed to +# create and tear down the dut and simulator for every test case. + +# Now, instead of doing that, every test case in LogicalTestCase puts some +# data into the test_data list below, describing the instructions to +# be tested and the initial state. Once all the tests have been run, +# test_data gets passed to TestRunner which then sets up the DUT and +# simulator once, runs all the data through it, and asserts that the +# results match the pseudocode sim at every cycle. + +# By doing this, I've reduced the time it takes to run the test suite +# massively. Before, it took around 1 minute on my computer; now it +# takes around 3 seconds. + +test_data = [] + + +class LogicalTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): + tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) + test_data.append(tc) + + def test_rand(self): + insns = ["and", "or", "xor"] + for i in range(40): + choice = random.choice(insns) + lst = [f"{choice} 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rand_imm_logical(self): + insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"] + for i in range(10): + choice = random.choice(insns) + imm = random.randint(0, (1<<16)-1) + lst = [f"{choice} 3, 1, {imm}"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def 
test_ilang(self): + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + alu = LogicalBasePipe(pspec) + vl = rtlil.convert(alu, ports=[]) + with open("logical_pipeline.il", "w") as f: + f.write(vl) + + +class TestRunner(FHDLTestCase): + def __init__(self, test_data): + super().__init__("run_all") + self.test_data = test_data + + def run_all(self): + m = Module() + comb = m.d.comb + instruction = Signal(32) + + pdecode = create_pdecode() + + m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) + + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + m.submodules.alu = alu = LogicalBasePipe(pspec) + + comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) + comb += alu.p.valid_i.eq(1) + comb += alu.n.ready_i.eq(1) + comb += pdecode2.dec.raw_opcode_in.eq(instruction) + sim = Simulator(m) + + sim.add_clock(1e-6) + def process(): + for test in self.test_data: + print(test.name) + program = test.program + self.subTest(test.name) + simulator = ISA(pdecode2, test.regs, test.sprs) + gen = program.generate_instructions() + instructions = list(zip(gen, program.assembly.splitlines())) + + index = simulator.pc.CIA.value//4 + while index < len(instructions): + ins, code = instructions[index] + + print("0x{:X}".format(ins & 0xffffffff)) + print(code) + + # ask the decoder to decode this binary data (endian'd) + yield pdecode2.dec.bigendian.eq(0) # little / big? + yield instruction.eq(ins) # raw binary instr. 
+ yield Settle() + fn_unit = yield pdecode2.e.fn_unit + self.assertEqual(fn_unit, Function.ALU.value) + yield from set_alu_inputs(alu, pdecode2, simulator) + yield from set_extra_alu_inputs(alu, pdecode2, simulator) + yield + opname = code.split(' ')[0] + yield from simulator.call(opname) + index = simulator.pc.CIA.value//4 + + vld = yield alu.n.valid_o + while not vld: + yield + vld = yield alu.n.valid_o + yield + alu_out = yield alu.n.data_o.o + out_reg_valid = yield pdecode2.e.write_reg.ok + if out_reg_valid: + write_reg_idx = yield pdecode2.e.write_reg.data + expected = simulator.gpr(write_reg_idx).value + print(f"expected {expected:x}, actual: {alu_out:x}") + self.assertEqual(expected, alu_out) + yield from self.check_extra_alu_outputs(alu, pdecode2, + simulator) + + sim.add_sync_process(process) + with sim.write_vcd("simulator.vcd", "simulator.gtkw", + traces=[]): + sim.run() + def check_extra_alu_outputs(self, alu, dec2, sim): + rc = yield dec2.e.rc.data + if rc: + cr_expected = sim.crl[0].get_range().value + cr_actual = yield alu.n.data_o.cr0 + self.assertEqual(cr_expected, cr_actual) + + +if __name__ == "__main__": + unittest.main(exit=False) + suite = unittest.TestSuite() + suite.addTest(TestRunner(test_data)) + + runner = unittest.TextTestRunner() + runner.run(suite) -- 2.30.2