From 75e0f44296f3915d49051b04b3a165c6f0efdb58 Mon Sep 17 00:00:00 2001 From: Michael Nolan Date: Fri, 15 May 2020 19:04:06 -0400 Subject: [PATCH] Begin adding CR pipeline --- src/soc/cr/main_stage.py | 48 ++++++++ src/soc/cr/pipe_data.py | 48 ++++++++ src/soc/cr/pipeline.py | 21 ++++ src/soc/cr/test/test_pipe_caller.py | 164 ++++++++++++++++++++++++++++ 4 files changed, 281 insertions(+) create mode 100644 src/soc/cr/main_stage.py create mode 100644 src/soc/cr/pipe_data.py create mode 100644 src/soc/cr/pipeline.py create mode 100644 src/soc/cr/test/test_pipe_caller.py diff --git a/src/soc/cr/main_stage.py b/src/soc/cr/main_stage.py new file mode 100644 index 00000000..4272736a --- /dev/null +++ b/src/soc/cr/main_stage.py @@ -0,0 +1,48 @@ +# This stage is intended to do most of the work of executing Logical +# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ +# however input and output stages also perform bit-negation on input(s) +# and output, as well as carry and overflow generation. 
+# This module however should not gate the carry or overflow, that's up
+# to the output stage
+#
+# NOTE(review): the header comment above talks about Logical
+# instructions, but this file defines the main stage of the CR
+# pipeline; it looks carried over from another pipeline -- confirm
+# and reword.
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.cr.pipe_data import CRInputData, CROutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+from soc.countzero.countzero import ZeroCounter
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+def array_of(count, bitwidth):
+    """Return a list of *count* new reset-less Signals, *bitwidth* bits each.
+
+    Not referenced anywhere else in this module.
+    """
+    res = []
+    for i in range(count):
+        res.append(Signal(bitwidth, reset_less=True))
+    return res
+
+
+class CRMainStage(PipeModBase):
+    """Main stage of the CR pipeline.
+
+    Consumes a CRInputData record and produces a CROutputData record.
+    For now the stage only copies the pipeline context from input to
+    output: the Switch on the instruction type in elaborate() has no
+    cases yet.
+    """
+    def __init__(self, pspec):
+        # "main" is the name handed to PipeModBase for this stage
+        super().__init__(pspec, "main")
+        # Field decoder built over the instruction word carried in the
+        # input context (presumably to pick out instruction bit-fields
+        # later -- nothing in this file reads self.fields yet).
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        """Return a fresh input record (CRInputData) for this stage."""
+        return CRInputData(self.pspec)
+
+    def ospec(self):
+        """Return a fresh output record (CROutputData) for this stage."""
+        return CROutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def elaborate(self, platform):
+        """Build and return the nmigen Module for this stage."""
+        m = Module()
+        comb = m.d.comb
+        op = self.i.ctx.op
+
+
+        # Placeholder: per-instruction behaviour is to be added as cases
+        # of this Switch; it is empty at this point.
+        with m.Switch(op.insn_type):
+            pass
+        # Pass the pipeline context through unchanged.
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/cr/pipe_data.py b/src/soc/cr/pipe_data.py
new file mode 100644
index 00000000..bb248c23
--- /dev/null
+++ b/src/soc/cr/pipe_data.py
@@ -0,0 +1,48 @@
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+
+
+class IntegerData:
+    """Base pipeline data record: wraps an FPPipeContext.
+
+    Subclasses add their own signals and extend __iter__() and eq()
+    so the extra signals are listed and copied together with the
+    context.
+    """
+
+    def __init__(self, pspec):
+        self.ctx = FPPipeContext(pspec)
+        # alias of the context's muxid
+        self.muxid = self.ctx.muxid
+
+    def __iter__(self):
+        """Yield everything the context yields."""
+        yield from self.ctx
+
+    def eq(self, i):
+        """Return a list of assignments copying record *i* into self."""
+        return [self.ctx.eq(i.ctx)]
+
+
+class CRInputData(IntegerData):
+    """Input record of the CR pipeline: context plus `a` and `cr`."""
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.a = Signal(64, reset_less=True) # RA
+        self.cr = Signal(64, reset_less=True) # CR in
+
+    def __iter__(self):
+        """Yield the context items, then `a`, then `cr`."""
+        yield from super().__iter__()
+        yield self.a
+        yield self.cr
+
+    def eq(self, i):
+        """Return assignments copying the context, `a` and `cr` from *i*."""
+        lst = super().eq(i)
+        return lst + [self.a.eq(i.a),
+                      self.cr.eq(i.cr)]
+
+class CROutputData(IntegerData):
+    """Output record of the CR pipeline: context plus `o` and `cr`."""
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        # NOTE(review): the two trailing comments below were "RA" and
+        # "CR in", identical to CRInputData; on an output record they
+        # presumably mean "result" and "CR out" -- confirm.
+        self.o = Signal(64, reset_less=True) # output value (was: RA)
+        self.cr = Signal(64, reset_less=True) # CR value (was: CR in)
+
+    def __iter__(self):
+        """Yield the context items, then `o`, then `cr`."""
+        yield from super().__iter__()
+        yield self.o
+        yield self.cr
+
+    def eq(self, i):
+        """Return assignments copying the context, `o` and `cr` from *i*."""
+        lst = super().eq(i)
+        return lst + [self.o.eq(i.o),
+                      self.cr.eq(i.cr)]
diff --git a/src/soc/cr/pipeline.py b/src/soc/cr/pipeline.py
new file mode 100644
index 00000000..121cdf8d
--- /dev/null
+++ b/src/soc/cr/pipeline.py
@@ -0,0 +1,21 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.cr.main_stage import CRMainStage
+
+class CRStages(PipeModBaseChain):
+    """Stage chain of the CR pipeline; currently a single CRMainStage."""
+    def get_chain(self):
+        """Return the list of stages making up the chain."""
+        main = CRMainStage(self.pspec)
+        return [main]
+
+
+class CRBasePipe(ControlBase):
+    """CR pipeline: a ControlBase wrapping one CRStages chain."""
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = CRStages(pspec)
+        # connect() returns the statements wiring the chain up; they are
+        # kept here and added to the comb domain in elaborate().
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        """Return the module with the chain as submodule `pipe`."""
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
diff --git a/src/soc/cr/test/test_pipe_caller.py b/src/soc/cr/test/test_pipe_caller.py
new file mode 100644
index 00000000..f5a29669
--- /dev/null
+++ b/src/soc/cr/test/test_pipe_caller.py
@@ -0,0 +1,164 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.cr.pipeline import CRBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+    """One queued test: a program plus the initial state to run it from.
+
+    The attributes mirror the constructor arguments: ``program`` (the
+    program object to execute), ``regs`` (initial register values),
+    ``sprs`` (initial SPR values) and ``name`` (label printed by the
+    runner before the case is simulated).
+    """
+
+    def __init__(self, program, regs, sprs, name):
+        self.program = program
+        self.regs = regs
+        self.sprs = sprs
+        self.name = name
+
+
+def get_rec_width(rec):
+    """Return the total bit width of every port in record *rec*.
+
+    Each item returned by ``rec.ports()`` must expose a ``width``
+    attribute; a record without ports gives 0.
+    """
+    return sum(port.width for port in rec.ports())
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in CRTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. 
Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+# Filled in by CRTestCase.run_tst_program() and consumed by TestRunner.
+test_data = []
+
+
+class CRTestCase(FHDLTestCase):
+    """Queues CR pipeline test programs instead of simulating them.
+
+    Every ``test_*`` method that calls run_tst_program() appends a
+    TestCase to the module-level ``test_data`` list; TestRunner later
+    pushes all queued cases through one shared simulator.
+    """
+
+    def __init__(self, name):
+        super().__init__(name)
+        # remembered so queued cases can be labelled with the test name
+        self.test_name = name
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        """Queue *prog* to be run later by TestRunner.
+
+        prog:          program object to execute
+        initial_regs:  initial register values; defaults to 32 zeros
+        initial_sprs:  initial SPR values; defaults to an empty dict
+
+        The defaults are built on every call.  The former ``[0] * 32``
+        and ``{}`` default arguments were single objects shared by all
+        queued cases, so a change made through one case would have
+        shown up in every other case that used the defaults.
+        """
+        if initial_regs is None:
+            initial_regs = [0] * 32
+        if initial_sprs is None:
+            initial_sprs = {}
+        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+        test_data.append(tc)
+
+    def test_crand(self):
+        """Queue a one-instruction program (note: it issues crandc)."""
+        lst = ["crandc 1, 2, 3"]
+        self.run_tst_program(Program(lst))
+
+    def test_ilang(self):
+        """Convert a CRBasePipe to RTLIL and write it to cr_pipeline.il."""
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = CRBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=[])
+        # The file is named after this pipeline.  The original wrote to
+        # "logical_pipeline.il", which looks copied from another
+        # pipeline's test and could overwrite a same-named file
+        # produced there.
+        with open("cr_pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    """Runs every queued TestCase through one DUT and one simulator."""
+
+    def __init__(self, test_data):
+        # "run_all" is the method unittest invokes for this test case
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        """Simulate all queued programs against the CR pipeline.
+
+        Builds a module holding the instruction decoder and a
+        CRBasePipe, feeds each instruction of each queued program to
+        the decoder, executes it on the ISA simulator, waits for the
+        pipeline to flag valid output, and -- when the decoded
+        instruction writes a register -- asserts that the pipeline
+        output equals the simulator's value for that register.
+        """
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.alu = alu = CRBasePipe(pspec)
+
+        # Drive the pipeline's operation record from the decoder and
+        # hold the input-valid and output-ready signals high.
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                # NOTE(review): subTest() returns a context manager
+                # that is never entered here, so this call appears to
+                # have no effect -- confirm whether a "with" block was
+                # intended.
+                self.subTest(test.name)
+                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                # the simulator's current instruction address picks the
+                # next instruction (4 bytes per instruction)
+                index = simulator.pc.CIA.value//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("0x{:X}".format(ins & 0xffffffff))
+                    print(code)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+                    # every queued instruction must decode to the CR
+                    # function unit
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.CR.value, code)
+                    yield
+                    opname = code.split(' ')[0]
+                    yield from simulator.call(opname)
+                    index = simulator.pc.CIA.value//4
+
+                    # wait until the pipeline signals valid output
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+                    alu_out = yield alu.n.data_o.o
+                    out_reg_valid = yield pdecode2.e.write_reg.ok
+                    if out_reg_valid:
+                        write_reg_idx = yield pdecode2.e.write_reg.data
+                        expected = simulator.gpr(write_reg_idx).value
+                        print(f"expected {expected:x}, actual: {alu_out:x}")
+                        self.assertEqual(expected, alu_out, code)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                           traces=[]):
+            sim.run()
+
+    def check_extra_alu_outputs(self, alu, dec2, sim):
+        """Generator helper: compare CR field 0 when Rc is set.
+
+        Reads ``dec2.e.rc.data``; when it is non-zero, asserts that
+        ``sim.crl[0]`` equals the pipeline's ``cr0`` output.
+
+        NOTE(review): nothing in this file calls this method, and the
+        CROutputData record added by this patch defines ``cr`` rather
+        than ``cr0`` -- confirm the attribute name before using it.
+        """
+        rc = yield dec2.e.rc.data
+        if rc:
+            cr_expected = sim.crl[0].get_range().value
+            cr_actual = yield alu.n.data_o.cr0
+            self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+    # Run the ordinary test cases first without exiting (this is what
+    # fills test_data), then run everything queued through TestRunner.
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
-- 
2.30.2