From: Luke Kenneth Casson Leighton Date: Mon, 18 May 2020 03:52:43 +0000 (+0100) Subject: rename pipe to fu X-Git-Tag: div_pipeline~1093 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6e30af026706829d5af7820c4bd23111aeb012e9;p=soc.git rename pipe to fu --- diff --git a/src/soc/fu/__init__.py b/src/soc/fu/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/soc/fu/alu/alu_input_record.py b/src/soc/fu/alu/alu_input_record.py new file mode 100644 index 00000000..41a40ebf --- /dev/null +++ b/src/soc/fu/alu/alu_input_record.py @@ -0,0 +1,80 @@ +from nmigen.hdl.rec import Record, Layout + +from soc.decoder.power_enums import InternalOp, Function, CryIn + + +class CompALUOpSubset(Record): + """CompALUOpSubset + + a copy of the relevant subset information from Decode2Execute1Type + needed for ALU operations. use with eq_from_execute1 (below) to + grab subsets. + """ + def __init__(self, name=None): + layout = (('insn_type', InternalOp), + ('fn_unit', Function), + ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))), + #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR + #'xerc = XerBits() # NO: this is from the XER SPR + ('lk', 1), + ('rc', Layout((("rc", 1), ("rc_ok", 1)))), + ('oe', Layout((("oe", 1), ("oe_ok", 1)))), + ('invert_a', 1), + ('invert_out', 1), + ('input_carry', CryIn), + ('output_carry', 1), + ('input_cr', 1), + ('output_cr', 1), + ('is_32bit', 1), + ('is_signed', 1), + ('data_len', 4), # TODO: should be in separate CompLDSTSubset + ('insn', 32), + ('byte_reverse', 1), + ('sign_extend', 1)) + + Record.__init__(self, Layout(layout), name=name) + + # grrr. 
Record does not have kwargs + self.insn_type.reset_less = True + self.fn_unit.reset_less = True + #self.cr = Signal(32, reset_less = True + #self.xerc = XerBits( + self.lk.reset_less = True + self.invert_a.reset_less = True + self.invert_out.reset_less = True + self.input_carry.reset_less = True + self.output_carry.reset_less = True + self.input_cr.reset_less = True + self.output_cr.reset_less = True + self.is_32bit.reset_less = True + self.is_signed.reset_less = True + self.data_len.reset_less = True + self.byte_reverse.reset_less = True + self.sign_extend.reset_less = True + + def eq_from_execute1(self, other): + """ use this to copy in from Decode2Execute1Type + """ + res = [] + for fname, sig in self.fields.items(): + eqfrom = other.fields[fname] + res.append(sig.eq(eqfrom)) + return res + + def ports(self): + return [self.insn_type, + #self.cr, + #self.xerc, + self.lk, + self.invert_a, + self.invert_out, + self.input_carry, + self.output_carry, + self.input_cr, + self.output_cr, + self.is_32bit, + self.is_signed, + self.data_len, + self.byte_reverse, + self.sign_extend, + ] diff --git a/src/soc/fu/alu/formal/.gitignore b/src/soc/fu/alu/formal/.gitignore new file mode 100644 index 00000000..150f68c8 --- /dev/null +++ b/src/soc/fu/alu/formal/.gitignore @@ -0,0 +1 @@ +*/* diff --git a/src/soc/fu/alu/formal/proof_input_stage.py b/src/soc/fu/alu/formal/proof_input_stage.py new file mode 100644 index 00000000..347ab7d4 --- /dev/null +++ b/src/soc/fu/alu/formal/proof_input_stage.py @@ -0,0 +1,77 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import Module, Signal, Elaboratable, Mux +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.alu.input_stage import ALUInputStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import 
InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ALUInputStage(pspec) + + a = Signal(64) + b = Signal(64) + comb += [dut.i.a.eq(a), + dut.i.b.eq(b), + a.eq(AnyConst(64)), + b.eq(AnyConst(64))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for p in rec.ports(): + name = p.name + rec_sig = p + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + with m.If(rec.invert_a): + comb += Assert(dut.o.a == ~a) + with m.Else(): + comb += Assert(dut.o.a == a) + + comb += Assert(dut.o.b == b) + + return m + + +class GTCombinerTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=4) + self.assertFormal(module, mode="cover", depth=4) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("input_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/alu/formal/proof_main_stage.py b/src/soc/fu/alu/formal/proof_main_stage.py new file mode 100644 index 00000000..f102fc2b --- /dev/null +++ b/src/soc/fu/alu/formal/proof_main_stage.py @@ -0,0 +1,88 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.alu.main_stage import 
ALUMainStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ALUMainStage(pspec) + + # convenience variables + a = dut.i.a + b = dut.i.b + carry_in = dut.i.carry_in + so_in = dut.i.so + carry_out = dut.o.carry_out + o = dut.o.o + + # setup random inputs + comb += [a.eq(AnyConst(64)), + b.eq(AnyConst(64)), + carry_in.eq(AnyConst(1)), + so_in.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for rec_sig in rec.ports(): + name = rec_sig.name + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + # signed and signed/32 versions of input a + a_signed = Signal(signed(64)) + a_signed_32 = Signal(signed(32)) + comb += a_signed.eq(a) + comb += a_signed_32.eq(a[0:32]) + + # main assertion of arithmetic operations + with m.Switch(rec.insn_type): + with m.Case(InternalOp.OP_ADD): + comb += Assert(Cat(o, carry_out) == (a + b + carry_in)) + + return m + + +class ALUTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=2) + self.assertFormal(module, mode="cover", depth=2) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("main_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/alu/formal/proof_output_stage.py 
b/src/soc/fu/alu/formal/proof_output_stage.py new file mode 100644 index 00000000..288da071 --- /dev/null +++ b/src/soc/fu/alu/formal/proof_output_stage.py @@ -0,0 +1,115 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import Module, Signal, Elaboratable, Mux, Cat, signed +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.alu.output_stage import ALUOutputStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ALUOutputStage(pspec) + + o = Signal(64) + carry_out = Signal() + carry_out32 = Signal() + ov = Signal() + ov32 = Signal() + cr0 = Signal(4) + so = Signal() + comb += [dut.i.o.eq(o), + dut.i.carry_out.eq(carry_out), + dut.i.so.eq(so), + dut.i.carry_out32.eq(carry_out32), + dut.i.cr0.eq(cr0), + dut.i.ov.eq(ov), + dut.i.ov32.eq(ov32), + o.eq(AnyConst(64)), + carry_out.eq(AnyConst(1)), + carry_out32.eq(AnyConst(1)), + ov.eq(AnyConst(1)), + ov32.eq(AnyConst(1)), + cr0.eq(AnyConst(4)), + so.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + with m.If(dut.i.ctx.op.invert_out): + comb += Assert(dut.o.o == ~o) + with m.Else(): + comb += Assert(dut.o.o == o) + + cr_out = Signal.like(cr0) + comb += cr_out.eq(dut.o.cr0) + + o_signed = Signal(signed(64)) + comb += o_signed.eq(dut.o.o) + # Assert only one of 
the comparison bits is set + comb += Assert(cr_out[3] + cr_out[2] + cr_out[1] == 1) + with m.If(o_signed == 0): + comb += Assert(cr_out[1] == 1) + with m.Elif(o_signed > 0): + # sigh. see https://bugs.libre-soc.org/show_bug.cgi?id=305#c61 + # for OP_CMP we do b-a rather than a-b (just like ADD) and + # then invert the *test condition*. + with m.If(rec.insn_type == InternalOp.OP_CMP): + comb += Assert(cr_out[3] == 1) + with m.Else(): + comb += Assert(cr_out[2] == 1) + with m.Elif(o_signed < 0): + # ditto as above + with m.If(rec.insn_type == InternalOp.OP_CMP): + comb += Assert(cr_out[2] == 1) + with m.Else(): + comb += Assert(cr_out[3] == 1) + + + # Assert that op gets copied from the input to output + for p in rec.ports(): + name = p.name + rec_sig = p + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + + return m + +class GTCombinerTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=4) + self.assertFormal(module, mode="cover", depth=4) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("output_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/alu/input_stage.py b/src/soc/fu/alu/input_stage.py new file mode 100644 index 00000000..75207324 --- /dev/null +++ b/src/soc/fu/alu/input_stage.py @@ -0,0 +1,57 @@ +# This stage is intended to adjust the input data before sending it to +# the acutal ALU. 
Things like handling inverting the input, carry_in +# generation for subtraction, and handling of immediates should happen +# here +from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, + unsigned) +from nmutil.pipemodbase import PipeModBase +from soc.decoder.power_enums import InternalOp +from soc.alu.pipe_data import ALUInputData +from soc.decoder.power_enums import CryIn + + +class ALUInputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "input") + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUInputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + ctx = self.i.ctx + + ##### operand A ##### + + # operand a to be as-is or inverted + a = Signal.like(self.i.a) + + with m.If(ctx.op.invert_a): + comb += a.eq(~self.i.a) + with m.Else(): + comb += a.eq(self.i.a) + + comb += self.o.a.eq(a) + comb += self.o.b.eq(self.i.b) + + ##### carry-in ##### + + # either copy incoming carry or set to 1/0 as defined by op + with m.Switch(ctx.op.input_carry): + with m.Case(CryIn.ZERO): + comb += self.o.carry_in.eq(0) + with m.Case(CryIn.ONE): + comb += self.o.carry_in.eq(1) + with m.Case(CryIn.CA): + comb += self.o.carry_in.eq(self.i.carry_in) + + ##### sticky overflow and context (both pass-through) ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(ctx) + + return m diff --git a/src/soc/fu/alu/main_stage.py b/src/soc/fu/alu/main_stage.py new file mode 100644 index 00000000..51001663 --- /dev/null +++ b/src/soc/fu/alu/main_stage.py @@ -0,0 +1,84 @@ +# This stage is intended to do most of the work of executing the Arithmetic +# instructions. This would be like the additions, compares, and sign-extension +# as well as carry and overflow generation. 
This module +# however should not gate the carry or overflow, that's up to the +# output stage +from nmigen import (Module, Signal, Cat, Repl, Mux, Const) +from nmutil.pipemodbase import PipeModBase +from soc.alu.pipe_data import ALUInputData, ALUOutputData +from ieee754.part.partsig import PartitionedSignal +from soc.decoder.power_enums import InternalOp + + +class ALUMainStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "main") + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + carry_out, o = self.o.carry_out, self.o.o + + # check if op is 32-bit, and get sign bit from operand a + is_32bit = Signal(reset_less=True) + sign_bit = Signal(reset_less=True) + comb += is_32bit.eq(self.i.ctx.op.is_32bit) + comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63])) + + # little trick: do the add using only one add (not 2) + add_a = Signal(self.i.a.width + 2, reset_less=True) + add_b = Signal(self.i.a.width + 2, reset_less=True) + add_output = Signal(self.i.a.width + 2, reset_less=True) + with m.If((self.i.ctx.op.insn_type == InternalOp.OP_ADD) | + (self.i.ctx.op.insn_type == InternalOp.OP_CMP)): + # in bit 0, 1+carry_in creates carry into bit 1 and above + comb += add_a.eq(Cat(self.i.carry_in, self.i.a, Const(0, 1))) + comb += add_b.eq(Cat(Const(1, 1), self.i.b, Const(0, 1))) + comb += add_output.eq(add_a + add_b) + + ########################## + # main switch-statement for handling arithmetic operations + + with m.Switch(self.i.ctx.op.insn_type): + #### CMP, CMPL #### + with m.Case(InternalOp.OP_CMP): + # this is supposed to be inverted (b-a, not a-b) + # however we have a trick: instead of adding either 2x 64-bit + # MUXes to invert a and b, or messing with a 64-bit output, + # swap +ve and -ve test in the *output* stage using an XOR gate + comb += o.eq(add_output[1:-1]) + + #### add 
#### + with m.Case(InternalOp.OP_ADD): + # bit 0 is not part of the result, top bit is the carry-out + comb += o.eq(add_output[1:-1]) + comb += carry_out.eq(add_output[-1]) + + #### exts (sign-extend) #### + with m.Case(InternalOp.OP_EXTS): + with m.If(self.i.ctx.op.data_len == 1): + comb += o.eq(Cat(self.i.a[0:8], Repl(self.i.a[7], 64-8))) + with m.If(self.i.ctx.op.data_len == 2): + comb += o.eq(Cat(self.i.a[0:16], Repl(self.i.a[15], 64-16))) + with m.If(self.i.ctx.op.data_len == 4): + comb += o.eq(Cat(self.i.a[0:32], Repl(self.i.a[31], 64-32))) + with m.Case(InternalOp.OP_CMPEQB): + eqs = Signal(8, reset_less=True) + src1 = Signal(8, reset_less=True) + comb += src1.eq(self.i.a[0:8]) + for i in range(8): + comb += eqs[i].eq(src1 == self.i.b[8*i:8*(i+1)]) + comb += self.o.cr0.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1))) + + ###### sticky overflow and context, both pass-through ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/alu/output_stage.py b/src/soc/fu/alu/output_stage.py new file mode 100644 index 00000000..12537957 --- /dev/null +++ b/src/soc/fu/alu/output_stage.py @@ -0,0 +1,61 @@ +# This stage is intended to handle the gating of carry and overflow +# out, summary overflow generation, and updating the condition +# register +from nmigen import (Module, Signal, Cat, Repl) +from nmutil.pipemodbase import PipeModBase +from soc.alu.pipe_data import ALUInputData, ALUOutputData +from ieee754.part.partsig import PartitionedSignal +from soc.decoder.power_enums import InternalOp + + +class ALUOutputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "output") + + def ispec(self): + return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + + def ospec(self): + return ALUOutputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + # op requests inversion of the output + o = Signal.like(self.i.o) + with m.If(self.i.ctx.op.invert_out): + comb += 
o.eq(~self.i.o) + with m.Else(): + comb += o.eq(self.i.o) + + # create condition register cr0 and sticky-overflow + is_zero = Signal(reset_less=True) + is_positive = Signal(reset_less=True) + is_negative = Signal(reset_less=True) + msb_test = Signal(reset_less=True) # set equal to MSB, invert if OP=CMP + is_cmp = Signal(reset_less=True) # true if OP=CMP + so = Signal(reset_less=True) + + # TODO: if o[63] is XORed with "operand == OP_CMP" + # that can be used as a test + # see https://bugs.libre-soc.org/show_bug.cgi?id=305#c60 + + comb += is_cmp.eq(self.i.ctx.op.insn_type == InternalOp.OP_CMP) + comb += msb_test.eq(o[-1] ^ is_cmp) + comb += is_zero.eq(o == 0) + comb += is_positive.eq(~is_zero & ~msb_test) + comb += is_negative.eq(~is_zero & msb_test) + comb += so.eq(self.i.so | self.i.ov) + + comb += self.o.o.eq(o) + with m.If(self.i.ctx.op.insn_type != InternalOp.OP_CMPEQB): + comb += self.o.cr0.eq(Cat(so, is_zero, is_positive, is_negative)) + with m.Else(): + comb += self.o.cr0.eq(self.i.cr0) + + comb += self.o.so.eq(so) + + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/alu/pipe_data.py b/src/soc/fu/alu/pipe_data.py new file mode 100644 index 00000000..c386397a --- /dev/null +++ b/src/soc/fu/alu/pipe_data.py @@ -0,0 +1,90 @@ +from nmigen import Signal, Const +from nmutil.dynamicpipe import SimpleHandshakeRedir +from soc.alu.alu_input_record import CompALUOpSubset +from ieee754.fpcommon.getop import FPPipeContext + + +class IntegerData: + + def __init__(self, pspec): + self.ctx = FPPipeContext(pspec) + self.muxid = self.ctx.muxid + + def __iter__(self): + yield from self.ctx + + def eq(self, i): + return [self.ctx.eq(i.ctx)] + + def ports(self): + return self.ctx.ports() + + +class ALUInputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.a = Signal(64, reset_less=True) # RA + self.b = Signal(64, reset_less=True) # RB/immediate + self.so = Signal(reset_less=True) + self.carry_in = Signal(reset_less=True) + + 
def __iter__(self): + yield from super().__iter__() + yield self.a + yield self.b + yield self.carry_in + yield self.so + + def eq(self, i): + lst = super().eq(i) + return lst + [self.a.eq(i.a), self.b.eq(i.b), + self.carry_in.eq(i.carry_in), + self.so.eq(i.so)] + +# TODO: ALUIntermediateData which does not have +# cr0, ov, ov32 in it (because they are generated as outputs by +# the final output stage, not by the intermediate stage) +# https://bugs.libre-soc.org/show_bug.cgi?id=305#c19 + +class ALUOutputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.o = Signal(64, reset_less=True, name="stage_o") + self.carry_out = Signal(reset_less=True) + self.carry_out32 = Signal(reset_less=True) + self.cr0 = Signal(4, reset_less=True) + self.ov = Signal(reset_less=True) + self.ov32 = Signal(reset_less=True) + self.so = Signal(reset_less=True) + + def __iter__(self): + yield from super().__iter__() + yield self.o + yield self.carry_out + yield self.carry_out32 + yield self.cr0 + yield self.ov + yield self.ov32 + yield self.so + + def eq(self, i): + lst = super().eq(i) + return lst + [self.o.eq(i.o), + self.carry_out.eq(i.carry_out), + self.carry_out32.eq(i.carry_out32), + self.cr0.eq(i.cr0), self.ov.eq(i.ov), + self.ov32.eq(i.ov32), self.so.eq(i.so)] + + +class IntPipeSpec: + def __init__(self, id_wid=2, op_wid=1): + self.id_wid = id_wid + self.op_wid = op_wid + self.opkls = lambda _: CompALUOpSubset(name="op") + self.stage = None + + +class ALUPipeSpec(IntPipeSpec): + def __init__(self, id_wid, op_wid): + super().__init__(id_wid, op_wid) + self.pipekls = SimpleHandshakeRedir diff --git a/src/soc/fu/alu/pipeline.py b/src/soc/fu/alu/pipeline.py new file mode 100644 index 00000000..e8dd1991 --- /dev/null +++ b/src/soc/fu/alu/pipeline.py @@ -0,0 +1,25 @@ +from nmutil.singlepipe import ControlBase +from nmutil.pipemodbase import PipeModBaseChain +from soc.alu.input_stage import ALUInputStage +from soc.alu.main_stage import ALUMainStage +from 
soc.alu.output_stage import ALUOutputStage + +class ALUStages(PipeModBaseChain): + def get_chain(self): + inp = ALUInputStage(self.pspec) + main = ALUMainStage(self.pspec) + out = ALUOutputStage(self.pspec) + return [inp, main, out] + + +class ALUBasePipe(ControlBase): + def __init__(self, pspec): + ControlBase.__init__(self) + self.pipe1 = ALUStages(pspec) + self._eqs = self.connect([self.pipe1]) + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + m.submodules.pipe = self.pipe1 + m.d.comb += self._eqs + return m diff --git a/src/soc/fu/alu/test/test_pipe_caller.py b/src/soc/fu/alu/test/test_pipe_caller.py new file mode 100644 index 00000000..f42112e1 --- /dev/null +++ b/src/soc/fu/alu/test/test_pipe_caller.py @@ -0,0 +1,270 @@ +from nmigen import Module, Signal +from nmigen.back.pysim import Simulator, Delay, Settle +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +import unittest +from soc.decoder.isa.caller import ISACaller, special_sprs +from soc.decoder.power_decoder import (create_pdecode) +from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_enums import (XER_bits, Function, InternalOp) +from soc.decoder.selectable_int import SelectableInt +from soc.simulator.program import Program +from soc.decoder.isa.all import ISA + + +from soc.alu.pipeline import ALUBasePipe +from soc.alu.alu_input_record import CompALUOpSubset +from soc.alu.pipe_data import ALUPipeSpec +import random + +class TestCase: + def __init__(self, program, regs, sprs, name): + self.program = program + self.regs = regs + self.sprs = sprs + self.name = name + +def get_rec_width(rec): + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + return recwidth + +def set_alu_inputs(alu, dec2, sim): + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) + # and place it into data_i.b + 
+ reg3_ok = yield dec2.e.read_reg3.ok + reg1_ok = yield dec2.e.read_reg1.ok + assert reg3_ok != reg1_ok + if reg3_ok: + data1 = yield dec2.e.read_reg3.data + data1 = sim.gpr(data1).value + elif reg1_ok: + data1 = yield dec2.e.read_reg1.data + data1 = sim.gpr(data1).value + else: + data1 = 0 + + yield alu.p.data_i.a.eq(data1) + + # If there's an immediate, set the B operand to that + reg2_ok = yield dec2.e.read_reg2.ok + imm_ok = yield dec2.e.imm_data.imm_ok + if imm_ok: + data2 = yield dec2.e.imm_data.imm + elif reg2_ok: + data2 = yield dec2.e.read_reg2.data + data2 = sim.gpr(data2).value + else: + data2 = 0 + yield alu.p.data_i.b.eq(data2) + + + +def set_extra_alu_inputs(alu, dec2, sim): + carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 + yield alu.p.data_i.carry_in.eq(carry) + so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 + yield alu.p.data_i.so.eq(so) + + +# This test bench is a bit different than is usual. Initially when I +# was writing it, I had all of the tests call a function to create a +# device under test and simulator, initialize the dut, run the +# simulation for ~2 cycles, and assert that the dut output what it +# should have. However, this was really slow, since it needed to +# create and tear down the dut and simulator for every test case. + +# Now, instead of doing that, every test case in ALUTestCase puts some +# data into the test_data list below, describing the instructions to +# be tested and the initial state. Once all the tests have been run, +# test_data gets passed to TestRunner which then sets up the DUT and +# simulator once, runs all the data through it, and asserts that the +# results match the pseudocode sim at every cycle. + +# By doing this, I've reduced the time it takes to run the test suite +# massively. 
Before, it took around 1 minute on my computer, now it +# takes around 3 seconds + +test_data = [] + + +class ALUTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): + tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) + test_data.append(tc) + + def test_rand(self): + insns = ["add", "add.", "subf"] + for i in range(40): + choice = random.choice(insns) + lst = [f"{choice} 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rand_imm(self): + insns = ["addi", "addis", "subfic"] + for i in range(10): + choice = random.choice(insns) + imm = random.randint(-(1<<15), (1<<15)-1) + lst = [f"{choice} 3, 1, {imm}"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_adde(self): + lst = ["adde. 5, 6, 7"] + initial_regs = [0] * 32 + initial_regs[6] = random.randint(0, (1<<64)-1) + initial_regs[7] = random.randint(0, (1<<64)-1) + initial_sprs = {} + xer = SelectableInt(0, 64) + xer[XER_bits['CA']] = 1 + initial_sprs[special_sprs['XER']] = xer + self.run_tst_program(Program(lst), initial_regs, initial_sprs) + + def test_cmp(self): + lst = ["subf. 
1, 6, 7", + "cmp cr2, 1, 6, 7"] + initial_regs = [0] * 32 + initial_regs[6] = 0x10 + initial_regs[7] = 0x05 + self.run_tst_program(Program(lst), initial_regs, {}) + + def test_extsb(self): + insns = ["extsb", "extsh", "extsw"] + for i in range(10): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_cmpeqb(self): + lst = ["cmpeqb cr0, 1, 2"] + for i in range(20): + initial_regs = [0] * 32 + initial_regs[1] = i + initial_regs[2] = 0x01030507090b0d0f11 + self.run_tst_program(Program(lst), initial_regs, {}) + + def test_ilang(self): + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + alu = ALUBasePipe(pspec) + vl = rtlil.convert(alu, ports=alu.ports()) + with open("pipeline.il", "w") as f: + f.write(vl) + + +class TestRunner(FHDLTestCase): + def __init__(self, test_data): + super().__init__("run_all") + self.test_data = test_data + + def run_all(self): + m = Module() + comb = m.d.comb + instruction = Signal(32) + + pdecode = create_pdecode() + + m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) + + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + m.submodules.alu = alu = ALUBasePipe(pspec) + + comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) + comb += alu.p.valid_i.eq(1) + comb += alu.n.ready_i.eq(1) + comb += pdecode2.dec.raw_opcode_in.eq(instruction) + sim = Simulator(m) + + sim.add_clock(1e-6) + def process(): + for test in self.test_data: + print(test.name) + program = test.program + self.subTest(test.name) + simulator = ISA(pdecode2, test.regs, test.sprs, 0) + gen = program.generate_instructions() + instructions = list(zip(gen, program.assembly.splitlines())) + + index = simulator.pc.CIA.value//4 + while index < len(instructions): + ins, code = instructions[index] + + print("0x{:X}".format(ins & 0xffffffff)) + 
print(code) + + # ask the decoder to decode this binary data (endian'd) + yield pdecode2.dec.bigendian.eq(0) # little / big? + yield instruction.eq(ins) # raw binary instr. + yield Settle() + fn_unit = yield pdecode2.e.fn_unit + self.assertEqual(fn_unit, Function.ALU.value) + yield from set_alu_inputs(alu, pdecode2, simulator) + yield from set_extra_alu_inputs(alu, pdecode2, simulator) + yield + opname = code.split(' ')[0] + yield from simulator.call(opname) + index = simulator.pc.CIA.value//4 + + vld = yield alu.n.valid_o + while not vld: + yield + vld = yield alu.n.valid_o + yield + alu_out = yield alu.n.data_o.o + out_reg_valid = yield pdecode2.e.write_reg.ok + if out_reg_valid: + write_reg_idx = yield pdecode2.e.write_reg.data + expected = simulator.gpr(write_reg_idx).value + print(f"expected {expected:x}, actual: {alu_out:x}") + self.assertEqual(expected, alu_out) + yield from self.check_extra_alu_outputs(alu, pdecode2, + simulator, code) + + sim.add_sync_process(process) + with sim.write_vcd("simulator.vcd", "simulator.gtkw", + traces=[]): + sim.run() + + def check_extra_alu_outputs(self, alu, dec2, sim, code): + rc = yield dec2.e.rc.data + if rc: + cr_expected = sim.crl[0].get_range().value + cr_actual = yield alu.n.data_o.cr0 + self.assertEqual(cr_expected, cr_actual, code) + + op = yield dec2.e.insn_type + if op == InternalOp.OP_CMP.value or \ + op == InternalOp.OP_CMPEQB.value: + bf = yield dec2.dec.BF + cr_actual = yield alu.n.data_o.cr0 + cr_expected = sim.crl[bf].get_range().value + self.assertEqual(cr_expected, cr_actual, code) + + + +if __name__ == "__main__": + unittest.main(exit=False) + suite = unittest.TestSuite() + suite.addTest(TestRunner(test_data)) + + runner = unittest.TextTestRunner() + runner.run(suite) diff --git a/src/soc/fu/branch/__init__.py b/src/soc/fu/branch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/soc/fu/branch/br_input_record.py b/src/soc/fu/branch/br_input_record.py new file mode 100644 index 
00000000..d4f039cd --- /dev/null +++ b/src/soc/fu/branch/br_input_record.py @@ -0,0 +1,81 @@ +from nmigen.hdl.rec import Record, Layout + +from soc.decoder.power_enums import InternalOp, Function, CryIn + + +class CompBROpSubset(Record): + """CompBROpSubset + + TODO: remove anything not needed by the Branch pipeline (determine this + after all branch operations have been written. see + https://bugs.libre-soc.org/show_bug.cgi?id=313#c3) + + a copy of the relevant subset information from Decode2Execute1Type + needed for Branch operations. use with eq_from_execute1 (below) to + grab subsets. + """ + def __init__(self, name=None): + layout = (('insn_type', InternalOp), + ('fn_unit', Function), + ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))), + #'cr = Signal(32) # NO: this is from the CR SPR + #'xerc = XerBits() # NO: this is from the XER SPR + ('lk', 1), + ('rc', Layout((("rc", 1), ("rc_ok", 1)))), + ('oe', Layout((("oe", 1), ("oe_ok", 1)))), + ('invert_a', 1), + ('invert_out', 1), + ('input_carry', CryIn), + ('output_carry', 1), + ('input_cr', 1), + ('output_cr', 1), + ('is_32bit', 1), + ('is_signed', 1), + ('insn', 32), + ('byte_reverse', 1), + ('sign_extend', 1)) + + Record.__init__(self, Layout(layout), name=name) + + # grrr. 
Record does not have kwargs + self.insn_type.reset_less = True + self.fn_unit.reset_less = True + #self.cr = Signal(32, reset_less = True + #self.xerc = XerBits( + self.lk.reset_less = True + self.invert_a.reset_less = True + self.invert_out.reset_less = True + self.input_carry.reset_less = True + self.output_carry.reset_less = True + self.input_cr.reset_less = True + self.output_cr.reset_less = True + self.is_32bit.reset_less = True + self.is_signed.reset_less = True + self.byte_reverse.reset_less = True + self.sign_extend.reset_less = True + + def eq_from_execute1(self, other): + """ use this to copy in from Decode2Execute1Type + """ + res = [] + for fname, sig in self.fields.items(): + eqfrom = other.fields[fname] + res.append(sig.eq(eqfrom)) + return res + + def ports(self): + return [self.insn_type, + #self.cr, + #self.xerc, + self.lk, + self.invert_a, + self.invert_out, + self.input_carry, + self.output_carry, + self.input_cr, + self.output_cr, + self.is_32bit, + self.is_signed, + self.byte_reverse, + self.sign_extend, + ] diff --git a/src/soc/fu/branch/formal/proof_input_stage.py b/src/soc/fu/branch/formal/proof_input_stage.py new file mode 100644 index 00000000..fb097c87 --- /dev/null +++ b/src/soc/fu/branch/formal/proof_input_stage.py @@ -0,0 +1,80 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import Module, Signal, Elaboratable, Mux +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.alu.input_stage import ALUInputStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.branch.br_input_record import CompBROpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): 
+ m = Module() + comb = m.d.comb + + rec = CompBROpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ALUInputStage(pspec) + + a = Signal(64) + b = Signal(64) + comb += [dut.i.a.eq(a), + dut.i.b.eq(b), + a.eq(AnyConst(64)), + b.eq(AnyConst(64))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for p in rec.ports(): + name = p.name + rec_sig = p + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + with m.If(rec.invert_a): + comb += Assert(dut.o.a == ~a) + with m.Else(): + comb += Assert(dut.o.a == a) + + with m.If(rec.imm_data.imm_ok & + ~(rec.insn_type == InternalOp.OP_RLC)): + comb += Assert(dut.o.b == rec.imm_data.imm) + with m.Else(): + comb += Assert(dut.o.b == b) + + return m + +class GTCombinerTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=4) + self.assertFormal(module, mode="cover", depth=4) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("input_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/branch/formal/proof_main_stage.py b/src/soc/fu/branch/formal/proof_main_stage.py new file mode 100644 index 00000000..5ca9481d --- /dev/null +++ b/src/soc/fu/branch/formal/proof_main_stage.py @@ -0,0 +1,92 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.logical.main_stage import LogicalMainStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import 
CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = LogicalMainStage(pspec) + + # convenience variables + a = dut.i.a + b = dut.i.b + carry_in = dut.i.carry_in + so_in = dut.i.so + carry_out = dut.o.carry_out + o = dut.o.o + + # setup random inputs + comb += [a.eq(AnyConst(64)), + b.eq(AnyConst(64)), + carry_in.eq(AnyConst(1)), + so_in.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for rec_sig in rec.ports(): + name = rec_sig.name + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + # signed and signed/32 versions of input a + a_signed = Signal(signed(64)) + a_signed_32 = Signal(signed(32)) + comb += a_signed.eq(a) + comb += a_signed_32.eq(a[0:32]) + + # main assertion of arithmetic operations + with m.Switch(rec.insn_type): + with m.Case(InternalOp.OP_AND): + comb += Assert(dut.o.o == a & b) + with m.Case(InternalOp.OP_OR): + comb += Assert(dut.o.o == a | b) + with m.Case(InternalOp.OP_XOR): + comb += Assert(dut.o.o == a ^ b) + + return m + + +class LogicalTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=2) + self.assertFormal(module, mode="cover", depth=2) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("main_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/branch/input_stage.py 
b/src/soc/fu/branch/input_stage.py new file mode 100644 index 00000000..e6ab48ea --- /dev/null +++ b/src/soc/fu/branch/input_stage.py @@ -0,0 +1,63 @@ +# This stage is intended to adjust the input data before sending it to +# the actual ALU. Things like handling inverting the input, carry_in +# generation for subtraction, and handling of immediates should happen +# here +from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, + unsigned) +from nmutil.pipemodbase import PipeModBase +from soc.decoder.power_enums import InternalOp +from soc.alu.pipe_data import ALUInputData +from soc.decoder.power_enums import CryIn + + +class ALUInputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "input") + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUInputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + ##### operand A ##### + + # operand a to be as-is or inverted + a = Signal.like(self.i.a) + + with m.If(self.i.ctx.op.invert_a): + comb += a.eq(~self.i.a) + with m.Else(): + comb += a.eq(self.i.a) + + comb += self.o.a.eq(a) + + ##### operand B ##### + + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # remove this, just do self.o.b.eq(self.i.b) and move the + # immediate-detection into set_alu_inputs in the unit test + # no immediate handling here: operand B is passed through as-is + comb += self.o.b.eq(self.i.b) + + ##### carry-in ##### + + # either copy incoming carry or set to 1/0 as defined by op + with m.Switch(self.i.ctx.op.input_carry): + with m.Case(CryIn.ZERO): + comb += self.o.carry_in.eq(0) + with m.Case(CryIn.ONE): + comb += self.o.carry_in.eq(1) + with m.Case(CryIn.CA): + comb += self.o.carry_in.eq(self.i.carry_in) + + ##### sticky overflow and context (both pass-through) ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/branch/main_stage.py b/src/soc/fu/branch/main_stage.py new file
mode 100644 index 00000000..6f6d488a --- /dev/null +++ b/src/soc/fu/branch/main_stage.py @@ -0,0 +1,139 @@ +# This stage is intended to do most of the work of executing Branch +# instructions. This is OP_B, OP_BC and OP_BCREG: it computes the +# next instruction address (nia) and whether the branch is taken, +# decrements CTR when the BO field requests it, and outputs the +# link register value (cia + 4) for operations marked as "lk". +# The context (ctx) is passed through unmodified. + +from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) +from nmutil.pipemodbase import PipeModBase +from soc.branch.pipe_data import BranchInputData, BranchOutputData +from soc.decoder.power_enums import InternalOp + +from soc.decoder.power_fields import DecodeFields +from soc.decoder.power_fieldsn import SignalBitRange + +def br_ext(bd): + return Cat(Const(0, 2), bd, Repl(bd[-1], 64-(bd.shape().width + 2))) + +""" +Notes on BO Field: + +BO Description +0000z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=0 +0001z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=0 +001at Branch if CR[BI]=0 +0100z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=1 +0101z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=1 +011at Branch if CR[BI]=1 +1a00t Decrement the CTR, then branch if decremented CTR[M:63]!=0 +1a01t Decrement the CTR, then branch if decremented CTR[M:63]=0 +1z1zz Branch always +""" + +class BranchMainStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "main") + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) + self.fields.create_specs() + + def ispec(self): + return BranchInputData(self.pspec) + + def ospec(self): + return BranchOutputData(self.pspec) # TODO: ALUIntermediateData + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + op = self.i.ctx.op + lk = op.lk # see PowerDecode2 as to why this is done + nia_o, lr_o = self.o.nia, self.o.lr + + #
obtain relevant instruction fields + i_fields = self.fields.FormI + aa = Signal(i_fields.AA[0:-1].shape()) + comb += aa.eq(i_fields.AA[0:-1]) + + br_imm_addr = Signal(64, reset_less=True) + br_addr = Signal(64, reset_less=True) + br_taken = Signal(reset_less=True) + + # Handle absolute or relative branches + with m.If(aa): + comb += br_addr.eq(br_imm_addr) + with m.Else(): + comb += br_addr.eq(br_imm_addr + self.i.cia) + + # fields for conditional branches (BO and BI are same for BC and BCREG) + # NOTE: here, BO and BI we would like be treated as CR regfile + # selectors (similar to RA, RB, RS, RT). see comment here: + # https://bugs.libre-soc.org/show_bug.cgi?id=313#c2 + b_fields = self.fields.FormB + BO = b_fields.BO[0:-1] + BI = b_fields.BI[0:-1] + + # The bit of CR selected by BI + cr_bit = Signal(reset_less=True) + comb += cr_bit.eq((self.i.cr & (1<<(31-BI))) != 0) + + # Whether the conditional branch should be taken + bc_taken = Signal(reset_less=True) + with m.If(BO[2]): + comb += bc_taken.eq((cr_bit == BO[3]) | BO[4]) + with m.Else(): + # decrement the counter and place into output + ctr = Signal(64, reset_less=True) + comb += ctr.eq(self.i.ctr - 1) + comb += self.o.ctr.data.eq(ctr) + comb += self.o.ctr.ok.eq(1) + # take either all 64 bits or only 32 of post-decremented counter + ctr_m = Signal(64, reset_less=True) + with m.If(op.is_32bit): + comb += ctr_m.eq(ctr[:32]) + with m.Else(): + comb += ctr_m.eq(ctr) + # check CTR zero/non-zero against BO[1] + ctr_zero_bo1 = Signal(reset_less=True) # BO[1] == (ctr==0) + comb += ctr_zero_bo1.eq(BO[1] ^ ctr_m.any()) + with m.If(BO[3:5] == 0b00): + comb += bc_taken.eq(ctr_zero_bo1 & ~cr_bit) + with m.Elif(BO[3:5] == 0b01): + comb += bc_taken.eq(ctr_zero_bo1 & cr_bit) + with m.Elif(BO[4] == 1): + comb += bc_taken.eq(ctr_zero_bo1) + + ### Main Switch Statement ### + with m.Switch(op.insn_type): + #### branch #### + with m.Case(InternalOp.OP_B): + LI = i_fields.LI[0:-1] + comb += br_imm_addr.eq(br_ext(LI)) + comb +=
br_taken.eq(1) + #### branch conditional #### + with m.Case(InternalOp.OP_BC): + BD = b_fields.BD[0:-1] + comb += br_imm_addr.eq(br_ext(BD)) + comb += br_taken.eq(bc_taken) + #### branch conditional reg #### + with m.Case(InternalOp.OP_BCREG): + comb += br_imm_addr.eq(self.i.spr1) # SPR1 is set by decode unit + comb += br_taken.eq(bc_taken) + + ###### output next instruction address ##### + + comb += nia_o.data.eq(br_addr) + comb += nia_o.ok.eq(br_taken) + + ###### link register - only activate on operations marked as "lk" ##### + + with m.If(lk): + # ctx.op.lk is the AND of the insn LK field *and* whether the + # op is to "listen" to the link field + comb += lr_o.data.eq(self.i.cia + 4) + comb += lr_o.ok.eq(1) + + ###### and context ##### + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/branch/pipe_data.py b/src/soc/fu/branch/pipe_data.py new file mode 100644 index 00000000..0ef4f000 --- /dev/null +++ b/src/soc/fu/branch/pipe_data.py @@ -0,0 +1,90 @@ +""" + Optional Register allocation listed below. mandatory input + (CompBROpSubset, CIA) not included. + + * CR is Condition Register (not an SPR) + * SPR1, SPR2 and SPR3 are all from the SPR regfile.
3 ports are needed + + insn CR SPR1 SPR2 SPR3 + ---- -- ---- ---- ---- + op_b xx xx xx xx + op_ba xx xx xx xx + op_bl xx xx xx xx + op_bla xx xx xx xx + op_bc CR, xx, CTR xx + op_bca CR, xx, CTR xx + op_bcl CR, xx, CTR xx + op_bcla CR, xx, CTR xx + op_bclr CR, LR, CTR xx + op_bclrl CR, LR, CTR xx + op_bcctr CR, xx, CTR xx + op_bcctrl CR, xx, CTR xx + op_bctar CR, TAR, CTR, xx + op_bctarl CR, TAR, CTR, xx + + op_sc xx xx xx MSR + op_scv xx LR, SRR1, MSR + op_rfscv xx LR, CTR, MSR + op_rfid xx SRR0, SRR1, MSR + op_hrfid xx HSRR0, HSRR1, MSR +""" + +from nmigen import Signal, Const +from ieee754.fpcommon.getop import FPPipeContext +from soc.decoder.power_decoder2 import Data +from soc.alu.pipe_data import IntegerData + + +class BranchInputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR + # this involves the *decode* unit selecting the register, based + # on detecting the operand being bcctr, bclr or bctar + + self.spr1 = Signal(64, reset_less=True) # see table above, SPR1 + self.spr2 = Signal(64, reset_less=True) # see table above, SPR2 + self.spr3 = Signal(64, reset_less=True) # see table above, SPR3 + self.cr = Signal(32, reset_less=True) # Condition Register(s) CR0-7 + self.cia = Signal(64, reset_less=True) # Current Instruction Address + + # convenience variables. 
not all of these are used at once + self.ctr = self.srr0 = self.hsrr0 = self.spr2 + self.lr = self.tar = self.srr1 = self.hsrr1 = self.spr1 + self.msr = self.spr3 + + def __iter__(self): + yield from super().__iter__() + yield self.spr1 + yield self.spr2 + yield self.spr3 + yield self.cr + yield self.cia + + def eq(self, i): + lst = super().eq(i) + return lst + [self.spr1.eq(i.spr1), self.spr2.eq(i.spr2), + self.spr3.eq(i.spr3), + self.cr.eq(i.cr), self.cia.eq(i.cia)] + + +class BranchOutputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.lr = Data(64, name="lr") + self.spr = Data(64, name="spr") + self.nia = Data(64, name="nia") + + # convenience variables. + self.ctr = self.spr + + def __iter__(self): + yield from super().__iter__() + yield from self.lr + yield from self.spr + yield from self.nia + + def eq(self, i): + lst = super().eq(i) + return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr), + self.nia.eq(i.nia)] diff --git a/src/soc/fu/branch/pipeline.py b/src/soc/fu/branch/pipeline.py new file mode 100644 index 00000000..ac132f74 --- /dev/null +++ b/src/soc/fu/branch/pipeline.py @@ -0,0 +1,21 @@ +from nmutil.singlepipe import ControlBase +from nmutil.pipemodbase import PipeModBaseChain +from soc.branch.main_stage import BranchMainStage + +class BranchStages(PipeModBaseChain): + def get_chain(self): + main = BranchMainStage(self.pspec) + return [main] + + +class BranchBasePipe(ControlBase): + def __init__(self, pspec): + ControlBase.__init__(self) + self.pipe1 = BranchStages(pspec) + self._eqs = self.connect([self.pipe1]) + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + m.submodules.pipe = self.pipe1 + m.d.comb += self._eqs + return m diff --git a/src/soc/fu/branch/test/test_pipe_caller.py b/src/soc/fu/branch/test/test_pipe_caller.py new file mode 100644 index 00000000..10d2bba2 --- /dev/null +++ b/src/soc/fu/branch/test/test_pipe_caller.py @@ -0,0 +1,210 @@ +from nmigen import Module, Signal +from 
nmigen.back.pysim import Simulator, Delay, Settle +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +import unittest +from soc.decoder.isa.caller import ISACaller, special_sprs +from soc.decoder.power_decoder import (create_pdecode) +from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_enums import (XER_bits, Function) +from soc.decoder.selectable_int import SelectableInt +from soc.simulator.program import Program +from soc.decoder.isa.all import ISA + + +from soc.branch.pipeline import BranchBasePipe +from soc.branch.br_input_record import CompBROpSubset +from soc.alu.pipe_data import ALUPipeSpec +import random + + +class TestCase: + def __init__(self, program, regs, sprs, cr, name): + self.program = program + self.regs = regs + self.sprs = sprs + self.name = name + self.cr = cr + +def get_rec_width(rec): + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + return recwidth + + +# This test bench is a bit different than is usual. Initially when I +# was writing it, I had all of the tests call a function to create a +# device under test and simulator, initialize the dut, run the +# simulation for ~2 cycles, and assert that the dut output what it +# should have. However, this was really slow, since it needed to +# create and tear down the dut and simulator for every test case. + +# Now, instead of doing that, every test case in ALUTestCase puts some +# data into the test_data list below, describing the instructions to +# be tested and the initial state. Once all the tests have been run, +# test_data gets passed to TestRunner which then sets up the DUT and +# simulator once, runs all the data through it, and asserts that the +# results match the pseudocode sim at every cycle. + +# By doing this, I've reduced the time it takes to run the test suite +# massively. 
Before, it took around 1 minute on my computer, now it +# takes around 3 seconds + +test_data = [] + + +class BranchTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, + initial_sprs={}, initial_cr=0): + tc = TestCase(prog, initial_regs, initial_sprs, initial_cr, + self.test_name) + test_data.append(tc) + + def test_unconditional(self): + choices = ["b", "ba", "bl", "bla"] + for i in range(20): + choice = random.choice(choices) + imm = random.randrange(-1<<23, (1<<23)-1) * 4 + lst = [f"{choice} {imm}"] + initial_regs = [0] * 32 + self.run_tst_program(Program(lst), initial_regs) + + def test_bc_cr(self): + for i in range(20): + bc = random.randrange(-1<<13, (1<<13)-1) * 4 + bo = random.choice([0b01100, 0b00100, 0b10100]) + bi = random.randrange(0, 31) + cr = random.randrange(0, (1<<32)-1) + lst = [f"bc {bo}, {bi}, {bc}"] + initial_regs = [0] * 32 + self.run_tst_program(Program(lst), initial_cr=cr) + + def test_bc_ctr(self): + for i in range(20): + bc = random.randrange(-1<<13, (1<<13)-1) * 4 + bo = random.choice([0, 2, 8, 10, 16, 18]) + bi = random.randrange(0, 31) + cr = random.randrange(0, (1<<32)-1) + ctr = random.randint(0, (1<<32)-1) + lst = [f"bc {bo}, {bi}, {bc}"] + initial_sprs={9: SelectableInt(ctr, 64)} + self.run_tst_program(Program(lst), + initial_sprs=initial_sprs, + initial_cr=cr) + + def test_ilang(self): + rec = CompBROpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + alu = BranchBasePipe(pspec) + vl = rtlil.convert(alu, ports=alu.ports()) + with open("logical_pipeline.il", "w") as f: + f.write(vl) + + +class TestRunner(FHDLTestCase): + def __init__(self, test_data): + super().__init__("run_all") + self.test_data = test_data + + def run_all(self): + m = Module() + comb = m.d.comb + instruction = Signal(32) + + pdecode = create_pdecode() + + m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) + + rec = CompBROpSubset() 
+ + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + m.submodules.branch = branch = BranchBasePipe(pspec) + + comb += branch.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) + comb += branch.p.valid_i.eq(1) + comb += branch.n.ready_i.eq(1) + comb += pdecode2.dec.raw_opcode_in.eq(instruction) + sim = Simulator(m) + + sim.add_clock(1e-6) + def process(): + for test in self.test_data: + print(test.name) + program = test.program + self.subTest(test.name) + simulator = ISA(pdecode2, test.regs, test.sprs, test.cr) + initial_cia = 0x2000 + simulator.set_pc(initial_cia) + gen = program.generate_instructions() + instructions = list(zip(gen, program.assembly.splitlines())) + + index = (simulator.pc.CIA.value - initial_cia)//4 + while index < len(instructions) and index >= 0: + print(index) + ins, code = instructions[index] + + print("0x{:X}".format(ins & 0xffffffff)) + print(code) + + # ask the decoder to decode this binary data (endian'd) + yield pdecode2.dec.bigendian.eq(0) # little / big? + yield instruction.eq(ins) # raw binary instr. + yield branch.p.data_i.cia.eq(simulator.pc.CIA.value) + yield branch.p.data_i.cr.eq(simulator.cr.get_range().value) + # note, here, the op will need further decoding in order + # to set the correct SPRs on SPR1/2/3. op_bc* require + # spr2 to be set to CTR, op_bctar require spr1 to be + # set to TAR, op_bclr* require spr1 to be set to LR. 
+ # if op_sc*, op_rf* and op_hrfid are to be added here + # then additional op-decoding is required, accordingly + yield branch.p.data_i.spr2.eq(simulator.spr['CTR'].value) + print(f"cr0: {simulator.crl[0].get_range()}") + yield Settle() + fn_unit = yield pdecode2.e.fn_unit + self.assertEqual(fn_unit, Function.BRANCH.value, code) + yield + yield + opname = code.split(' ')[0] + prev_nia = simulator.pc.NIA.value + yield from simulator.call(opname) + index = (simulator.pc.CIA.value - initial_cia)//4 + + yield from self.assert_outputs(branch, pdecode2, + simulator, prev_nia, code) + + + sim.add_sync_process(process) + with sim.write_vcd("simulator.vcd", "simulator.gtkw", + traces=[]): + sim.run() + + def assert_outputs(self, branch, dec2, sim, prev_nia, code): + branch_taken = yield branch.n.data_o.nia.ok + sim_branch_taken = prev_nia != sim.pc.CIA + self.assertEqual(branch_taken, sim_branch_taken, code) + if branch_taken: + branch_addr = yield branch.n.data_o.nia.data + self.assertEqual(branch_addr, sim.pc.CIA.value, code) + + lk = yield dec2.e.lk + branch_lk = yield branch.n.data_o.lr.ok + self.assertEqual(lk, branch_lk, code) + if lk: + branch_lr = yield branch.n.data_o.lr.data + self.assertEqual(sim.spr['LR'], branch_lr, code) + + +if __name__ == "__main__": + unittest.main(exit=False) + suite = unittest.TestSuite() + suite.addTest(TestRunner(test_data)) + + runner = unittest.TextTestRunner() + runner.run(suite) diff --git a/src/soc/fu/countzero/countzero.py b/src/soc/fu/countzero/countzero.py new file mode 100644 index 00000000..bd61f571 --- /dev/null +++ b/src/soc/fu/countzero/countzero.py @@ -0,0 +1,136 @@ +# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl +from nmigen import Memory, Module, Signal, Cat, Elaboratable +from nmigen.hdl.rec import Record, Layout +from nmigen.cli import main + + +def or4(a, b, c, d): + return Cat(a.any(), b.any(), c.any(), d.any()) + + +class IntermediateResult(Record): + def __init__(self, name=None): + 
layout = (('v16', 15), + ('sel_hi', 2), + ('is_32bit', 1), + ('count_right', 1)) + Record.__init__(self, Layout(layout), name=name) + + +class ZeroCounter(Elaboratable): + def __init__(self): + self.rs_i = Signal(64, reset_less=True) + self.count_right_i = Signal(1, reset_less=True) + self.is_32bit_i = Signal(1, reset_less=True) + self.result_o = Signal(64, reset_less=True) + + def ports(self): + return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o] + + def elaborate(self, platform): + m = Module() + + # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4) + # m.submodule.pe2 = PriorityEncoder(4) + # m.submodule.pe3 = PriorityEncoder(4) + # etc. + # and where right will assign input to v and !right will assign v[::-1] + # so as to reverse the order of the input bits. + + def encoder(v, right): + """ + Return the index of the leftmost or rightmost 1 in a set of 4 bits. + Assumes v is not "0000"; if it is, return (right ? "11" : "00"). + """ + ret = Signal(2, reset_less=True) + with m.If(right): + with m.If(v[0]): + m.d.comb += ret.eq(0) + with m.Elif(v[1]): + m.d.comb += ret.eq(1) + with m.Elif(v[2]): + m.d.comb += ret.eq(2) + with m.Else(): + m.d.comb += ret.eq(3) + with m.Else(): + with m.If(v[3]): + m.d.comb += ret.eq(3) + with m.Elif(v[2]): + m.d.comb += ret.eq(2) + with m.Elif(v[1]): + m.d.comb += ret.eq(1) + with m.Else(): + m.d.comb += ret.eq(0) + return ret + + r = IntermediateResult() + r_in = IntermediateResult() + + m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now + + v = IntermediateResult() + y = Signal(4, reset_less=True) + z = Signal(4, reset_less=True) + sel = Signal(6, reset_less=True) + v4 = Signal(4, reset_less=True) + + # Test 4 groups of 16 bits each. + # The top 2 groups are considered to be zero in 32-bit mode. 
+ m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32], + self.rs_i[32:48], self.rs_i[48:64])) + with m.If(self.is_32bit_i): + m.d.comb += v.sel_hi[1].eq(0) + with m.If(self.count_right_i): + m.d.comb += v.sel_hi[0].eq(~z[0]) + with m.Else(): + m.d.comb += v.sel_hi[0].eq(z[1]) + with m.Else(): + m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i)) + + # Select the leftmost/rightmost non-zero group of 16 bits + with m.Switch(v.sel_hi): + with m.Case(0): + m.d.comb += v.v16.eq(self.rs_i[0:16]) + with m.Case(1): + m.d.comb += v.v16.eq(self.rs_i[16:32]) + with m.Case(2): + m.d.comb += v.v16.eq(self.rs_i[32:48]) + with m.Case(3): + m.d.comb += v.v16.eq(self.rs_i[48:64]) + + # Latch this and do the rest in the next cycle, for the sake of timing + m.d.comb += v.is_32bit.eq(self.is_32bit_i) + m.d.comb += v.count_right.eq(self.count_right_i) + m.d.comb += r_in.eq(v) + m.d.comb += sel[4:6].eq(r.sel_hi) + + # Test 4 groups of 4 bits + m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8], + r.v16[8:12], r.v16[12:16])) + m.d.comb += sel[2:4].eq(encoder(y, r.count_right)) + + # Select the leftmost/rightmost non-zero group of 4 bits + with m.Switch(sel[2:4]): + with m.Case(0): + m.d.comb += v4.eq(r.v16[0:4]) + with m.Case(1): + m.d.comb += v4.eq(r.v16[4:8]) + with m.Case(2): + m.d.comb += v4.eq(r.v16[8:12]) + with m.Case(3): + m.d.comb += v4.eq(r.v16[12:16]) + + m.d.comb += sel[0:2].eq(encoder(v4, r.count_right)) + + # sel is now the index of the leftmost/rightmost 1 bit in rs + o = self.result_o + with m.If(v4 == 0): + # operand is zero, return 32 for 32-bit, else 64 + m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit)) + with m.Elif(r.count_right): + # return (63 - sel), trimmed to 5 bits in 32-bit mode + m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit))) + with m.Else(): + m.d.comb += o.eq(sel) + + return m diff --git a/src/soc/fu/countzero/test/test_countzero.py b/src/soc/fu/countzero/test/test_countzero.py new file mode 100644 index 00000000..60185196 --- /dev/null +++ 
b/src/soc/fu/countzero/test/test_countzero.py @@ -0,0 +1,105 @@ +# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl +from nmigen import Module, Signal +from nmigen.cli import rtlil +from nmigen.back.pysim import Simulator, Delay +from nmigen.test.utils import FHDLTestCase +import unittest +from soc.countzero.countzero import ZeroCounter + + +class ZeroCounterTestCase(FHDLTestCase): + def test_zerocounter(self): + m = Module() + comb = m.d.comb + m.submodules.dut = dut = ZeroCounter() + + sim = Simulator(m) + # sim.add_clock(1e-6) + + def process(): + print("test zero input") + yield dut.rs_i.eq(0) + yield dut.is_32bit_i.eq(0) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 0x40 + # report "bad cntlzd 0 = " & to_hstring(result); + assert(result == 0x40) + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + # report "bad cntlzd 0 = " & to_hstring(result); + assert(result == 0x40) + yield dut.is_32bit_i.eq(1) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + # report "bad cntlzw 0 = " & to_hstring(result); + assert(result == 0x20) + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + # report "bad cntlzw 0 = " & to_hstring(result); + assert(result == 0x20) + # TODO next tests + + yield dut.rs_i.eq(0b00010000) + yield dut.is_32bit_i.eq(0) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 4, "result %d" % result + + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 59, "result %d" % result + + yield dut.is_32bit_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 27, "result %d" % result + + yield dut.rs_i.eq(0b1100000100000000) + yield dut.is_32bit_i.eq(0) + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 14, "result %d" % 
result + + yield dut.count_right_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 55, "result %d" % result + + yield dut.is_32bit_i.eq(1) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 23, "result %d" % result + + yield dut.count_right_i.eq(0) + yield Delay(1e-6) + result = yield dut.result_o + assert result == 14, "result %d" % result + + + sim.add_process(process) # or sim.add_sync_process(process), see below + + # run test and write vcd + fn = "genullnau" + with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()): + sim.run() + + # cntlzd_w + # cnttzd_w + + +if __name__ == "__main__": + + dut = ZeroCounter() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("countzero.il", "w") as f: + f.write(vl) + + unittest.main() diff --git a/src/soc/fu/cr/main_stage.py b/src/soc/fu/cr/main_stage.py new file mode 100644 index 00000000..67bd78ed --- /dev/null +++ b/src/soc/fu/cr/main_stage.py @@ -0,0 +1,124 @@ +# This stage is intended to do Condition Register instructions +# (mcrf, crand/cror and friends, mtcrf/mtocrf, mfcr/mfocrf). +# NOTE: with the exception of mtcrf and mfcr, we really should be doing +# the field decoding which +# selects which bits of CR are to be read / written, back in the +# decoder / insn-issue, have both self.i.cr and self.o.cr +# be broken down into 4-bit-wide "registers", with their +# own "Register File" (indexed by bt, ba and bb), +# exactly how INT regs are done (by RA, RB, RS and RT) +# however we are pushed for time so do it as *one* register.
+ +from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) +from nmutil.pipemodbase import PipeModBase +from soc.cr.pipe_data import CRInputData, CROutputData +from soc.decoder.power_enums import InternalOp + +from soc.decoder.power_fields import DecodeFields +from soc.decoder.power_fieldsn import SignalBitRange + + +class CRMainStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "main") + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) + self.fields.create_specs() + + def ispec(self): + return CRInputData(self.pspec) + + def ospec(self): + return CROutputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + op = self.i.ctx.op + xl_fields = self.fields.FormXL + xfx_fields = self.fields.FormXFX + # default: cr_o remains same as cr input unless modified, below + cr_o = Signal.like(self.i.cr) + comb += cr_o.eq(self.i.cr) + + ##### prepare inputs / temp ##### + + # Generate array for cr input so bits can be selected + cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)]) + for i in range(32): + comb += cr_arr[i].eq(self.i.cr[31-i]) + + # Generate array for cr output so the bit to write to can be + # selected by a signal + cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)]) + for i in range(32): + comb += cr_o[31-i].eq(cr_out_arr[i]) + comb += cr_out_arr[i].eq(cr_arr[i]) + + # Generate the mask for mtcrf, mtocrf, and mfocrf + # replicate every fxm field in the insn to 4-bit, as a mask + FXM = xfx_fields.FXM[0:-1] + mask = Signal(32, reset_less=True) + comb += mask.eq(Cat(*[Repl(FXM[i], 4) for i in range(8)])) + + ################################# + ##### main switch statement ##### + + with m.Switch(op.insn_type): + ##### mcrf ##### + with m.Case(InternalOp.OP_MCRF): + # MCRF copies the 4 bits of crA to crB (for instance + # copying cr2 to cr1) + BF = xl_fields.BF[0:-1] # destination CR + BFA = xl_fields.BFA[0:-1] # source CR + + for i in range(4): + comb += 
cr_out_arr[BF*4 + i].eq(cr_arr[BFA*4 + i]) + + ##### crand, cror, crnor etc. ##### + with m.Case(InternalOp.OP_CROP): + # crand/cror and friends get decoded to the same opcode, but + # one of the fields inside the instruction is a 4 bit lookup + # table. This lookup table gets indexed by bits a and b from + # the CR to determine what the resulting bit should be. + + # Grab the lookup table for cr_op type instructions + lut = Array([Signal(name=f"lut{i}") for i in range(4)]) + # There's no field, just have to grab it directly from the insn + for i in range(4): + comb += lut[i].eq(self.i.ctx.op.insn[6+i]) + + # Get the bit selector fields from the instruction + BT = xl_fields.BT[0:-1] + BA = xl_fields.BA[0:-1] + BB = xl_fields.BB[0:-1] + + # Use the two input bits to look up the result in the LUT + comb += cr_out_arr[BT].eq(lut[Cat(cr_arr[BB], cr_arr[BA])]) + + ##### mtcrf ##### + with m.Case(InternalOp.OP_MTCRF): + # mtocrf and mtcrf are essentially identical + # put input (RA) - mask-selected - into output CR, leave + # rest of CR alone. + comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask)) + + ##### mfcr ##### + with m.Case(InternalOp.OP_MFCR): + # Ugh. mtocrf and mtcrf have one random bit differentiating + # them. 
This bit is not in any particular field, so this + # extracts that bit from the instruction + move_one = Signal(reset_less=True) + comb += move_one.eq(self.i.ctx.op.insn[20]) + + # mfocrf + with m.If(move_one): + comb += self.o.o.eq(self.i.cr & mask) + # mfcr + with m.Else(): + comb += self.o.o.eq(self.i.cr) + + # output and context + comb += self.o.cr.eq(cr_o) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/cr/pipe_data.py b/src/soc/fu/cr/pipe_data.py new file mode 100644 index 00000000..d56c8f3f --- /dev/null +++ b/src/soc/fu/cr/pipe_data.py @@ -0,0 +1,36 @@ +from nmigen import Signal, Const +from ieee754.fpcommon.getop import FPPipeContext +from soc.alu.pipe_data import IntegerData + + +class CRInputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.a = Signal(64, reset_less=True) # RA + self.cr = Signal(64, reset_less=True) # CR in + + def __iter__(self): + yield from super().__iter__() + yield self.a + yield self.cr + + def eq(self, i): + lst = super().eq(i) + return lst + [self.a.eq(i.a), + self.cr.eq(i.cr)] + +class CROutputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.o = Signal(64, reset_less=True) # result (mfcr/mfocrf data) + self.cr = Signal(64, reset_less=True) # CR out + + def __iter__(self): + yield from super().__iter__() + yield self.o + yield self.cr + + def eq(self, i): + lst = super().eq(i) + return lst + [self.o.eq(i.o), + self.cr.eq(i.cr)] diff --git a/src/soc/fu/cr/pipeline.py b/src/soc/fu/cr/pipeline.py new file mode 100644 index 00000000..121cdf8d --- /dev/null +++ b/src/soc/fu/cr/pipeline.py @@ -0,0 +1,21 @@ +from nmutil.singlepipe import ControlBase +from nmutil.pipemodbase import PipeModBaseChain +from soc.cr.main_stage import CRMainStage + +class CRStages(PipeModBaseChain): + def get_chain(self): + main = CRMainStage(self.pspec) + return [main] + + +class CRBasePipe(ControlBase): + def __init__(self, pspec): + ControlBase.__init__(self) + self.pipe1 = CRStages(pspec) + 
self._eqs = self.connect([self.pipe1]) + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + m.submodules.pipe = self.pipe1 + m.d.comb += self._eqs + return m diff --git a/src/soc/fu/cr/test/test_pipe_caller.py b/src/soc/fu/cr/test/test_pipe_caller.py new file mode 100644 index 00000000..fa08fb66 --- /dev/null +++ b/src/soc/fu/cr/test/test_pipe_caller.py @@ -0,0 +1,232 @@ +from nmigen import Module, Signal +from nmigen.back.pysim import Simulator, Delay, Settle +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +import unittest +from soc.decoder.isa.caller import ISACaller, special_sprs +from soc.decoder.power_decoder import (create_pdecode) +from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_enums import (XER_bits, Function) +from soc.decoder.selectable_int import SelectableInt +from soc.simulator.program import Program +from soc.decoder.isa.all import ISA + + +from soc.cr.pipeline import CRBasePipe +from soc.alu.alu_input_record import CompALUOpSubset +from soc.alu.pipe_data import ALUPipeSpec +import random + + +class TestCase: + def __init__(self, program, regs, sprs, cr, name): + self.program = program + self.regs = regs + self.sprs = sprs + self.name = name + self.cr = cr + +def get_rec_width(rec): + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + return recwidth + + +# This test bench is a bit different than is usual. Initially when I +# was writing it, I had all of the tests call a function to create a +# device under test and simulator, initialize the dut, run the +# simulation for ~2 cycles, and assert that the dut output what it +# should have. However, this was really slow, since it needed to +# create and tear down the dut and simulator for every test case. 
+ +# Now, instead of doing that, every test case in ALUTestCase puts some +# data into the test_data list below, describing the instructions to +# be tested and the initial state. Once all the tests have been run, +# test_data gets passed to TestRunner which then sets up the DUT and +# simulator once, runs all the data through it, and asserts that the +# results match the pseudocode sim at every cycle. + +# By doing this, I've reduced the time it takes to run the test suite +# massively. Before, it took around 1 minute on my computer, now it +# takes around 3 seconds + +test_data = [] + + +class CRTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}, + initial_cr=0): + tc = TestCase(prog, initial_regs, initial_sprs, initial_cr, + self.test_name) + test_data.append(tc) + + def test_crop(self): + insns = ["crand", "cror", "crnand", "crnor", "crxor", "creqv", + "crandc", "crorc"] + for i in range(40): + choice = random.choice(insns) + ba = random.randint(0, 31) + bb = random.randint(0, 31) + bt = random.randint(0, 31) + lst = [f"{choice} {ba}, {bb}, {bt}"] + cr = random.randint(0, 7) + self.run_tst_program(Program(lst), initial_cr=cr) + + def test_mcrf(self): + lst = ["mcrf 0, 5"] + cr = 0xffff0000 + self.run_tst_program(Program(lst), initial_cr=cr) + + def test_mtcrf(self): + for i in range(20): + mask = random.randint(0, 255) + lst = [f"mtcrf {mask}, 2"] + cr = random.randint(0, (1<<32)-1) + initial_regs = [0] * 32 + initial_regs[2] = random.randint(0, (1<<32)-1) + self.run_tst_program(Program(lst), initial_regs=initial_regs, + initial_cr=cr) + def test_mtocrf(self): + for i in range(20): + mask = 1< + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.logical.bperm import 
Bpermd + +import unittest + + +# So formal verification is a little different than writing a test +# case, as you're actually generating logic around your module to +# check that it behaves a certain way. So here, I'm going to create a +# module to put my formal assertions in +class Driver(Elaboratable): + def __init__(self): + # We don't need any inputs and outputs here, so I won't + # declare any + pass + + def elaborate(self, platform): + # standard stuff + m = Module() + comb = m.d.comb + + # instantiate the device under test as a submodule + m.submodules.bperm = bperm = Bpermd(64) + + # Grab the inputs and outputs of the DUT to make them more + # convenient to access + rs = bperm.rs + rb = bperm.rb + ra = bperm.ra + + # Before we prove any properties about the DUT, we need to set + # up its inputs. There's a couple ways to do this, you could + # define some inputs and outputs for the driver module and + # wire them up to the DUT, but that's kind of a pain. The + # other option is to use AnyConst/AnySeq, which tells yosys + # that those inputs can take on any value. + + # AnyConst should be used when the input should take on a + # random value, but that value should be constant throughout + # the test. + # AnySeq should be used when the input can change on every + # cycle + + # Since this is a combinatorial circuit, it really doesn't + # matter which one you choose, so I chose AnyConst. If this + # was a sequential circuit, (especially a state machine) you'd + # want to use AnySeq + comb += [rs.eq(AnyConst(64)), + rb.eq(AnyConst(64))] + + + # The pseudocode in the Power ISA manual (v3.1) is as follows: + # do i = 0 to 7 + # index <- RS[8*i:8*i+8] + # if index < 64: + # perm[i] <- RB[index] + # else: + # perm[i] <- 0 + # RA <- 56'b0 || perm[0:8] # big endian though + + # Looking at this, I can identify 3 properties that the bperm + # module should keep: + # 1. RA[8:64] should always equal 0 + # 2. If RS[i*8:i*8+8] >= 64 then RA[i] should equal 0 + # 3. 
If RS[i*8:i*8+8] < 64 then RA[i] should equal RB[index] + + # Now we need to Assert that the properties above hold: + + # Property 1: RA[8:64] should always equal 0 + comb += Assert(ra[8:] == 0) + # Notice how we're adding Assert to comb like it's a circuit? + # That's because it kind of is. If you run this proof and have + # yosys graph the ilang, you'll be able to see an equals + # comparison cell feeding into an assert cell + + # Now we need to prove property #2. I'm going to leave this to + # you Cole. I'd start by writing a for loop and extracting the + # 8 indices into signals. Then I'd write an if statement + # checking if the index is >= 64 (it's hardware, so use an + # m.If()). Finally, I'd add an assert that checks whether + # ra[i] is equal to 0 + + + + return m + + +class TestCase(FHDLTestCase): + # This bit here is actually in charge of running the formal + # proof. It has nmigen spit out the ilang, and feeds it to + # SymbiYosys to run the proof. If the proof fails, yosys will + # generate a .vcd file showing how it was able to violate your + # assertions in proof_bperm_formal/engine_0/trace.vcd. From that + # you should be able to figure out what went wrong, and either + # correct the assertion or fix the DUT + def test_formal(self): + module = Driver() + # This runs a Bounded Model Check on the driver module + # above. What that does is it starts at some initial state, + # and steps it through `depth` cycles, checking that the + # assertions hold at every cycle. Since this is a + # combinatorial module, it only needs 1 cycle to prove + # everything. 
+ self.assertFormal(module, mode="bmc", depth=2) + self.assertFormal(module, mode="cover", depth=2) + + # As mentioned above, you can look at the graph in yosys and see + # all the assertion cells + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("bperm.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/logical/formal/proof_input_stage.py b/src/soc/fu/logical/formal/proof_input_stage.py new file mode 100644 index 00000000..bb62fb67 --- /dev/null +++ b/src/soc/fu/logical/formal/proof_input_stage.py @@ -0,0 +1,85 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import Module, Signal, Elaboratable, Mux +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.alu.input_stage import ALUInputStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ALUInputStage(pspec) + + a = Signal(64) + b = Signal(64) + comb += [dut.i.a.eq(a), + dut.i.b.eq(b), + a.eq(AnyConst(64)), + b.eq(AnyConst(64))] + + + comb += dut.i.ctx.op.eq(rec) + + + # Assert that op gets copied from the input to output + for p in rec.ports(): + name = p.name + rec_sig = p + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + with 
m.If(rec.invert_a): + comb += Assert(dut.o.a == ~a) + with m.Else(): + comb += Assert(dut.o.a == a) + + with m.If(rec.imm_data.imm_ok & + ~(rec.insn_type == InternalOp.OP_RLC)): + comb += Assert(dut.o.b == rec.imm_data.imm) + with m.Else(): + comb += Assert(dut.o.b == b) + + + + + return m + +class GTCombinerTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=4) + self.assertFormal(module, mode="cover", depth=4) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("input_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/logical/formal/proof_main_stage.py b/src/soc/fu/logical/formal/proof_main_stage.py new file mode 100644 index 00000000..5ca9481d --- /dev/null +++ b/src/soc/fu/logical/formal/proof_main_stage.py @@ -0,0 +1,92 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.logical.main_stage import LogicalMainStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = LogicalMainStage(pspec) + + # convenience variables + a = dut.i.a + b = 
dut.i.b + carry_in = dut.i.carry_in + so_in = dut.i.so + carry_out = dut.o.carry_out + o = dut.o.o + + # setup random inputs + comb += [a.eq(AnyConst(64)), + b.eq(AnyConst(64)), + carry_in.eq(AnyConst(1)), + so_in.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for rec_sig in rec.ports(): + name = rec_sig.name + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + # signed and signed/32 versions of input a + a_signed = Signal(signed(64)) + a_signed_32 = Signal(signed(32)) + comb += a_signed.eq(a) + comb += a_signed_32.eq(a[0:32]) + + # main assertions for the logical operations + with m.Switch(rec.insn_type): + with m.Case(InternalOp.OP_AND): + comb += Assert(dut.o.o == a & b) + with m.Case(InternalOp.OP_OR): + comb += Assert(dut.o.o == a | b) + with m.Case(InternalOp.OP_XOR): + comb += Assert(dut.o.o == a ^ b) + + return m + + +class LogicalTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=2) + self.assertFormal(module, mode="cover", depth=2) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("main_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/logical/input_stage.py b/src/soc/fu/logical/input_stage.py new file mode 100644 index 00000000..e6ab48ea --- /dev/null +++ b/src/soc/fu/logical/input_stage.py @@ -0,0 +1,63 @@ +# This stage is intended to adjust the input data before sending it to +# the actual ALU. 
Things like handling inverting the input, carry_in +# generation for subtraction, and handling of immediates should happen +# here +from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, + unsigned) +from nmutil.pipemodbase import PipeModBase +from soc.decoder.power_enums import InternalOp +from soc.alu.pipe_data import ALUInputData +from soc.decoder.power_enums import CryIn + + +class ALUInputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "input") + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUInputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + ##### operand A ##### + + # operand a to be as-is or inverted + a = Signal.like(self.i.a) + + with m.If(self.i.ctx.op.invert_a): + comb += a.eq(~self.i.a) + with m.Else(): + comb += a.eq(self.i.a) + + comb += self.o.a.eq(a) + + ##### operand B ##### + + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # remove this, just do self.o.b.eq(self.i.b) and move the + # immediate-detection into set_alu_inputs in the unit test + # If there's an immediate, set the B operand to that + comb += self.o.b.eq(self.i.b) + + ##### carry-in ##### + + # either copy incoming carry or set to 1/0 as defined by op + with m.Switch(self.i.ctx.op.input_carry): + with m.Case(CryIn.ZERO): + comb += self.o.carry_in.eq(0) + with m.Case(CryIn.ONE): + comb += self.o.carry_in.eq(1) + with m.Case(CryIn.CA): + comb += self.o.carry_in.eq(self.i.carry_in) + + ##### sticky overflow and context (both pass-through) ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/logical/main_stage.py b/src/soc/fu/logical/main_stage.py new file mode 100644 index 00000000..e740d07a --- /dev/null +++ b/src/soc/fu/logical/main_stage.py @@ -0,0 +1,127 @@ +# This stage is intended to do most of the work of executing Logical +# instructions. 
This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ +# however input and output stages also perform bit-negation on input(s) +# and output, as well as carry and overflow generation. +# This module however should not gate the carry or overflow, that's up +# to the output stage + +from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) +from nmutil.pipemodbase import PipeModBase +from soc.logical.pipe_data import ALUInputData +from soc.alu.pipe_data import ALUOutputData +from ieee754.part.partsig import PartitionedSignal +from soc.decoder.power_enums import InternalOp +from soc.countzero.countzero import ZeroCounter + +from soc.decoder.power_fields import DecodeFields +from soc.decoder.power_fieldsn import SignalBitRange + + +def array_of(count, bitwidth): + res = [] + for i in range(count): + res.append(Signal(bitwidth, reset_less=True)) + return res + + +class LogicalMainStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "main") + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) + self.fields.create_specs() + + def ispec(self): + return ALUInputData(self.pspec) + + def ospec(self): + return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o + + ########################## + # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount + + with m.Switch(op.insn_type): + + ###### AND, OR, XOR ####### + with m.Case(InternalOp.OP_AND): + comb += o.eq(a & b) + with m.Case(InternalOp.OP_OR): + comb += o.eq(a | b) + with m.Case(InternalOp.OP_XOR): + comb += o.eq(a ^ b) + + ###### cmpb ####### + with m.Case(InternalOp.OP_CMPB): + l = [] + for i in range(8): + slc = slice(i*8, (i+1)*8) + l.append(Repl(a[slc] == b[slc], 8)) + comb += o.eq(Cat(*l)) + + ###### popcount ####### + with m.Case(InternalOp.OP_POPCNT): + # starting from a, perform successive addition-reductions + # creating 
arrays big enough to store the sum, each time + pc = [a] + # QTY32 2-bit (to take 2x 1-bit sums) etc. + work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)] + for l, b in work: + pc.append(array_of(l, b)) + pc8 = pc[3] # array of 8 8-bit counts (popcntb) + pc32 = pc[5] # array of 2 32-bit counts (popcntw) + popcnt = pc[-1] # array of 1 64-bit count (popcntd) + # cascade-tree of adds + for idx, (l, b) in enumerate(work): + for i in range(l): + stt, end = i*2, i*2+1 + src, dst = pc[idx], pc[idx+1] + comb += dst[i].eq(Cat(src[stt], Const(0, 1)) + + Cat(src[end], Const(0, 1))) + # decode operation length + with m.If(op.data_len[2:4] == 0b00): + # popcntb - pack 8x 4-bit answers into output + for i in range(8): + comb += o[i*8:i*8+4].eq(pc8[i]) + with m.Elif(op.data_len[3] == 0): + # popcntw - pack 2x 5-bit answers into output + for i in range(2): + comb += o[i*32:i*32+5].eq(pc32[i]) + with m.Else(): + # popcntd - put 1x 6-bit answer into output + comb += o.eq(popcnt[0]) + + ###### parity ####### + with m.Case(InternalOp.OP_PRTY): + # strange instruction which XORs together the LSBs of each byte + par0 = Signal(reset_less=True) + par1 = Signal(reset_less=True) + comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor()) + comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor()) + with m.If(op.data_len[3] == 1): + comb += o.eq(par0 ^ par1) + with m.Else(): + comb += o[0].eq(par0) + comb += o[32].eq(par1) + + ###### cntlz ####### + with m.Case(InternalOp.OP_CNTZ): + XO = self.fields.FormX.XO[0:-1] + m.submodules.countz = countz = ZeroCounter() + comb += countz.rs_i.eq(a) + comb += countz.is_32bit_i.eq(op.is_32bit) + comb += countz.count_right_i.eq(XO[-1]) + comb += o.eq(countz.result_o) + + ###### bpermd ####### + # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt + + ###### sticky overflow and context, both pass-through ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/logical/pipe_data.py 
b/src/soc/fu/logical/pipe_data.py new file mode 100644 index 00000000..4bf064fe --- /dev/null +++ b/src/soc/fu/logical/pipe_data.py @@ -0,0 +1,25 @@ +from nmigen import Signal, Const +from ieee754.fpcommon.getop import FPPipeContext +from soc.alu.pipe_data import IntegerData + + +class ALUInputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.a = Signal(64, reset_less=True) # RA + self.b = Signal(64, reset_less=True) # RB/immediate + self.so = Signal(reset_less=True) + self.carry_in = Signal(reset_less=True) + + def __iter__(self): + yield from super().__iter__() + yield self.a + yield self.b + yield self.carry_in + yield self.so + + def eq(self, i): + lst = super().eq(i) + return lst + [self.a.eq(i.a), self.b.eq(i.b), + self.carry_in.eq(i.carry_in), + self.so.eq(i.so)] diff --git a/src/soc/fu/logical/pipeline.py b/src/soc/fu/logical/pipeline.py new file mode 100644 index 00000000..f3c83276 --- /dev/null +++ b/src/soc/fu/logical/pipeline.py @@ -0,0 +1,25 @@ +from nmutil.singlepipe import ControlBase +from nmutil.pipemodbase import PipeModBaseChain +from soc.alu.input_stage import ALUInputStage +from soc.logical.main_stage import LogicalMainStage +from soc.alu.output_stage import ALUOutputStage + +class LogicalStages(PipeModBaseChain): + def get_chain(self): + inp = ALUInputStage(self.pspec) + main = LogicalMainStage(self.pspec) + out = ALUOutputStage(self.pspec) + return [inp, main, out] + + +class LogicalBasePipe(ControlBase): + def __init__(self, pspec): + ControlBase.__init__(self) + self.pipe1 = LogicalStages(pspec) + self._eqs = self.connect([self.pipe1]) + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + m.submodules.pipe = self.pipe1 + m.d.comb += self._eqs + return m diff --git a/src/soc/fu/logical/test/test_bperm.py b/src/soc/fu/logical/test/test_bperm.py new file mode 100644 index 00000000..7a742b0b --- /dev/null +++ b/src/soc/fu/logical/test/test_bperm.py @@ -0,0 +1 @@ +'''Empty until I write 
the unit test''' diff --git a/src/soc/fu/logical/test/test_pipe_caller.py b/src/soc/fu/logical/test/test_pipe_caller.py new file mode 100644 index 00000000..79c1e291 --- /dev/null +++ b/src/soc/fu/logical/test/test_pipe_caller.py @@ -0,0 +1,262 @@ +from nmigen import Module, Signal +from nmigen.back.pysim import Simulator, Delay, Settle +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +import unittest +from soc.decoder.isa.caller import ISACaller, special_sprs +from soc.decoder.power_decoder import (create_pdecode) +from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_enums import (XER_bits, Function) +from soc.decoder.selectable_int import SelectableInt +from soc.simulator.program import Program +from soc.decoder.isa.all import ISA + + +from soc.logical.pipeline import LogicalBasePipe +from soc.alu.alu_input_record import CompALUOpSubset +from soc.alu.pipe_data import ALUPipeSpec +import random + + +class TestCase: + def __init__(self, program, regs, sprs, name): + self.program = program + self.regs = regs + self.sprs = sprs + self.name = name + +def get_rec_width(rec): + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + return recwidth + +def set_alu_inputs(alu, dec2, sim): + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) + # and place it into data_i.b + + reg3_ok = yield dec2.e.read_reg3.ok + reg1_ok = yield dec2.e.read_reg1.ok + assert reg3_ok != reg1_ok + if reg3_ok: + data1 = yield dec2.e.read_reg3.data + data1 = sim.gpr(data1).value + elif reg1_ok: + data1 = yield dec2.e.read_reg1.data + data1 = sim.gpr(data1).value + else: + data1 = 0 + + yield alu.p.data_i.a.eq(data1) + + # If there's an immediate, set the B operand to that + reg2_ok = yield dec2.e.read_reg2.ok + imm_ok = yield dec2.e.imm_data.imm_ok + if imm_ok: + data2 = yield dec2.e.imm_data.imm + elif 
reg2_ok: + data2 = yield dec2.e.read_reg2.data + data2 = sim.gpr(data2).value + else: + data2 = 0 + yield alu.p.data_i.b.eq(data2) + + + +def set_extra_alu_inputs(alu, dec2, sim): + carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 + yield alu.p.data_i.carry_in.eq(carry) + so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 + yield alu.p.data_i.so.eq(so) + + +# This test bench is a bit different than is usual. Initially when I +# was writing it, I had all of the tests call a function to create a +# device under test and simulator, initialize the dut, run the +# simulation for ~2 cycles, and assert that the dut output what it +# should have. However, this was really slow, since it needed to +# create and tear down the dut and simulator for every test case. + +# Now, instead of doing that, every test case in LogicalTestCase puts some +# data into the test_data list below, describing the instructions to +# be tested and the initial state. Once all the tests have been run, +# test_data gets passed to TestRunner which then sets up the DUT and +# simulator once, runs all the data through it, and asserts that the +# results match the pseudocode sim at every cycle. + +# By doing this, I've reduced the time it takes to run the test suite +# massively. 
Before, it took around 1 minute on my computer, now it +# takes around 3 seconds + +test_data = [] + + +class LogicalTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): + tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) + test_data.append(tc) + + def test_rand(self): + insns = ["and", "or", "xor"] + for i in range(40): + choice = random.choice(insns) + lst = [f"{choice} 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rand_imm_logical(self): + insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"] + for i in range(10): + choice = random.choice(insns) + imm = random.randint(0, (1<<16)-1) + lst = [f"{choice} 3, 1, {imm}"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + @unittest.skip("broken") + def test_cntz(self): + insns = ["cntlzd", "cnttzd"] + for i in range(10): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_parity(self): + insns = ["prtyw", "prtyd"] + for i in range(10): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_popcnt(self): + insns = ["popcntb", "popcntw", "popcntd"] + for i in range(10): + choice = random.choice(insns) + lst = [f"{choice} 3, 1"] + print(lst) + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_cmpb(self): + lst = ["cmpb 3, 1, 2"] + initial_regs = 
[0] * 32 + initial_regs[1] = 0xdeadbeefcafec0de + initial_regs[2] = 0xd0adb0000afec1de + self.run_tst_program(Program(lst), initial_regs) + + def test_ilang(self): + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + alu = LogicalBasePipe(pspec) + vl = rtlil.convert(alu, ports=alu.ports()) + with open("logical_pipeline.il", "w") as f: + f.write(vl) + + +class TestRunner(FHDLTestCase): + def __init__(self, test_data): + super().__init__("run_all") + self.test_data = test_data + + def run_all(self): + m = Module() + comb = m.d.comb + instruction = Signal(32) + + pdecode = create_pdecode() + + m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) + + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + m.submodules.alu = alu = LogicalBasePipe(pspec) + + comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) + comb += alu.p.valid_i.eq(1) + comb += alu.n.ready_i.eq(1) + comb += pdecode2.dec.raw_opcode_in.eq(instruction) + sim = Simulator(m) + + sim.add_clock(1e-6) + def process(): + for test in self.test_data: + print(test.name) + program = test.program + self.subTest(test.name) + simulator = ISA(pdecode2, test.regs, test.sprs, 0) + gen = program.generate_instructions() + instructions = list(zip(gen, program.assembly.splitlines())) + + index = simulator.pc.CIA.value//4 + while index < len(instructions): + ins, code = instructions[index] + + print("0x{:X}".format(ins & 0xffffffff)) + print(code) + + # ask the decoder to decode this binary data (endian'd) + yield pdecode2.dec.bigendian.eq(0) # little / big? + yield instruction.eq(ins) # raw binary instr. 
+ yield Settle() + fn_unit = yield pdecode2.e.fn_unit + self.assertEqual(fn_unit, Function.LOGICAL.value, code) + yield from set_alu_inputs(alu, pdecode2, simulator) + yield from set_extra_alu_inputs(alu, pdecode2, simulator) + yield + opname = code.split(' ')[0] + yield from simulator.call(opname) + index = simulator.pc.CIA.value//4 + + vld = yield alu.n.valid_o + while not vld: + yield + vld = yield alu.n.valid_o + yield + alu_out = yield alu.n.data_o.o + out_reg_valid = yield pdecode2.e.write_reg.ok + if out_reg_valid: + write_reg_idx = yield pdecode2.e.write_reg.data + expected = simulator.gpr(write_reg_idx).value + print(f"expected {expected:x}, actual: {alu_out:x}") + self.assertEqual(expected, alu_out, code) + yield from self.check_extra_alu_outputs(alu, pdecode2, + simulator) + + sim.add_sync_process(process) + with sim.write_vcd("simulator.vcd", "simulator.gtkw", + traces=[]): + sim.run() + def check_extra_alu_outputs(self, alu, dec2, sim): + rc = yield dec2.e.rc.data + if rc: + cr_expected = sim.crl[0].get_range().value + cr_actual = yield alu.n.data_o.cr0 + self.assertEqual(cr_expected, cr_actual) + + +if __name__ == "__main__": + unittest.main(exit=False) + suite = unittest.TestSuite() + suite.addTest(TestRunner(test_data)) + + runner = unittest.TextTestRunner() + runner.run(suite) diff --git a/src/soc/fu/shift_rot/formal/.gitignore b/src/soc/fu/shift_rot/formal/.gitignore new file mode 100644 index 00000000..150f68c8 --- /dev/null +++ b/src/soc/fu/shift_rot/formal/.gitignore @@ -0,0 +1 @@ +*/* diff --git a/src/soc/fu/shift_rot/formal/proof_main_stage.py b/src/soc/fu/shift_rot/formal/proof_main_stage.py new file mode 100644 index 00000000..50264d5c --- /dev/null +++ b/src/soc/fu/shift_rot/formal/proof_main_stage.py @@ -0,0 +1,108 @@ +# Proof of correctness for partitioned equal signal combiner +# Copyright (C) 2020 Michael Nolan + +from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, + signed) +from nmigen.asserts import Assert, AnyConst, 
Assume, Cover +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil + +from soc.shift_rot.main_stage import ShiftRotMainStage +from soc.alu.pipe_data import ALUPipeSpec +from soc.alu.alu_input_record import CompALUOpSubset +from soc.decoder.power_enums import InternalOp +import unittest + + +# This defines a module to drive the device under test and assert +# properties about its outputs +class Driver(Elaboratable): + def __init__(self): + # inputs and outputs + pass + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + rec = CompALUOpSubset() + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + comb += p.eq(AnyConst(width)) + + pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) + m.submodules.dut = dut = ShiftRotMainStage(pspec) + + # convenience variables + a = dut.i.rs + b = dut.i.rb + ra = dut.i.ra + carry_in = dut.i.carry_in + so_in = dut.i.so + carry_out = dut.o.carry_out + o = dut.o.o + + # setup random inputs + comb += [a.eq(AnyConst(64)), + b.eq(AnyConst(64)), + carry_in.eq(AnyConst(1)), + so_in.eq(AnyConst(1))] + + comb += dut.i.ctx.op.eq(rec) + + # Assert that op gets copied from the input to output + for rec_sig in rec.ports(): + name = rec_sig.name + dut_sig = getattr(dut.o.ctx.op, name) + comb += Assert(dut_sig == rec_sig) + + # signed and signed/32 versions of input a + a_signed = Signal(signed(64)) + a_signed_32 = Signal(signed(32)) + comb += a_signed.eq(a) + comb += a_signed_32.eq(a[0:32]) + + # main assertion of arithmetic operations + with m.Switch(rec.insn_type): + with m.Case(InternalOp.OP_SHL): + comb += Assume(ra == 0) + with m.If(rec.is_32bit): + comb += Assert(o[0:32] == ((a << b[0:6]) & 0xffffffff)) + comb += Assert(o[32:64] == 0) + with m.Else(): + comb += Assert(o == ((a << b[0:7]) & ((1 << 64)-1))) + with m.Case(InternalOp.OP_SHR): + comb += Assume(ra == 0) + with m.If(~rec.is_signed): + with m.If(rec.is_32bit): + comb += Assert(o[0:32] == 
(a[0:32] >> b[0:6])) + comb += Assert(o[32:64] == 0) + with m.Else(): + comb += Assert(o == (a >> b[0:7])) + with m.Else(): + with m.If(rec.is_32bit): + comb += Assert(o[0:32] == (a_signed_32 >> b[0:6])) + comb += Assert(o[32:64] == Repl(a[31], 32)) + with m.Else(): + comb += Assert(o == (a_signed >> b[0:7])) + + return m + + +class ALUTestCase(FHDLTestCase): + def test_formal(self): + module = Driver() + self.assertFormal(module, mode="bmc", depth=2) + self.assertFormal(module, mode="cover", depth=2) + def test_ilang(self): + dut = Driver() + vl = rtlil.convert(dut, ports=[]) + with open("main_stage.il", "w") as f: + f.write(vl) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/shift_rot/input_stage.py b/src/soc/fu/shift_rot/input_stage.py new file mode 100644 index 00000000..72e4c925 --- /dev/null +++ b/src/soc/fu/shift_rot/input_stage.py @@ -0,0 +1,58 @@ +# This stage is intended to adjust the input data before sending it to +# the acutal ALU. Things like handling inverting the input, carry_in +# generation for subtraction, and handling of immediates should happen +# here +from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, + unsigned) +from nmutil.pipemodbase import PipeModBase +from soc.decoder.power_enums import InternalOp +from soc.shift_rot.pipe_data import ShiftRotInputData +from soc.decoder.power_enums import CryIn + + +class ShiftRotInputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "input") + + def ispec(self): + return ShiftRotInputData(self.pspec) + + def ospec(self): + return ShiftRotInputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + ##### operand A ##### + + # operand a to be as-is or inverted + a = Signal.like(self.i.ra) + + with m.If(self.i.ctx.op.invert_a): + comb += a.eq(~self.i.ra) + with m.Else(): + comb += a.eq(self.i.ra) + + comb += self.o.ra.eq(a) + comb += self.o.rb.eq(self.i.rb) + comb += self.o.rs.eq(self.i.rs) + + + ##### 
carry-in ##### + + # either copy incoming carry or set to 1/0 as defined by op + with m.Switch(self.i.ctx.op.input_carry): + with m.Case(CryIn.ZERO): + comb += self.o.carry_in.eq(0) + with m.Case(CryIn.ONE): + comb += self.o.carry_in.eq(1) + with m.Case(CryIn.CA): + comb += self.o.carry_in.eq(self.i.carry_in) + + ##### sticky overflow and context (both pass-through) ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/shift_rot/main_stage.py b/src/soc/fu/shift_rot/main_stage.py new file mode 100644 index 00000000..f2375283 --- /dev/null +++ b/src/soc/fu/shift_rot/main_stage.py @@ -0,0 +1,78 @@ +# This stage is intended to do most of the work of executing shift +# instructions, as well as carry and overflow generation. This module +# however should not gate the carry or overflow, that's up to the +# output stage +from nmigen import (Module, Signal, Cat, Repl, Mux, Const) +from nmutil.pipemodbase import PipeModBase +from soc.alu.pipe_data import ALUOutputData +from soc.shift_rot.pipe_data import ShiftRotInputData +from ieee754.part.partsig import PartitionedSignal +from soc.decoder.power_enums import InternalOp +from soc.shift_rot.rotator import Rotator + +from soc.decoder.power_fields import DecodeFields +from soc.decoder.power_fieldsn import SignalBitRange + + +class ShiftRotMainStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "main") + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) + self.fields.create_specs() + + def ispec(self): + return ShiftRotInputData(self.pspec) + + def ospec(self): + return ALUOutputData(self.pspec) # TODO: ALUIntermediateData + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + # obtain me and mb fields from instruction. 
+ m_fields = self.fields.instrs['M'] + md_fields = self.fields.instrs['MD'] + mb = Signal(m_fields['MB'][0:-1].shape()) + me = Signal(m_fields['ME'][0:-1].shape()) + mb_extra = Signal(1, reset_less=True) + comb += mb.eq(m_fields['MB'][0:-1]) + comb += me.eq(m_fields['ME'][0:-1]) + comb += mb_extra.eq(md_fields['mb'][0:-1][0]) + + # set up microwatt rotator module + m.submodules.rotator = rotator = Rotator() + comb += [ + rotator.me.eq(me), + rotator.mb.eq(mb), + rotator.mb_extra.eq(mb_extra), + rotator.rs.eq(self.i.rs), + rotator.ra.eq(self.i.ra), + rotator.shift.eq(self.i.rb), + rotator.is_32bit.eq(self.i.ctx.op.is_32bit), + rotator.arith.eq(self.i.ctx.op.is_signed), + ] + + # instruction rotate type + mode = Signal(3, reset_less=True) + with m.Switch(self.i.ctx.op.insn_type): + with m.Case(InternalOp.OP_SHL): comb += mode.eq(0b000) + with m.Case(InternalOp.OP_SHR): comb += mode.eq(0b001) # R-shift + with m.Case(InternalOp.OP_RLC): comb += mode.eq(0b110) # clear LR + with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L + with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R + + comb += Cat(rotator.right_shift, + rotator.clear_left, + rotator.clear_right).eq(mode) + + # outputs from the microwatt rotator module + comb += [self.o.o.eq(rotator.result_o), + self.o.carry_out.eq(rotator.carry_out_o)] + + ###### sticky overflow and context, both pass-through ##### + + comb += self.o.so.eq(self.i.so) + comb += self.o.ctx.eq(self.i.ctx) + + return m diff --git a/src/soc/fu/shift_rot/maskgen.py b/src/soc/fu/shift_rot/maskgen.py new file mode 100644 index 00000000..89246e0b --- /dev/null +++ b/src/soc/fu/shift_rot/maskgen.py @@ -0,0 +1,47 @@ +from nmigen import (Elaboratable, Signal, Module) +import math + +class MaskGen(Elaboratable): + """MaskGen - create a diff mask + + example: x=5 --> a=0b11111 + y=3 --> b=0b00111 + o: 0b11000 + x=2 --> a=0b00011 + y=4 --> b=0b01111 + o: 0b10011 + """ + def __init__(self, width): + self.width = width + 
+ self.shiftwidth = math.ceil(math.log2(width)) + self.mb = Signal(self.shiftwidth, reset_less=True) + self.me = Signal(self.shiftwidth, reset_less=True) + + self.o = Signal(width, reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + x = Signal.like(self.mb) + y = Signal.like(self.mb) + + comb += x.eq(64 - self.mb) + comb += y.eq(63 - self.me) + + mask_a = Signal.like(self.o) + mask_b = Signal.like(self.o) + + comb += mask_a.eq((1<<x) - 1) + comb += mask_b.eq((1<<y) - 1) + + with m.If(x > y): + comb += self.o.eq(mask_a ^ mask_b) + with m.Else(): + comb += self.o.eq(mask_a ^ ~mask_b) + + + return m + + def ports(self): + return [self.mb, self.me, self.o] diff --git a/src/soc/fu/shift_rot/pipe_data.py b/src/soc/fu/shift_rot/pipe_data.py new file mode 100644 index 00000000..7f98d16b --- /dev/null +++ b/src/soc/fu/shift_rot/pipe_data.py @@ -0,0 +1,30 @@ +from nmigen import Signal, Const +from nmutil.dynamicpipe import SimpleHandshakeRedir +from soc.alu.alu_input_record import CompALUOpSubset +from ieee754.fpcommon.getop import FPPipeContext +from soc.alu.pipe_data import IntegerData + + +class ShiftRotInputData(IntegerData): + def __init__(self, pspec): + super().__init__(pspec) + self.ra = Signal(64, reset_less=True) # RA + self.rs = Signal(64, reset_less=True) # RS + self.rb = Signal(64, reset_less=True) # RB/immediate + self.so = Signal(reset_less=True) + self.carry_in = Signal(reset_less=True) + + def __iter__(self): + yield from super().__iter__() + yield self.ra + yield self.rs + yield self.rb + yield self.carry_in + yield self.so + + def eq(self, i): + lst = super().eq(i) + return lst + [self.rs.eq(i.rs), self.ra.eq(i.ra), + self.rb.eq(i.rb), + self.carry_in.eq(i.carry_in), + self.so.eq(i.so)] diff --git a/src/soc/fu/shift_rot/pipeline.py b/src/soc/fu/shift_rot/pipeline.py new file mode 100644 index 00000000..1080aa8d --- /dev/null +++ b/src/soc/fu/shift_rot/pipeline.py @@ -0,0 +1,25 @@ +from nmutil.singlepipe import ControlBase +from nmutil.pipemodbase import PipeModBaseChain +from
soc.shift_rot.input_stage import ShiftRotInputStage +from soc.shift_rot.main_stage import ShiftRotMainStage +from soc.alu.output_stage import ALUOutputStage + +class ShiftRotStages(PipeModBaseChain): + def get_chain(self): + inp = ShiftRotInputStage(self.pspec) + main = ShiftRotMainStage(self.pspec) + out = ALUOutputStage(self.pspec) + return [inp, main, out] + + +class ShiftRotBasePipe(ControlBase): + def __init__(self, pspec): + ControlBase.__init__(self) + self.pipe1 = ShiftRotStages(pspec) + self._eqs = self.connect([self.pipe1]) + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + m.submodules.pipe = self.pipe1 + m.d.comb += self._eqs + return m diff --git a/src/soc/fu/shift_rot/rotator.py b/src/soc/fu/shift_rot/rotator.py new file mode 100644 index 00000000..23aa0e43 --- /dev/null +++ b/src/soc/fu/shift_rot/rotator.py @@ -0,0 +1,156 @@ +# Manual translation and adaptation of rotator.vhdl from microwatt into nmigen +# + +from nmigen import (Elaboratable, Signal, Module, Const, Cat, + unsigned, signed) +from soc.shift_rot.rotl import ROTL + +# note BE bit numbering +def right_mask(m, mask_begin): + ret = Signal(64, name="right_mask", reset_less=True) + with m.If(mask_begin <= 64): + m.d.comb += ret.eq((1<<(64-mask_begin)) - 1) + return ret + +def left_mask(m, mask_end): + ret = Signal(64, name="left_mask", reset_less=True) + m.d.comb += ret.eq(~((1<<(63-mask_end)) - 1)) + return ret + + +class Rotator(Elaboratable): + """Rotator: covers multiple POWER9 rotate functions + + supported modes: + + * sl[wd] + * rlw*, rldic, rldicr, rldimi + * rldicl, sr[wd] + * sra[wd][i] + + use as follows: + + * shift = RB[0:7] + * arith = 1 when is_signed + * right_shift = 1 when insn_type is OP_SHR + * clear_left = 1 when insn_type is OP_RLC or OP_RLCL + * clear_right = 1 when insn_type is OP_RLC or OP_RLCR + """ + def __init__(self): + # input + self.me = Signal(5, reset_less=True) # ME field + self.mb = Signal(5, reset_less=True) # MB field + 
self.mb_extra = Signal(1, reset_less=True) # extra bit of mb in MD-form + self.ra = Signal(64, reset_less=True) # RA + self.rs = Signal(64, reset_less=True) # RS + self.ra = Signal(64, reset_less=True) # RA + self.shift = Signal(7, reset_less=True) # RB[0:7] + self.is_32bit = Signal(reset_less=True) + self.right_shift = Signal(reset_less=True) + self.arith = Signal(reset_less=True) + self.clear_left = Signal(reset_less=True) + self.clear_right = Signal(reset_less=True) + # output + self.result_o = Signal(64, reset_less=True) + self.carry_out_o = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + ra, rs = self.ra, self.rs + + # temporaries + rot_in = Signal(64, reset_less=True) + rot_count = Signal(6, reset_less=True) + rot = Signal(64, reset_less=True) + sh = Signal(7, reset_less=True) + mb = Signal(7, reset_less=True) + me = Signal(7, reset_less=True) + mr = Signal(64, reset_less=True) + ml = Signal(64, reset_less=True) + output_mode = Signal(2, reset_less=True) + + # First replicate bottom 32 bits to both halves if 32-bit + comb += rot_in[0:32].eq(rs[0:32]) + with m.If(self.is_32bit): + comb += rot_in[32:64].eq(rs[0:32]) + with m.Else(): + comb += rot_in[32:64].eq(rs[32:64]) + + shift_signed = Signal(signed(6)) + comb += shift_signed.eq(self.shift[0:6]) + + # Negate shift count for right shifts + with m.If(self.right_shift): + comb += rot_count.eq(-shift_signed) + with m.Else(): + comb += rot_count.eq(self.shift[0:6]) + + # ROTL submodule + m.submodules.rotl = rotl = ROTL(64) + comb += rotl.a.eq(rot_in) + comb += rotl.b.eq(rot_count) + comb += rot.eq(rotl.o) + + # Trim shift count to 6 bits for 32-bit shifts + comb += sh.eq(Cat(self.shift[0:6], self.shift[6] & ~self.is_32bit)) + + # XXX errr... we should already have these, in Fields? 
oh well + # Work out mask begin/end indexes (caution, big-endian bit numbering) + + # mask-begin (mb) + with m.If(self.clear_left): + comb += mb.eq(self.mb) + with m.If(self.is_32bit): + comb += mb[5:7].eq(Const(0b01, 2)) + with m.Else(): + comb += mb[5:7].eq(Cat(self.mb_extra, Const(0b0, 1))) + with m.Elif(self.right_shift): + # this is basically mb = sh + (is_32bit? 32: 0); + comb += mb.eq(sh) + with m.If(self.is_32bit): + comb += mb[5:7].eq(Cat(~sh[5], sh[5])) + with m.Else(): + comb += mb.eq(Cat(Const(0b0, 5), self.is_32bit, Const(0b0, 1))) + + # mask-end (me) + with m.If(self.clear_right & self.is_32bit): + # TODO: track down where this is. have to use fields. + comb += me.eq(Cat(self.me, Const(0b01, 2))) + with m.Elif(self.clear_right & ~self.clear_left): + # this is me, have to use fields + comb += me.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1))) + with m.Else(): + # effectively, 63 - sh + comb += me.eq(Cat(~sh[0:6], sh[6])) + + # Calculate left and right masks + comb += mr.eq(right_mask(m, mb)) + comb += ml.eq(left_mask(m, me)) + + # Work out output mode + # 00 for sl[wd] + # 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me + # 10 for rldicl, sr[wd] + # 1z for sra[wd][i], z = 1 if rs is negative + with m.If((self.clear_left & ~self.clear_right) | self.right_shift): + comb += output_mode.eq(Cat(self.arith & rot_in[63], Const(1, 1))) + with m.Else(): + mbgt = self.clear_right & (mb[0:6] > me[0:6]) + comb += output_mode.eq(Cat(mbgt, Const(0, 1))) + + # Generate output from rotated input and masks + with m.Switch(output_mode): + with m.Case(0b00): + comb += self.result_o.eq((rot & (mr & ml)) | (ra & ~(mr & ml))) + with m.Case(0b01): + comb += self.result_o.eq((rot & (mr | ml)) | (ra & ~(mr | ml))) + with m.Case(0b10): + comb += self.result_o.eq(rot & mr) + with m.Case(0b11): + comb += self.result_o.eq(rot | ~mr) + # Generate carry output for arithmetic shift right of -ve value + comb += self.carry_out_o.eq(rs & ~ml) + + return m + diff --git 
a/src/soc/fu/shift_rot/rotl.py b/src/soc/fu/shift_rot/rotl.py new file mode 100644 index 00000000..d2ebfcf7 --- /dev/null +++ b/src/soc/fu/shift_rot/rotl.py @@ -0,0 +1,24 @@ +from nmigen import (Elaboratable, Signal, Module) +import math + +class ROTL(Elaboratable): + def __init__(self, width): + self.width = width + self.shiftwidth = math.ceil(math.log2(width)) + self.a = Signal(width, reset_less=True) + self.b = Signal(self.shiftwidth, reset_less=True) + + self.o = Signal(width, reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + shl = Signal.like(self.a) + shr = Signal.like(self.a) + + comb += shl.eq(self.a << self.b) + comb += shr.eq(self.a >> (self.width - self.b)) + + comb += self.o.eq(shl | shr) + return m diff --git a/src/soc/fu/shift_rot/test/test_maskgen.py b/src/soc/fu/shift_rot/test/test_maskgen.py new file mode 100644 index 00000000..1a4d34e6 --- /dev/null +++ b/src/soc/fu/shift_rot/test/test_maskgen.py @@ -0,0 +1,48 @@ +from nmigen import Signal, Module +from nmigen.back.pysim import Simulator, Delay, Settle +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +from soc.alu.maskgen import MaskGen +from soc.decoder.helpers import MASK +import random +import unittest + +class MaskGenTestCase(FHDLTestCase): + def test_maskgen(self): + m = Module() + comb = m.d.comb + m.submodules.dut = dut = MaskGen(64) + mb = Signal.like(dut.mb) + me = Signal.like(dut.me) + o = Signal.like(dut.o) + + comb += [ + dut.mb.eq(mb), + dut.me.eq(me), + o.eq(dut.o)] + + sim = Simulator(m) + + def process(): + for x in range(0, 64): + for y in range(0, 64): + yield mb.eq(x) + yield me.eq(y) + yield Delay(1e-6) + + expected = MASK(x, y) + result = yield o + self.assertEqual(expected, result) + + sim.add_process(process) # or sim.add_sync_process(process), see below + with sim.write_vcd("maskgen.vcd", "maskgen.gtkw", traces=dut.ports()): + sim.run() + + def test_ilang(self): + dut = MaskGen(64) + vl = rtlil.convert(dut, 
ports=dut.ports()) + with open("maskgen.il", "w") as f: + f.write(vl) + +if __name__ == '__main__': + unittest.main() diff --git a/src/soc/fu/shift_rot/test/test_pipe_caller.py b/src/soc/fu/shift_rot/test/test_pipe_caller.py new file mode 100644 index 00000000..dbd40923 --- /dev/null +++ b/src/soc/fu/shift_rot/test/test_pipe_caller.py @@ -0,0 +1,279 @@ +from nmigen import Module, Signal +from nmigen.back.pysim import Simulator, Delay, Settle +from nmigen.test.utils import FHDLTestCase +from nmigen.cli import rtlil +import unittest +from soc.decoder.isa.caller import ISACaller, special_sprs +from soc.decoder.power_decoder import (create_pdecode) +from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_enums import (XER_bits, Function) +from soc.decoder.selectable_int import SelectableInt +from soc.simulator.program import Program +from soc.decoder.isa.all import ISA + + +from soc.shift_rot.pipeline import ShiftRotBasePipe +from soc.alu.alu_input_record import CompALUOpSubset +from soc.alu.pipe_data import ALUPipeSpec +import random + +class TestCase: + def __init__(self, program, regs, sprs, name): + self.program = program + self.regs = regs + self.sprs = sprs + self.name = name + +def get_rec_width(rec): + recwidth = 0 + # Setup random inputs for dut.op + for p in rec.ports(): + width = p.width + recwidth += width + return recwidth + +def set_alu_inputs(alu, dec2, sim): + inputs = [] + # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 + # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) + # and place it into data_i.b + + reg3_ok = yield dec2.e.read_reg3.ok + if reg3_ok: + reg3_sel = yield dec2.e.read_reg3.data + data3 = sim.gpr(reg3_sel).value + else: + data3 = 0 + reg1_ok = yield dec2.e.read_reg1.ok + if reg1_ok: + reg1_sel = yield dec2.e.read_reg1.data + data1 = sim.gpr(reg1_sel).value + else: + data1 = 0 + reg2_ok = yield dec2.e.read_reg2.ok + imm_ok = yield dec2.e.imm_data.ok + if reg2_ok: + reg2_sel = 
yield dec2.e.read_reg2.data + data2 = sim.gpr(reg2_sel).value + elif imm_ok: + data2 = yield dec2.e.imm_data.imm + else: + data2 = 0 + + yield alu.p.data_i.ra.eq(data1) + yield alu.p.data_i.rb.eq(data2) + yield alu.p.data_i.rs.eq(data3) + + +def set_extra_alu_inputs(alu, dec2, sim): + carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 + yield alu.p.data_i.carry_in.eq(carry) + so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 + yield alu.p.data_i.so.eq(so) + + +# This test bench is a bit different than is usual. Initially when I +# was writing it, I had all of the tests call a function to create a +# device under test and simulator, initialize the dut, run the +# simulation for ~2 cycles, and assert that the dut output what it +# should have. However, this was really slow, since it needed to +# create and tear down the dut and simulator for every test case. + +# Now, instead of doing that, every test case in ALUTestCase puts some +# data into the test_data list below, describing the instructions to +# be tested and the initial state. Once all the tests have been run, +# test_data gets passed to TestRunner which then sets up the DUT and +# simulator once, runs all the data through it, and asserts that the +# results match the pseudocode sim at every cycle. + +# By doing this, I've reduced the time it takes to run the test suite +# massively. 
Before, it took around 1 minute on my computer, now it +# takes around 3 seconds + +test_data = [] + + +class ALUTestCase(FHDLTestCase): + def __init__(self, name): + super().__init__(name) + self.test_name = name + def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): + tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) + test_data.append(tc) + + + def test_shift(self): + insns = ["slw", "sld", "srw", "srd", "sraw", "srad"] + for i in range(20): + choice = random.choice(insns) + lst = [f"{choice} 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, 63) + print(initial_regs[1], initial_regs[2]) + self.run_tst_program(Program(lst), initial_regs) + + + def test_shift_arith(self): + lst = ["sraw 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, 63) + print(initial_regs[1], initial_regs[2]) + self.run_tst_program(Program(lst), initial_regs) + + def test_shift_once(self): + lst = ["slw 3, 1, 4", + "slw 3, 1, 2"] + initial_regs = [0] * 32 + initial_regs[1] = 0x80000000 + initial_regs[2] = 0x40 + initial_regs[4] = 0x00 + self.run_tst_program(Program(lst), initial_regs) + + def test_rlwinm(self): + for i in range(10): + mb = random.randint(0,31) + me = random.randint(0,31) + sh = random.randint(0,31) + lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rlwimi(self): + lst = ["rlwimi 3, 1, 5, 20, 6"] + initial_regs = [0] * 32 + initial_regs[1] = 0xdeadbeef + initial_regs[3] = 0x12345678 + self.run_tst_program(Program(lst), initial_regs) + + def test_rlwnm(self): + lst = ["rlwnm 3, 1, 2, 20, 6"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + initial_regs[2] = random.randint(0, 63) + self.run_tst_program(Program(lst), initial_regs) + + def 
test_rldicl(self): + lst = ["rldicl 3, 1, 5, 20"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rldicr(self): + lst = ["rldicr 3, 1, 5, 20"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_rlc(self): + insns = ["rldic", "rldicl", "rldicr"] + for i in range(20): + choice = random.choice(insns) + sh = random.randint(0, 63) + m = random.randint(0, 63) + lst = [f"{choice} 3, 1, {sh}, {m}"] + initial_regs = [0] * 32 + initial_regs[1] = random.randint(0, (1<<64)-1) + self.run_tst_program(Program(lst), initial_regs) + + def test_ilang(self): + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + alu = ShiftRotBasePipe(pspec) + vl = rtlil.convert(alu, ports=alu.ports()) + with open("pipeline.il", "w") as f: + f.write(vl) + + +class TestRunner(FHDLTestCase): + def __init__(self, test_data): + super().__init__("run_all") + self.test_data = test_data + + def run_all(self): + m = Module() + comb = m.d.comb + instruction = Signal(32) + + pdecode = create_pdecode() + + m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) + + rec = CompALUOpSubset() + + pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) + m.submodules.alu = alu = ShiftRotBasePipe(pspec) + + comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) + comb += alu.p.valid_i.eq(1) + comb += alu.n.ready_i.eq(1) + comb += pdecode2.dec.raw_opcode_in.eq(instruction) + sim = Simulator(m) + + sim.add_clock(1e-6) + def process(): + for test in self.test_data: + print(test.name) + program = test.program + self.subTest(test.name) + simulator = ISA(pdecode2, test.regs, test.sprs, 0) + gen = program.generate_instructions() + instructions = list(zip(gen, program.assembly.splitlines())) + + index = simulator.pc.CIA.value//4 + while index < len(instructions): + ins, code = instructions[index] + + 
print("0x{:X}".format(ins & 0xffffffff)) + print(code) + + # ask the decoder to decode this binary data (endian'd) + yield pdecode2.dec.bigendian.eq(0) # little / big? + yield instruction.eq(ins) # raw binary instr. + yield Settle() + fn_unit = yield pdecode2.e.fn_unit + self.assertEqual(fn_unit, Function.SHIFT_ROT.value) + yield from set_alu_inputs(alu, pdecode2, simulator) + yield from set_extra_alu_inputs(alu, pdecode2, simulator) + yield + opname = code.split(' ')[0] + yield from simulator.call(opname) + index = simulator.pc.CIA.value//4 + + vld = yield alu.n.valid_o + while not vld: + yield + vld = yield alu.n.valid_o + yield + alu_out = yield alu.n.data_o.o + out_reg_valid = yield pdecode2.e.write_reg.ok + if out_reg_valid: + write_reg_idx = yield pdecode2.e.write_reg.data + expected = simulator.gpr(write_reg_idx).value + msg = f"expected {expected:x}, actual: {alu_out:x}" + self.assertEqual(expected, alu_out, msg) + yield from self.check_extra_alu_outputs(alu, pdecode2, + simulator) + + sim.add_sync_process(process) + with sim.write_vcd("simulator.vcd", "simulator.gtkw", + traces=[]): + sim.run() + def check_extra_alu_outputs(self, alu, dec2, sim): + rc = yield dec2.e.rc.data + if rc: + cr_expected = sim.crl[0].get_range().value + cr_actual = yield alu.n.data_o.cr0 + self.assertEqual(cr_expected, cr_actual) + + +if __name__ == "__main__": + unittest.main(exit=False) + suite = unittest.TestSuite() + suite.addTest(TestRunner(test_data)) + + runner = unittest.TextTestRunner() + runner.run(suite) diff --git a/src/soc/pipe/__init__.py b/src/soc/pipe/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soc/pipe/alu/alu_input_record.py b/src/soc/pipe/alu/alu_input_record.py deleted file mode 100644 index 41a40ebf..00000000 --- a/src/soc/pipe/alu/alu_input_record.py +++ /dev/null @@ -1,80 +0,0 @@ -from nmigen.hdl.rec import Record, Layout - -from soc.decoder.power_enums import InternalOp, Function, CryIn - - -class CompALUOpSubset(Record): 
- """CompALUOpSubset - - a copy of the relevant subset information from Decode2Execute1Type - needed for ALU operations. use with eq_from_execute1 (below) to - grab subsets. - """ - def __init__(self, name=None): - layout = (('insn_type', InternalOp), - ('fn_unit', Function), - ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))), - #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR - #'xerc = XerBits() # NO: this is from the XER SPR - ('lk', 1), - ('rc', Layout((("rc", 1), ("rc_ok", 1)))), - ('oe', Layout((("oe", 1), ("oe_ok", 1)))), - ('invert_a', 1), - ('invert_out', 1), - ('input_carry', CryIn), - ('output_carry', 1), - ('input_cr', 1), - ('output_cr', 1), - ('is_32bit', 1), - ('is_signed', 1), - ('data_len', 4), # TODO: should be in separate CompLDSTSubset - ('insn', 32), - ('byte_reverse', 1), - ('sign_extend', 1)) - - Record.__init__(self, Layout(layout), name=name) - - # grrr. Record does not have kwargs - self.insn_type.reset_less = True - self.fn_unit.reset_less = True - #self.cr = Signal(32, reset_less = True - #self.xerc = XerBits( - self.lk.reset_less = True - self.invert_a.reset_less = True - self.invert_out.reset_less = True - self.input_carry.reset_less = True - self.output_carry.reset_less = True - self.input_cr.reset_less = True - self.output_cr.reset_less = True - self.is_32bit.reset_less = True - self.is_signed.reset_less = True - self.data_len.reset_less = True - self.byte_reverse.reset_less = True - self.sign_extend.reset_less = True - - def eq_from_execute1(self, other): - """ use this to copy in from Decode2Execute1Type - """ - res = [] - for fname, sig in self.fields.items(): - eqfrom = other.fields[fname] - res.append(sig.eq(eqfrom)) - return res - - def ports(self): - return [self.insn_type, - #self.cr, - #self.xerc, - self.lk, - self.invert_a, - self.invert_out, - self.input_carry, - self.output_carry, - self.input_cr, - self.output_cr, - self.is_32bit, - self.is_signed, - self.data_len, - self.byte_reverse, - 
self.sign_extend, - ] diff --git a/src/soc/pipe/alu/formal/.gitignore b/src/soc/pipe/alu/formal/.gitignore deleted file mode 100644 index 150f68c8..00000000 --- a/src/soc/pipe/alu/formal/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*/* diff --git a/src/soc/pipe/alu/formal/proof_input_stage.py b/src/soc/pipe/alu/formal/proof_input_stage.py deleted file mode 100644 index 347ab7d4..00000000 --- a/src/soc/pipe/alu/formal/proof_input_stage.py +++ /dev/null @@ -1,77 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import Module, Signal, Elaboratable, Mux -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.alu.input_stage import ALUInputStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = ALUInputStage(pspec) - - a = Signal(64) - b = Signal(64) - comb += [dut.i.a.eq(a), - dut.i.b.eq(b), - a.eq(AnyConst(64)), - b.eq(AnyConst(64))] - - comb += dut.i.ctx.op.eq(rec) - - # Assert that op gets copied from the input to output - for p in rec.ports(): - name = p.name - rec_sig = p - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - with m.If(rec.invert_a): - comb += Assert(dut.o.a == ~a) - with m.Else(): - comb += Assert(dut.o.a == a) - - comb += Assert(dut.o.b == b) - - return m - - 
-class GTCombinerTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=4) - self.assertFormal(module, mode="cover", depth=4) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("input_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/alu/formal/proof_main_stage.py b/src/soc/pipe/alu/formal/proof_main_stage.py deleted file mode 100644 index f102fc2b..00000000 --- a/src/soc/pipe/alu/formal/proof_main_stage.py +++ /dev/null @@ -1,88 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, - signed) -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.alu.main_stage import ALUMainStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = ALUMainStage(pspec) - - # convenience variables - a = dut.i.a - b = dut.i.b - carry_in = dut.i.carry_in - so_in = dut.i.so - carry_out = dut.o.carry_out - o = dut.o.o - - # setup random inputs - comb += [a.eq(AnyConst(64)), - b.eq(AnyConst(64)), - carry_in.eq(AnyConst(1)), - so_in.eq(AnyConst(1))] - - comb += dut.i.ctx.op.eq(rec) - - # Assert that op gets copied from the 
input to output - for rec_sig in rec.ports(): - name = rec_sig.name - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - # signed and signed/32 versions of input a - a_signed = Signal(signed(64)) - a_signed_32 = Signal(signed(32)) - comb += a_signed.eq(a) - comb += a_signed_32.eq(a[0:32]) - - # main assertion of arithmetic operations - with m.Switch(rec.insn_type): - with m.Case(InternalOp.OP_ADD): - comb += Assert(Cat(o, carry_out) == (a + b + carry_in)) - - return m - - -class ALUTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=2) - self.assertFormal(module, mode="cover", depth=2) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("main_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/alu/formal/proof_output_stage.py b/src/soc/pipe/alu/formal/proof_output_stage.py deleted file mode 100644 index 288da071..00000000 --- a/src/soc/pipe/alu/formal/proof_output_stage.py +++ /dev/null @@ -1,115 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import Module, Signal, Elaboratable, Mux, Cat, signed -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.alu.output_stage import ALUOutputStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - 
recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = ALUOutputStage(pspec) - - o = Signal(64) - carry_out = Signal() - carry_out32 = Signal() - ov = Signal() - ov32 = Signal() - cr0 = Signal(4) - so = Signal() - comb += [dut.i.o.eq(o), - dut.i.carry_out.eq(carry_out), - dut.i.so.eq(so), - dut.i.carry_out32.eq(carry_out32), - dut.i.cr0.eq(cr0), - dut.i.ov.eq(ov), - dut.i.ov32.eq(ov32), - o.eq(AnyConst(64)), - carry_out.eq(AnyConst(1)), - carry_out32.eq(AnyConst(1)), - ov.eq(AnyConst(1)), - ov32.eq(AnyConst(1)), - cr0.eq(AnyConst(4)), - so.eq(AnyConst(1))] - - comb += dut.i.ctx.op.eq(rec) - - with m.If(dut.i.ctx.op.invert_out): - comb += Assert(dut.o.o == ~o) - with m.Else(): - comb += Assert(dut.o.o == o) - - cr_out = Signal.like(cr0) - comb += cr_out.eq(dut.o.cr0) - - o_signed = Signal(signed(64)) - comb += o_signed.eq(dut.o.o) - # Assert only one of the comparison bits is set - comb += Assert(cr_out[3] + cr_out[2] + cr_out[1] == 1) - with m.If(o_signed == 0): - comb += Assert(cr_out[1] == 1) - with m.Elif(o_signed > 0): - # sigh. see https://bugs.libre-soc.org/show_bug.cgi?id=305#c61 - # for OP_CMP we do b-a rather than a-b (just like ADD) and - # then invert the *test condition*. 
- with m.If(rec.insn_type == InternalOp.OP_CMP): - comb += Assert(cr_out[3] == 1) - with m.Else(): - comb += Assert(cr_out[2] == 1) - with m.Elif(o_signed < 0): - # ditto as above - with m.If(rec.insn_type == InternalOp.OP_CMP): - comb += Assert(cr_out[2] == 1) - with m.Else(): - comb += Assert(cr_out[3] == 1) - - - # Assert that op gets copied from the input to output - for p in rec.ports(): - name = p.name - rec_sig = p - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - - return m - -class GTCombinerTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=4) - self.assertFormal(module, mode="cover", depth=4) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("output_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/alu/input_stage.py b/src/soc/pipe/alu/input_stage.py deleted file mode 100644 index 75207324..00000000 --- a/src/soc/pipe/alu/input_stage.py +++ /dev/null @@ -1,57 +0,0 @@ -# This stage is intended to adjust the input data before sending it to -# the acutal ALU. 
Things like handling inverting the input, carry_in -# generation for subtraction, and handling of immediates should happen -# here -from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, - unsigned) -from nmutil.pipemodbase import PipeModBase -from soc.decoder.power_enums import InternalOp -from soc.alu.pipe_data import ALUInputData -from soc.decoder.power_enums import CryIn - - -class ALUInputStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "input") - - def ispec(self): - return ALUInputData(self.pspec) - - def ospec(self): - return ALUInputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - ctx = self.i.ctx - - ##### operand A ##### - - # operand a to be as-is or inverted - a = Signal.like(self.i.a) - - with m.If(ctx.op.invert_a): - comb += a.eq(~self.i.a) - with m.Else(): - comb += a.eq(self.i.a) - - comb += self.o.a.eq(a) - comb += self.o.b.eq(self.i.b) - - ##### carry-in ##### - - # either copy incoming carry or set to 1/0 as defined by op - with m.Switch(ctx.op.input_carry): - with m.Case(CryIn.ZERO): - comb += self.o.carry_in.eq(0) - with m.Case(CryIn.ONE): - comb += self.o.carry_in.eq(1) - with m.Case(CryIn.CA): - comb += self.o.carry_in.eq(self.i.carry_in) - - ##### sticky overflow and context (both pass-through) ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(ctx) - - return m diff --git a/src/soc/pipe/alu/main_stage.py b/src/soc/pipe/alu/main_stage.py deleted file mode 100644 index 51001663..00000000 --- a/src/soc/pipe/alu/main_stage.py +++ /dev/null @@ -1,84 +0,0 @@ -# This stage is intended to do most of the work of executing the Arithmetic -# instructions. This would be like the additions, compares, and sign-extension -# as well as carry and overflow generation. 
This module -# however should not gate the carry or overflow, that's up to the -# output stage -from nmigen import (Module, Signal, Cat, Repl, Mux, Const) -from nmutil.pipemodbase import PipeModBase -from soc.alu.pipe_data import ALUInputData, ALUOutputData -from ieee754.part.partsig import PartitionedSignal -from soc.decoder.power_enums import InternalOp - - -class ALUMainStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "main") - - def ispec(self): - return ALUInputData(self.pspec) - - def ospec(self): - return ALUOutputData(self.pspec) # TODO: ALUIntermediateData - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - carry_out, o = self.o.carry_out, self.o.o - - # check if op is 32-bit, and get sign bit from operand a - is_32bit = Signal(reset_less=True) - sign_bit = Signal(reset_less=True) - comb += is_32bit.eq(self.i.ctx.op.is_32bit) - comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63])) - - # little trick: do the add using only one add (not 2) - add_a = Signal(self.i.a.width + 2, reset_less=True) - add_b = Signal(self.i.a.width + 2, reset_less=True) - add_output = Signal(self.i.a.width + 2, reset_less=True) - with m.If((self.i.ctx.op.insn_type == InternalOp.OP_ADD) | - (self.i.ctx.op.insn_type == InternalOp.OP_CMP)): - # in bit 0, 1+carry_in creates carry into bit 1 and above - comb += add_a.eq(Cat(self.i.carry_in, self.i.a, Const(0, 1))) - comb += add_b.eq(Cat(Const(1, 1), self.i.b, Const(0, 1))) - comb += add_output.eq(add_a + add_b) - - ########################## - # main switch-statement for handling arithmetic operations - - with m.Switch(self.i.ctx.op.insn_type): - #### CMP, CMPL #### - with m.Case(InternalOp.OP_CMP): - # this is supposed to be inverted (b-a, not a-b) - # however we have a trick: instead of adding either 2x 64-bit - # MUXes to invert a and b, or messing with a 64-bit output, - # swap +ve and -ve test in the *output* stage using an XOR gate - comb += o.eq(add_output[1:-1]) - - #### add 
#### - with m.Case(InternalOp.OP_ADD): - # bit 0 is not part of the result, top bit is the carry-out - comb += o.eq(add_output[1:-1]) - comb += carry_out.eq(add_output[-1]) - - #### exts (sign-extend) #### - with m.Case(InternalOp.OP_EXTS): - with m.If(self.i.ctx.op.data_len == 1): - comb += o.eq(Cat(self.i.a[0:8], Repl(self.i.a[7], 64-8))) - with m.If(self.i.ctx.op.data_len == 2): - comb += o.eq(Cat(self.i.a[0:16], Repl(self.i.a[15], 64-16))) - with m.If(self.i.ctx.op.data_len == 4): - comb += o.eq(Cat(self.i.a[0:32], Repl(self.i.a[31], 64-32))) - with m.Case(InternalOp.OP_CMPEQB): - eqs = Signal(8, reset_less=True) - src1 = Signal(8, reset_less=True) - comb += src1.eq(self.i.a[0:8]) - for i in range(8): - comb += eqs[i].eq(src1 == self.i.b[8*i:8*(i+1)]) - comb += self.o.cr0.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1))) - - ###### sticky overflow and context, both pass-through ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/alu/output_stage.py b/src/soc/pipe/alu/output_stage.py deleted file mode 100644 index 12537957..00000000 --- a/src/soc/pipe/alu/output_stage.py +++ /dev/null @@ -1,61 +0,0 @@ -# This stage is intended to handle the gating of carry and overflow -# out, summary overflow generation, and updating the condition -# register -from nmigen import (Module, Signal, Cat, Repl) -from nmutil.pipemodbase import PipeModBase -from soc.alu.pipe_data import ALUInputData, ALUOutputData -from ieee754.part.partsig import PartitionedSignal -from soc.decoder.power_enums import InternalOp - - -class ALUOutputStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "output") - - def ispec(self): - return ALUOutputData(self.pspec) # TODO: ALUIntermediateData - - def ospec(self): - return ALUOutputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - # op requests inversion of the output - o = Signal.like(self.i.o) - with m.If(self.i.ctx.op.invert_out): 
- comb += o.eq(~self.i.o) - with m.Else(): - comb += o.eq(self.i.o) - - # create condition register cr0 and sticky-overflow - is_zero = Signal(reset_less=True) - is_positive = Signal(reset_less=True) - is_negative = Signal(reset_less=True) - msb_test = Signal(reset_less=True) # set equal to MSB, invert if OP=CMP - is_cmp = Signal(reset_less=True) # true if OP=CMP - so = Signal(reset_less=True) - - # TODO: if o[63] is XORed with "operand == OP_CMP" - # that can be used as a test - # see https://bugs.libre-soc.org/show_bug.cgi?id=305#c60 - - comb += is_cmp.eq(self.i.ctx.op.insn_type == InternalOp.OP_CMP) - comb += msb_test.eq(o[-1] ^ is_cmp) - comb += is_zero.eq(o == 0) - comb += is_positive.eq(~is_zero & ~msb_test) - comb += is_negative.eq(~is_zero & msb_test) - comb += so.eq(self.i.so | self.i.ov) - - comb += self.o.o.eq(o) - with m.If(self.i.ctx.op.insn_type != InternalOp.OP_CMPEQB): - comb += self.o.cr0.eq(Cat(so, is_zero, is_positive, is_negative)) - with m.Else(): - comb += self.o.cr0.eq(self.i.cr0) - - comb += self.o.so.eq(so) - - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/alu/pipe_data.py b/src/soc/pipe/alu/pipe_data.py deleted file mode 100644 index c386397a..00000000 --- a/src/soc/pipe/alu/pipe_data.py +++ /dev/null @@ -1,90 +0,0 @@ -from nmigen import Signal, Const -from nmutil.dynamicpipe import SimpleHandshakeRedir -from soc.alu.alu_input_record import CompALUOpSubset -from ieee754.fpcommon.getop import FPPipeContext - - -class IntegerData: - - def __init__(self, pspec): - self.ctx = FPPipeContext(pspec) - self.muxid = self.ctx.muxid - - def __iter__(self): - yield from self.ctx - - def eq(self, i): - return [self.ctx.eq(i.ctx)] - - def ports(self): - return self.ctx.ports() - - -class ALUInputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.a = Signal(64, reset_less=True) # RA - self.b = Signal(64, reset_less=True) # RB/immediate - self.so = Signal(reset_less=True) - self.carry_in = 
Signal(reset_less=True) - - def __iter__(self): - yield from super().__iter__() - yield self.a - yield self.b - yield self.carry_in - yield self.so - - def eq(self, i): - lst = super().eq(i) - return lst + [self.a.eq(i.a), self.b.eq(i.b), - self.carry_in.eq(i.carry_in), - self.so.eq(i.so)] - -# TODO: ALUIntermediateData which does not have -# cr0, ov, ov32 in it (because they are generated as outputs by -# the final output stage, not by the intermediate stage) -# https://bugs.libre-soc.org/show_bug.cgi?id=305#c19 - -class ALUOutputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.o = Signal(64, reset_less=True, name="stage_o") - self.carry_out = Signal(reset_less=True) - self.carry_out32 = Signal(reset_less=True) - self.cr0 = Signal(4, reset_less=True) - self.ov = Signal(reset_less=True) - self.ov32 = Signal(reset_less=True) - self.so = Signal(reset_less=True) - - def __iter__(self): - yield from super().__iter__() - yield self.o - yield self.carry_out - yield self.carry_out32 - yield self.cr0 - yield self.ov - yield self.ov32 - yield self.so - - def eq(self, i): - lst = super().eq(i) - return lst + [self.o.eq(i.o), - self.carry_out.eq(i.carry_out), - self.carry_out32.eq(i.carry_out32), - self.cr0.eq(i.cr0), self.ov.eq(i.ov), - self.ov32.eq(i.ov32), self.so.eq(i.so)] - - -class IntPipeSpec: - def __init__(self, id_wid=2, op_wid=1): - self.id_wid = id_wid - self.op_wid = op_wid - self.opkls = lambda _: CompALUOpSubset(name="op") - self.stage = None - - -class ALUPipeSpec(IntPipeSpec): - def __init__(self, id_wid, op_wid): - super().__init__(id_wid, op_wid) - self.pipekls = SimpleHandshakeRedir diff --git a/src/soc/pipe/alu/pipeline.py b/src/soc/pipe/alu/pipeline.py deleted file mode 100644 index e8dd1991..00000000 --- a/src/soc/pipe/alu/pipeline.py +++ /dev/null @@ -1,25 +0,0 @@ -from nmutil.singlepipe import ControlBase -from nmutil.pipemodbase import PipeModBaseChain -from soc.alu.input_stage import ALUInputStage -from 
soc.alu.main_stage import ALUMainStage -from soc.alu.output_stage import ALUOutputStage - -class ALUStages(PipeModBaseChain): - def get_chain(self): - inp = ALUInputStage(self.pspec) - main = ALUMainStage(self.pspec) - out = ALUOutputStage(self.pspec) - return [inp, main, out] - - -class ALUBasePipe(ControlBase): - def __init__(self, pspec): - ControlBase.__init__(self) - self.pipe1 = ALUStages(pspec) - self._eqs = self.connect([self.pipe1]) - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - m.submodules.pipe = self.pipe1 - m.d.comb += self._eqs - return m diff --git a/src/soc/pipe/alu/test/test_pipe_caller.py b/src/soc/pipe/alu/test/test_pipe_caller.py deleted file mode 100644 index f42112e1..00000000 --- a/src/soc/pipe/alu/test/test_pipe_caller.py +++ /dev/null @@ -1,270 +0,0 @@ -from nmigen import Module, Signal -from nmigen.back.pysim import Simulator, Delay, Settle -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil -import unittest -from soc.decoder.isa.caller import ISACaller, special_sprs -from soc.decoder.power_decoder import (create_pdecode) -from soc.decoder.power_decoder2 import (PowerDecode2) -from soc.decoder.power_enums import (XER_bits, Function, InternalOp) -from soc.decoder.selectable_int import SelectableInt -from soc.simulator.program import Program -from soc.decoder.isa.all import ISA - - -from soc.alu.pipeline import ALUBasePipe -from soc.alu.alu_input_record import CompALUOpSubset -from soc.alu.pipe_data import ALUPipeSpec -import random - -class TestCase: - def __init__(self, program, regs, sprs, name): - self.program = program - self.regs = regs - self.sprs = sprs - self.name = name - -def get_rec_width(rec): - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - return recwidth - -def set_alu_inputs(alu, dec2, sim): - # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 - # detect the immediate here (with 
m.If(self.i.ctx.op.imm_data.imm_ok)) - # and place it into data_i.b - - reg3_ok = yield dec2.e.read_reg3.ok - reg1_ok = yield dec2.e.read_reg1.ok - assert reg3_ok != reg1_ok - if reg3_ok: - data1 = yield dec2.e.read_reg3.data - data1 = sim.gpr(data1).value - elif reg1_ok: - data1 = yield dec2.e.read_reg1.data - data1 = sim.gpr(data1).value - else: - data1 = 0 - - yield alu.p.data_i.a.eq(data1) - - # If there's an immediate, set the B operand to that - reg2_ok = yield dec2.e.read_reg2.ok - imm_ok = yield dec2.e.imm_data.imm_ok - if imm_ok: - data2 = yield dec2.e.imm_data.imm - elif reg2_ok: - data2 = yield dec2.e.read_reg2.data - data2 = sim.gpr(data2).value - else: - data2 = 0 - yield alu.p.data_i.b.eq(data2) - - - -def set_extra_alu_inputs(alu, dec2, sim): - carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 - yield alu.p.data_i.carry_in.eq(carry) - so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 - yield alu.p.data_i.so.eq(so) - - -# This test bench is a bit different than is usual. Initially when I -# was writing it, I had all of the tests call a function to create a -# device under test and simulator, initialize the dut, run the -# simulation for ~2 cycles, and assert that the dut output what it -# should have. However, this was really slow, since it needed to -# create and tear down the dut and simulator for every test case. - -# Now, instead of doing that, every test case in ALUTestCase puts some -# data into the test_data list below, describing the instructions to -# be tested and the initial state. Once all the tests have been run, -# test_data gets passed to TestRunner which then sets up the DUT and -# simulator once, runs all the data through it, and asserts that the -# results match the pseudocode sim at every cycle. - -# By doing this, I've reduced the time it takes to run the test suite -# massively. 
Before, it took around 1 minute on my computer, now it -# takes around 3 seconds - -test_data = [] - - -class ALUTestCase(FHDLTestCase): - def __init__(self, name): - super().__init__(name) - self.test_name = name - def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): - tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) - test_data.append(tc) - - def test_rand(self): - insns = ["add", "add.", "subf"] - for i in range(40): - choice = random.choice(insns) - lst = [f"{choice} 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rand_imm(self): - insns = ["addi", "addis", "subfic"] - for i in range(10): - choice = random.choice(insns) - imm = random.randint(-(1<<15), (1<<15)-1) - lst = [f"{choice} 3, 1, {imm}"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_adde(self): - lst = ["adde. 5, 6, 7"] - initial_regs = [0] * 32 - initial_regs[6] = random.randint(0, (1<<64)-1) - initial_regs[7] = random.randint(0, (1<<64)-1) - initial_sprs = {} - xer = SelectableInt(0, 64) - xer[XER_bits['CA']] = 1 - initial_sprs[special_sprs['XER']] = xer - self.run_tst_program(Program(lst), initial_regs, initial_sprs) - - def test_cmp(self): - lst = ["subf. 
1, 6, 7", - "cmp cr2, 1, 6, 7"] - initial_regs = [0] * 32 - initial_regs[6] = 0x10 - initial_regs[7] = 0x05 - self.run_tst_program(Program(lst), initial_regs, {}) - - def test_extsb(self): - insns = ["extsb", "extsh", "extsw"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_cmpeqb(self): - lst = ["cmpeqb cr0, 1, 2"] - for i in range(20): - initial_regs = [0] * 32 - initial_regs[1] = i - initial_regs[2] = 0x01030507090b0d0f11 - self.run_tst_program(Program(lst), initial_regs, {}) - - def test_ilang(self): - rec = CompALUOpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - alu = ALUBasePipe(pspec) - vl = rtlil.convert(alu, ports=alu.ports()) - with open("pipeline.il", "w") as f: - f.write(vl) - - -class TestRunner(FHDLTestCase): - def __init__(self, test_data): - super().__init__("run_all") - self.test_data = test_data - - def run_all(self): - m = Module() - comb = m.d.comb - instruction = Signal(32) - - pdecode = create_pdecode() - - m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) - - rec = CompALUOpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - m.submodules.alu = alu = ALUBasePipe(pspec) - - comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) - comb += alu.p.valid_i.eq(1) - comb += alu.n.ready_i.eq(1) - comb += pdecode2.dec.raw_opcode_in.eq(instruction) - sim = Simulator(m) - - sim.add_clock(1e-6) - def process(): - for test in self.test_data: - print(test.name) - program = test.program - self.subTest(test.name) - simulator = ISA(pdecode2, test.regs, test.sprs, 0) - gen = program.generate_instructions() - instructions = list(zip(gen, program.assembly.splitlines())) - - index = simulator.pc.CIA.value//4 - while index < len(instructions): - ins, code = instructions[index] - - print("0x{:X}".format(ins & 0xffffffff)) - 
print(code) - - # ask the decoder to decode this binary data (endian'd) - yield pdecode2.dec.bigendian.eq(0) # little / big? - yield instruction.eq(ins) # raw binary instr. - yield Settle() - fn_unit = yield pdecode2.e.fn_unit - self.assertEqual(fn_unit, Function.ALU.value) - yield from set_alu_inputs(alu, pdecode2, simulator) - yield from set_extra_alu_inputs(alu, pdecode2, simulator) - yield - opname = code.split(' ')[0] - yield from simulator.call(opname) - index = simulator.pc.CIA.value//4 - - vld = yield alu.n.valid_o - while not vld: - yield - vld = yield alu.n.valid_o - yield - alu_out = yield alu.n.data_o.o - out_reg_valid = yield pdecode2.e.write_reg.ok - if out_reg_valid: - write_reg_idx = yield pdecode2.e.write_reg.data - expected = simulator.gpr(write_reg_idx).value - print(f"expected {expected:x}, actual: {alu_out:x}") - self.assertEqual(expected, alu_out) - yield from self.check_extra_alu_outputs(alu, pdecode2, - simulator, code) - - sim.add_sync_process(process) - with sim.write_vcd("simulator.vcd", "simulator.gtkw", - traces=[]): - sim.run() - - def check_extra_alu_outputs(self, alu, dec2, sim, code): - rc = yield dec2.e.rc.data - if rc: - cr_expected = sim.crl[0].get_range().value - cr_actual = yield alu.n.data_o.cr0 - self.assertEqual(cr_expected, cr_actual, code) - - op = yield dec2.e.insn_type - if op == InternalOp.OP_CMP.value or \ - op == InternalOp.OP_CMPEQB.value: - bf = yield dec2.dec.BF - cr_actual = yield alu.n.data_o.cr0 - cr_expected = sim.crl[bf].get_range().value - self.assertEqual(cr_expected, cr_actual, code) - - - -if __name__ == "__main__": - unittest.main(exit=False) - suite = unittest.TestSuite() - suite.addTest(TestRunner(test_data)) - - runner = unittest.TextTestRunner() - runner.run(suite) diff --git a/src/soc/pipe/branch/__init__.py b/src/soc/pipe/branch/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soc/pipe/branch/br_input_record.py b/src/soc/pipe/branch/br_input_record.py deleted file mode 
100644 index d4f039cd..00000000 --- a/src/soc/pipe/branch/br_input_record.py +++ /dev/null @@ -1,81 +0,0 @@ -from nmigen.hdl.rec import Record, Layout - -from soc.decoder.power_enums import InternalOp, Function, CryIn - - -class CompBROpSubset(Record): - """CompBROpSubset - - TODO: remove anything not needed by the Branch pipeline (determine this - after all branch operations have been written. see - https://bugs.libre-soc.org/show_bug.cgi?id=313#c3) - - a copy of the relevant subset information from Decode2Execute1Type - needed for Branch operations. use with eq_from_execute1 (below) to - grab subsets. - """ - def __init__(self, name=None): - layout = (('insn_type', InternalOp), - ('fn_unit', Function), - ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))), - #'cr = Signal(32) # NO: this is from the CR SPR - #'xerc = XerBits() # NO: this is from the XER SPR - ('lk', 1), - ('rc', Layout((("rc", 1), ("rc_ok", 1)))), - ('oe', Layout((("oe", 1), ("oe_ok", 1)))), - ('invert_a', 1), - ('invert_out', 1), - ('input_carry', CryIn), - ('output_carry', 1), - ('input_cr', 1), - ('output_cr', 1), - ('is_32bit', 1), - ('is_signed', 1), - ('insn', 32), - ('byte_reverse', 1), - ('sign_extend', 1)) - - Record.__init__(self, Layout(layout), name=name) - - # grrr. 
Record does not have kwargs - self.insn_type.reset_less = True - self.fn_unit.reset_less = True - #self.cr = Signal(32, reset_less = True - #self.xerc = XerBits( - self.lk.reset_less = True - self.invert_a.reset_less = True - self.invert_out.reset_less = True - self.input_carry.reset_less = True - self.output_carry.reset_less = True - self.input_cr.reset_less = True - self.output_cr.reset_less = True - self.is_32bit.reset_less = True - self.is_signed.reset_less = True - self.byte_reverse.reset_less = True - self.sign_extend.reset_less = True - - def eq_from_execute1(self, other): - """ use this to copy in from Decode2Execute1Type - """ - res = [] - for fname, sig in self.fields.items(): - eqfrom = other.fields[fname] - res.append(sig.eq(eqfrom)) - return res - - def ports(self): - return [self.insn_type, - #self.cr, - #self.xerc, - self.lk, - self.invert_a, - self.invert_out, - self.input_carry, - self.output_carry, - self.input_cr, - self.output_cr, - self.is_32bit, - self.is_signed, - self.byte_reverse, - self.sign_extend, - ] diff --git a/src/soc/pipe/branch/formal/proof_input_stage.py b/src/soc/pipe/branch/formal/proof_input_stage.py deleted file mode 100644 index fb097c87..00000000 --- a/src/soc/pipe/branch/formal/proof_input_stage.py +++ /dev/null @@ -1,80 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import Module, Signal, Elaboratable, Mux -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.alu.input_stage import ALUInputStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.branch.br_input_record import CompBROpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, 
platform): - m = Module() - comb = m.d.comb - - rec = CompBROpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = ALUInputStage(pspec) - - a = Signal(64) - b = Signal(64) - comb += [dut.i.a.eq(a), - dut.i.b.eq(b), - a.eq(AnyConst(64)), - b.eq(AnyConst(64))] - - comb += dut.i.ctx.op.eq(rec) - - # Assert that op gets copied from the input to output - for p in rec.ports(): - name = p.name - rec_sig = p - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - with m.If(rec.invert_a): - comb += Assert(dut.o.a == ~a) - with m.Else(): - comb += Assert(dut.o.a == a) - - with m.If(rec.imm_data.imm_ok & - ~(rec.insn_type == InternalOp.OP_RLC)): - comb += Assert(dut.o.b == rec.imm_data.imm) - with m.Else(): - comb += Assert(dut.o.b == b) - - return m - -class GTCombinerTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=4) - self.assertFormal(module, mode="cover", depth=4) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("input_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/branch/formal/proof_main_stage.py b/src/soc/pipe/branch/formal/proof_main_stage.py deleted file mode 100644 index 5ca9481d..00000000 --- a/src/soc/pipe/branch/formal/proof_main_stage.py +++ /dev/null @@ -1,92 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, - signed) -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.logical.main_stage import LogicalMainStage -from soc.alu.pipe_data import ALUPipeSpec -from 
soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = LogicalMainStage(pspec) - - # convenience variables - a = dut.i.a - b = dut.i.b - carry_in = dut.i.carry_in - so_in = dut.i.so - carry_out = dut.o.carry_out - o = dut.o.o - - # setup random inputs - comb += [a.eq(AnyConst(64)), - b.eq(AnyConst(64)), - carry_in.eq(AnyConst(1)), - so_in.eq(AnyConst(1))] - - comb += dut.i.ctx.op.eq(rec) - - # Assert that op gets copied from the input to output - for rec_sig in rec.ports(): - name = rec_sig.name - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - # signed and signed/32 versions of input a - a_signed = Signal(signed(64)) - a_signed_32 = Signal(signed(32)) - comb += a_signed.eq(a) - comb += a_signed_32.eq(a[0:32]) - - # main assertion of arithmetic operations - with m.Switch(rec.insn_type): - with m.Case(InternalOp.OP_AND): - comb += Assert(dut.o.o == a & b) - with m.Case(InternalOp.OP_OR): - comb += Assert(dut.o.o == a | b) - with m.Case(InternalOp.OP_XOR): - comb += Assert(dut.o.o == a ^ b) - - return m - - -class LogicalTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=2) - self.assertFormal(module, mode="cover", depth=2) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("main_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git 
a/src/soc/pipe/branch/input_stage.py b/src/soc/pipe/branch/input_stage.py deleted file mode 100644 index e6ab48ea..00000000 --- a/src/soc/pipe/branch/input_stage.py +++ /dev/null @@ -1,63 +0,0 @@ -# This stage is intended to adjust the input data before sending it to -# the acutal ALU. Things like handling inverting the input, carry_in -# generation for subtraction, and handling of immediates should happen -# here -from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, - unsigned) -from nmutil.pipemodbase import PipeModBase -from soc.decoder.power_enums import InternalOp -from soc.alu.pipe_data import ALUInputData -from soc.decoder.power_enums import CryIn - - -class ALUInputStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "input") - - def ispec(self): - return ALUInputData(self.pspec) - - def ospec(self): - return ALUInputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - ##### operand A ##### - - # operand a to be as-is or inverted - a = Signal.like(self.i.a) - - with m.If(self.i.ctx.op.invert_a): - comb += a.eq(~self.i.a) - with m.Else(): - comb += a.eq(self.i.a) - - comb += self.o.a.eq(a) - - ##### operand B ##### - - # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 - # remove this, just do self.o.b.eq(self.i.b) and move the - # immediate-detection into set_alu_inputs in the unit test - # If there's an immediate, set the B operand to that - comb += self.o.b.eq(self.i.b) - - ##### carry-in ##### - - # either copy incoming carry or set to 1/0 as defined by op - with m.Switch(self.i.ctx.op.input_carry): - with m.Case(CryIn.ZERO): - comb += self.o.carry_in.eq(0) - with m.Case(CryIn.ONE): - comb += self.o.carry_in.eq(1) - with m.Case(CryIn.CA): - comb += self.o.carry_in.eq(self.i.carry_in) - - ##### sticky overflow and context (both pass-through) ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git 
a/src/soc/pipe/branch/main_stage.py b/src/soc/pipe/branch/main_stage.py deleted file mode 100644 index 6f6d488a..00000000 --- a/src/soc/pipe/branch/main_stage.py +++ /dev/null @@ -1,139 +0,0 @@ -# This stage is intended to do most of the work of executing Logical -# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ -# however input and output stages also perform bit-negation on input(s) -# and output, as well as carry and overflow generation. -# This module however should not gate the carry or overflow, that's up -# to the output stage - -from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) -from nmutil.pipemodbase import PipeModBase -from soc.branch.pipe_data import BranchInputData, BranchOutputData -from soc.decoder.power_enums import InternalOp - -from soc.decoder.power_fields import DecodeFields -from soc.decoder.power_fieldsn import SignalBitRange - -def br_ext(bd): - return Cat(Const(0, 2), bd, Repl(bd[-1], 64-(bd.shape().width + 2))) - -""" -Notes on BO Field: - -BO Description -0000z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=0 -0001z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=0 -001at Branch if CR[BI]=0 -0100z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=1 -0101z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=1 -011at Branch if CR[BI]=1 -1a00t Decrement the CTR, then branch if decremented CTR[M:63]!=0 -1a01t Decrement the CTR, then branch if decremented CTR[M:63]=0 -1z1zz Branch always -""" - -class BranchMainStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "main") - self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) - self.fields.create_specs() - - def ispec(self): - return BranchInputData(self.pspec) - - def ospec(self): - return BranchOutputData(self.pspec) # TODO: ALUIntermediateData - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - op = self.i.ctx.op - lk = 
op.lk # see PowerDecode2 as to why this is done - nia_o, lr_o = self.o.nia, self.o.lr - - # obtain relevant instruction fields - i_fields = self.fields.FormI - aa = Signal(i_fields.AA[0:-1].shape()) - comb += aa.eq(i_fields.AA[0:-1]) - - br_imm_addr = Signal(64, reset_less=True) - br_addr = Signal(64, reset_less=True) - br_taken = Signal(reset_less=True) - - # Handle absolute or relative branches - with m.If(aa): - comb += br_addr.eq(br_imm_addr) - with m.Else(): - comb += br_addr.eq(br_imm_addr + self.i.cia) - - # fields for conditional branches (BO and BI are same for BC and BCREG) - # NOTE: here, BO and BI we would like be treated as CR regfile - # selectors (similar to RA, RB, RS, RT). see comment here: - # https://bugs.libre-soc.org/show_bug.cgi?id=313#c2 - b_fields = self.fields.FormB - BO = b_fields.BO[0:-1] - BI = b_fields.BI[0:-1] - - # The bit of CR selected by BI - cr_bit = Signal(reset_less=True) - comb += cr_bit.eq((self.i.cr & (1<<(31-BI))) != 0) - - # Whether the conditional branch should be taken - bc_taken = Signal(reset_less=True) - with m.If(BO[2]): - comb += bc_taken.eq((cr_bit == BO[3]) | BO[4]) - with m.Else(): - # decrement the counter and place into output - ctr = Signal(64, reset_less=True) - comb += ctr.eq(self.i.ctr - 1) - comb += self.o.ctr.data.eq(ctr) - comb += self.o.ctr.ok.eq(1) - # take either all 64 bits or only 32 of post-incremented counter - ctr_m = Signal(64, reset_less=True) - with m.If((op.is_32bit): - comb += ctr_m.eq(ctr[:32]) - with m.Else(): - comb += ctr_m.eq(ctr) - # check CTR zero/non-zero against BO[1] - ctr_zero_bo1 = Signal(reset_less=True) # BO[1] == (ctr==0) - comb += ctr_zero_bo1.eq(BO[1] ^ ctr_m.any()) - with m.If(BO[3:5] == 0b00): - comb += bc_taken.eq(ctr_zero_bo1 & ~cr_bit) - with m.Elif(BO[3:5] == 0b01): - comb += bc_taken.eq(ctr_zero_bo1 & cr_bit) - with m.Elif(BO[4] == 1): - comb += bc_taken.eq(ctr_zero_bo1) - - ### Main Switch Statement ### - with m.Switch(op.insn_type): - #### branch #### - with 
m.Case(InternalOp.OP_B): - LI = i_fields.LI[0:-1] - comb += br_imm_addr.eq(br_ext(LI)) - comb += br_taken.eq(1) - #### branch conditional #### - with m.Case(InternalOp.OP_BC): - BD = b_fields.BD[0:-1] - comb += br_imm_addr.eq(br_ext(BD)) - comb += br_taken.eq(bc_taken) - #### branch conditional reg #### - with m.Case(InternalOp.OP_BCREG): - comb += br_imm_addr.eq(self.i.spr1) # SPR1 is set by decode unit - comb += br_taken.eq(bc_taken) - - ###### output next instruction address ##### - - comb += nia_o.data.eq(br_addr) - comb += nia_o.ok.eq(br_taken) - - ###### link register - only activate on operations marked as "lk" ##### - - with m.If(lk): - # ctx.op.lk is the AND of the insn LK field *and* whether the - # op is to "listen" to the link field - comb += lr_o.data.eq(self.i.cia + 4) - comb += lr_o.ok.eq(1) - - ###### and context ##### - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/branch/pipe_data.py b/src/soc/pipe/branch/pipe_data.py deleted file mode 100644 index 0ef4f000..00000000 --- a/src/soc/pipe/branch/pipe_data.py +++ /dev/null @@ -1,90 +0,0 @@ -""" - Optional Register allocation listed below. mandatory input - (CompBROpSubset, CIA) not included. - - * CR is Condition Register (not an SPR) - * SPR1, SPR2 and SPR3 are all from the SPR regfile. 
3 ports are needed - - insn CR SPR1 SPR2 SPR3 - ---- -- ---- ---- ---- - op_b xx xx xx xx - op_ba xx xx xx xx - op_bl xx xx xx xx - op_bla xx xx xx xx - op_bc CR, xx, CTR xx - op_bca CR, xx, CTR xx - op_bcl CR, xx, CTR xx - op_bcla CR, xx, CTR xx - op_bclr CR, LR, CTR xx - op_bclrl CR, LR, CTR xx - op_bcctr CR, xx, CTR xx - op_bcctrl CR, xx, CTR xx - op_bctar CR, TAR, CTR, xx - op_bctarl CR, TAR, CTR, xx - - op_sc xx xx xx MSR - op_scv xx LR, SRR1, MSR - op_rfscv xx LR, CTR, MSR - op_rfid xx SRR0, SRR1, MSR - op_hrfid xx HSRR0, HSRR1, MSR -""" - -from nmigen import Signal, Const -from ieee754.fpcommon.getop import FPPipeContext -from soc.decoder.power_decoder2 import Data -from soc.alu.pipe_data import IntegerData - - -class BranchInputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR - # this involves the *decode* unit selecting the register, based - # on detecting the operand being bcctr, bclr or bctar - - self.spr1 = Signal(64, reset_less=True) # see table above, SPR1 - self.spr2 = Signal(64, reset_less=True) # see table above, SPR2 - self.spr3 = Signal(64, reset_less=True) # see table above, SPR3 - self.cr = Signal(32, reset_less=True) # Condition Register(s) CR0-7 - self.cia = Signal(64, reset_less=True) # Current Instruction Address - - # convenience variables. 
not all of these are used at once - self.ctr = self.srr0 = self.hsrr0 = self.spr2 - self.lr = self.tar = self.srr1 = self.hsrr1 = self.spr1 - self.msr = self.spr3 - - def __iter__(self): - yield from super().__iter__() - yield self.spr1 - yield self.spr2 - yield self.spr3 - yield self.cr - yield self.cia - - def eq(self, i): - lst = super().eq(i) - return lst + [self.spr1.eq(i.spr1), self.spr2.eq(i.spr2), - self.spr3.eq(i.spr3), - self.cr.eq(i.cr), self.cia.eq(i.cia)] - - -class BranchOutputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.lr = Data(64, name="lr") - self.spr = Data(64, name="spr") - self.nia = Data(64, name="nia") - - # convenience variables. - self.ctr = self.spr - - def __iter__(self): - yield from super().__iter__() - yield from self.lr - yield from self.spr - yield from self.nia - - def eq(self, i): - lst = super().eq(i) - return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr), - self.nia.eq(i.nia)] diff --git a/src/soc/pipe/branch/pipeline.py b/src/soc/pipe/branch/pipeline.py deleted file mode 100644 index ac132f74..00000000 --- a/src/soc/pipe/branch/pipeline.py +++ /dev/null @@ -1,21 +0,0 @@ -from nmutil.singlepipe import ControlBase -from nmutil.pipemodbase import PipeModBaseChain -from soc.branch.main_stage import BranchMainStage - -class BranchStages(PipeModBaseChain): - def get_chain(self): - main = BranchMainStage(self.pspec) - return [main] - - -class BranchBasePipe(ControlBase): - def __init__(self, pspec): - ControlBase.__init__(self) - self.pipe1 = BranchStages(pspec) - self._eqs = self.connect([self.pipe1]) - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - m.submodules.pipe = self.pipe1 - m.d.comb += self._eqs - return m diff --git a/src/soc/pipe/branch/test/test_pipe_caller.py b/src/soc/pipe/branch/test/test_pipe_caller.py deleted file mode 100644 index 10d2bba2..00000000 --- a/src/soc/pipe/branch/test/test_pipe_caller.py +++ /dev/null @@ -1,210 +0,0 @@ -from nmigen import 
Module, Signal -from nmigen.back.pysim import Simulator, Delay, Settle -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil -import unittest -from soc.decoder.isa.caller import ISACaller, special_sprs -from soc.decoder.power_decoder import (create_pdecode) -from soc.decoder.power_decoder2 import (PowerDecode2) -from soc.decoder.power_enums import (XER_bits, Function) -from soc.decoder.selectable_int import SelectableInt -from soc.simulator.program import Program -from soc.decoder.isa.all import ISA - - -from soc.branch.pipeline import BranchBasePipe -from soc.branch.br_input_record import CompBROpSubset -from soc.alu.pipe_data import ALUPipeSpec -import random - - -class TestCase: - def __init__(self, program, regs, sprs, cr, name): - self.program = program - self.regs = regs - self.sprs = sprs - self.name = name - self.cr = cr - -def get_rec_width(rec): - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - return recwidth - - -# This test bench is a bit different than is usual. Initially when I -# was writing it, I had all of the tests call a function to create a -# device under test and simulator, initialize the dut, run the -# simulation for ~2 cycles, and assert that the dut output what it -# should have. However, this was really slow, since it needed to -# create and tear down the dut and simulator for every test case. - -# Now, instead of doing that, every test case in ALUTestCase puts some -# data into the test_data list below, describing the instructions to -# be tested and the initial state. Once all the tests have been run, -# test_data gets passed to TestRunner which then sets up the DUT and -# simulator once, runs all the data through it, and asserts that the -# results match the pseudocode sim at every cycle. - -# By doing this, I've reduced the time it takes to run the test suite -# massively. 
Before, it took around 1 minute on my computer, now it -# takes around 3 seconds - -test_data = [] - - -class BranchTestCase(FHDLTestCase): - def __init__(self, name): - super().__init__(name) - self.test_name = name - def run_tst_program(self, prog, initial_regs=[0] * 32, - initial_sprs={}, initial_cr=0): - tc = TestCase(prog, initial_regs, initial_sprs, initial_cr, - self.test_name) - test_data.append(tc) - - def test_unconditional(self): - choices = ["b", "ba", "bl", "bla"] - for i in range(20): - choice = random.choice(choices) - imm = random.randrange(-1<<23, (1<<23)-1) * 4 - lst = [f"{choice} {imm}"] - initial_regs = [0] * 32 - self.run_tst_program(Program(lst), initial_regs) - - def test_bc_cr(self): - for i in range(20): - bc = random.randrange(-1<<13, (1<<13)-1) * 4 - bo = random.choice([0b01100, 0b00100, 0b10100]) - bi = random.randrange(0, 31) - cr = random.randrange(0, (1<<32)-1) - lst = [f"bc {bo}, {bi}, {bc}"] - initial_regs = [0] * 32 - self.run_tst_program(Program(lst), initial_cr=cr) - - def test_bc_ctr(self): - for i in range(20): - bc = random.randrange(-1<<13, (1<<13)-1) * 4 - bo = random.choice([0, 2, 8, 10, 16, 18]) - bi = random.randrange(0, 31) - cr = random.randrange(0, (1<<32)-1) - ctr = random.randint(0, (1<<32)-1) - lst = [f"bc {bo}, {bi}, {bc}"] - initial_sprs={9: SelectableInt(ctr, 64)} - self.run_tst_program(Program(lst), - initial_sprs=initial_sprs, - initial_cr=cr) - - def test_ilang(self): - rec = CompBROpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - alu = BranchBasePipe(pspec) - vl = rtlil.convert(alu, ports=alu.ports()) - with open("logical_pipeline.il", "w") as f: - f.write(vl) - - -class TestRunner(FHDLTestCase): - def __init__(self, test_data): - super().__init__("run_all") - self.test_data = test_data - - def run_all(self): - m = Module() - comb = m.d.comb - instruction = Signal(32) - - pdecode = create_pdecode() - - m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) - - rec = CompBROpSubset() 
- - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - m.submodules.branch = branch = BranchBasePipe(pspec) - - comb += branch.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) - comb += branch.p.valid_i.eq(1) - comb += branch.n.ready_i.eq(1) - comb += pdecode2.dec.raw_opcode_in.eq(instruction) - sim = Simulator(m) - - sim.add_clock(1e-6) - def process(): - for test in self.test_data: - print(test.name) - program = test.program - self.subTest(test.name) - simulator = ISA(pdecode2, test.regs, test.sprs, test.cr) - initial_cia = 0x2000 - simulator.set_pc(initial_cia) - gen = program.generate_instructions() - instructions = list(zip(gen, program.assembly.splitlines())) - - index = (simulator.pc.CIA.value - initial_cia)//4 - while index < len(instructions) and index >= 0: - print(index) - ins, code = instructions[index] - - print("0x{:X}".format(ins & 0xffffffff)) - print(code) - - # ask the decoder to decode this binary data (endian'd) - yield pdecode2.dec.bigendian.eq(0) # little / big? - yield instruction.eq(ins) # raw binary instr. - yield branch.p.data_i.cia.eq(simulator.pc.CIA.value) - yield branch.p.data_i.cr.eq(simulator.cr.get_range().value) - # note, here, the op will need further decoding in order - # to set the correct SPRs on SPR1/2/3. op_bc* require - # spr2 to be set to CTR, op_bctar require spr1 to be - # set to TAR, op_bclr* require spr1 to be set to LR. 
- # if op_sc*, op_rf* and op_hrfid are to be added here - # then additional op-decoding is required, accordingly - yield branch.p.data_i.spr2.eq(simulator.spr['CTR'].value) - print(f"cr0: {simulator.crl[0].get_range()}") - yield Settle() - fn_unit = yield pdecode2.e.fn_unit - self.assertEqual(fn_unit, Function.BRANCH.value, code) - yield - yield - opname = code.split(' ')[0] - prev_nia = simulator.pc.NIA.value - yield from simulator.call(opname) - index = (simulator.pc.CIA.value - initial_cia)//4 - - yield from self.assert_outputs(branch, pdecode2, - simulator, prev_nia, code) - - - sim.add_sync_process(process) - with sim.write_vcd("simulator.vcd", "simulator.gtkw", - traces=[]): - sim.run() - - def assert_outputs(self, branch, dec2, sim, prev_nia, code): - branch_taken = yield branch.n.data_o.nia.ok - sim_branch_taken = prev_nia != sim.pc.CIA - self.assertEqual(branch_taken, sim_branch_taken, code) - if branch_taken: - branch_addr = yield branch.n.data_o.nia.data - self.assertEqual(branch_addr, sim.pc.CIA.value, code) - - lk = yield dec2.e.lk - branch_lk = yield branch.n.data_o.lr.ok - self.assertEqual(lk, branch_lk, code) - if lk: - branch_lr = yield branch.n.data_o.lr.data - self.assertEqual(sim.spr['LR'], branch_lr, code) - - -if __name__ == "__main__": - unittest.main(exit=False) - suite = unittest.TestSuite() - suite.addTest(TestRunner(test_data)) - - runner = unittest.TextTestRunner() - runner.run(suite) diff --git a/src/soc/pipe/countzero/countzero.py b/src/soc/pipe/countzero/countzero.py deleted file mode 100644 index bd61f571..00000000 --- a/src/soc/pipe/countzero/countzero.py +++ /dev/null @@ -1,136 +0,0 @@ -# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl -from nmigen import Memory, Module, Signal, Cat, Elaboratable -from nmigen.hdl.rec import Record, Layout -from nmigen.cli import main - - -def or4(a, b, c, d): - return Cat(a.any(), b.any(), c.any(), d.any()) - - -class IntermediateResult(Record): - def __init__(self, 
name=None): - layout = (('v16', 15), - ('sel_hi', 2), - ('is_32bit', 1), - ('count_right', 1)) - Record.__init__(self, Layout(layout), name=name) - - -class ZeroCounter(Elaboratable): - def __init__(self): - self.rs_i = Signal(64, reset_less=True) - self.count_right_i = Signal(1, reset_less=True) - self.is_32bit_i = Signal(1, reset_less=True) - self.result_o = Signal(64, reset_less=True) - - def ports(self): - return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o] - - def elaborate(self, platform): - m = Module() - - # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4) - # m.submodule.pe2 = PriorityEncoder(4) - # m.submodule.pe3 = PriorityEncoder(4) - # etc. - # and where right will assign input to v and !right will assign v[::-1] - # so as to reverse the order of the input bits. - - def encoder(v, right): - """ - Return the index of the leftmost or rightmost 1 in a set of 4 bits. - Assumes v is not "0000"; if it is, return (right ? "11" : "00"). - """ - ret = Signal(2, reset_less=True) - with m.If(right): - with m.If(v[0]): - m.d.comb += ret.eq(0) - with m.Elif(v[1]): - m.d.comb += ret.eq(1) - with m.Elif(v[2]): - m.d.comb += ret.eq(2) - with m.Else(): - m.d.comb += ret.eq(3) - with m.Else(): - with m.If(v[3]): - m.d.comb += ret.eq(3) - with m.Elif(v[2]): - m.d.comb += ret.eq(2) - with m.Elif(v[1]): - m.d.comb += ret.eq(1) - with m.Else(): - m.d.comb += ret.eq(0) - return ret - - r = IntermediateResult() - r_in = IntermediateResult() - - m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now - - v = IntermediateResult() - y = Signal(4, reset_less=True) - z = Signal(4, reset_less=True) - sel = Signal(6, reset_less=True) - v4 = Signal(4, reset_less=True) - - # Test 4 groups of 16 bits each. - # The top 2 groups are considered to be zero in 32-bit mode. 
- m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32], - self.rs_i[32:48], self.rs_i[48:64])) - with m.If(self.is_32bit_i): - m.d.comb += v.sel_hi[1].eq(0) - with m.If(self.count_right_i): - m.d.comb += v.sel_hi[0].eq(~z[0]) - with m.Else(): - m.d.comb += v.sel_hi[0].eq(z[1]) - with m.Else(): - m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i)) - - # Select the leftmost/rightmost non-zero group of 16 bits - with m.Switch(v.sel_hi): - with m.Case(0): - m.d.comb += v.v16.eq(self.rs_i[0:16]) - with m.Case(1): - m.d.comb += v.v16.eq(self.rs_i[16:32]) - with m.Case(2): - m.d.comb += v.v16.eq(self.rs_i[32:48]) - with m.Case(3): - m.d.comb += v.v16.eq(self.rs_i[48:64]) - - # Latch this and do the rest in the next cycle, for the sake of timing - m.d.comb += v.is_32bit.eq(self.is_32bit_i) - m.d.comb += v.count_right.eq(self.count_right_i) - m.d.comb += r_in.eq(v) - m.d.comb += sel[4:6].eq(r.sel_hi) - - # Test 4 groups of 4 bits - m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8], - r.v16[8:12], r.v16[12:16])) - m.d.comb += sel[2:4].eq(encoder(y, r.count_right)) - - # Select the leftmost/rightmost non-zero group of 4 bits - with m.Switch(sel[2:4]): - with m.Case(0): - m.d.comb += v4.eq(r.v16[0:4]) - with m.Case(1): - m.d.comb += v4.eq(r.v16[4:8]) - with m.Case(2): - m.d.comb += v4.eq(r.v16[8:12]) - with m.Case(3): - m.d.comb += v4.eq(r.v16[12:16]) - - m.d.comb += sel[0:2].eq(encoder(v4, r.count_right)) - - # sel is now the index of the leftmost/rightmost 1 bit in rs - o = self.result_o - with m.If(v4 == 0): - # operand is zero, return 32 for 32-bit, else 64 - m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit)) - with m.Elif(r.count_right): - # return (63 - sel), trimmed to 5 bits in 32-bit mode - m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit))) - with m.Else(): - m.d.comb += o.eq(sel) - - return m diff --git a/src/soc/pipe/countzero/test/test_countzero.py b/src/soc/pipe/countzero/test/test_countzero.py deleted file mode 100644 index 60185196..00000000 --- 
a/src/soc/pipe/countzero/test/test_countzero.py +++ /dev/null @@ -1,105 +0,0 @@ -# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl -from nmigen import Module, Signal -from nmigen.cli import rtlil -from nmigen.back.pysim import Simulator, Delay -from nmigen.test.utils import FHDLTestCase -import unittest -from soc.countzero.countzero import ZeroCounter - - -class ZeroCounterTestCase(FHDLTestCase): - def test_zerocounter(self): - m = Module() - comb = m.d.comb - m.submodules.dut = dut = ZeroCounter() - - sim = Simulator(m) - # sim.add_clock(1e-6) - - def process(): - print("test zero input") - yield dut.rs_i.eq(0) - yield dut.is_32bit_i.eq(0) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 0x40 - # report "bad cntlzd 0 = " & to_hstring(result); - assert(result == 0x40) - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - # report "bad cntlzd 0 = " & to_hstring(result); - assert(result == 0x40) - yield dut.is_32bit_i.eq(1) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - # report "bad cntlzw 0 = " & to_hstring(result); - assert(result == 0x20) - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - # report "bad cntlzw 0 = " & to_hstring(result); - assert(result == 0x20) - # TODO next tests - - yield dut.rs_i.eq(0b00010000) - yield dut.is_32bit_i.eq(0) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 4, "result %d" % result - - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 59, "result %d" % result - - yield dut.is_32bit_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 27, "result %d" % result - - yield dut.rs_i.eq(0b1100000100000000) - yield dut.is_32bit_i.eq(0) - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 14, 
"result %d" % result - - yield dut.count_right_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 55, "result %d" % result - - yield dut.is_32bit_i.eq(1) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 23, "result %d" % result - - yield dut.count_right_i.eq(0) - yield Delay(1e-6) - result = yield dut.result_o - assert result == 14, "result %d" % result - - - sim.add_process(process) # or sim.add_sync_process(process), see below - - # run test and write vcd - fn = "genullnau" - with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()): - sim.run() - - # cntlzd_w - # cnttzd_w - - -if __name__ == "__main__": - - dut = ZeroCounter() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("countzero.il", "w") as f: - f.write(vl) - - unittest.main() diff --git a/src/soc/pipe/cr/main_stage.py b/src/soc/pipe/cr/main_stage.py deleted file mode 100644 index 67bd78ed..00000000 --- a/src/soc/pipe/cr/main_stage.py +++ /dev/null @@ -1,124 +0,0 @@ -# This stage is intended to do Condition Register instructions -# and output, as well as carry and overflow generation. -# NOTE: with the exception of mtcrf and mfcr, we really should be doing -# the field decoding which -# selects which bits of CR are to be read / written, back in the -# decoder / insn-isue, have both self.i.cr and self.o.cr -# be broken down into 4-bit-wide "registers", with their -# own "Register File" (indexed by bt, ba and bb), -# exactly how INT regs are done (by RA, RB, RS and RT) -# however we are pushed for time so do it as *one* register. 
- -from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) -from nmutil.pipemodbase import PipeModBase -from soc.cr.pipe_data import CRInputData, CROutputData -from soc.decoder.power_enums import InternalOp - -from soc.decoder.power_fields import DecodeFields -from soc.decoder.power_fieldsn import SignalBitRange - - -class CRMainStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "main") - self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) - self.fields.create_specs() - - def ispec(self): - return CRInputData(self.pspec) - - def ospec(self): - return CROutputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - op = self.i.ctx.op - xl_fields = self.fields.FormXL - xfx_fields = self.fields.FormXFX - # default: cr_o remains same as cr input unless modified, below - cr_o = Signal.like(self.i.cr) - comb += cr_o.eq(self.i.cr) - - ##### prepare inputs / temp ##### - - # Generate array for cr input so bits can be selected - cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)]) - for i in range(32): - comb += cr_arr[i].eq(self.i.cr[31-i]) - - # Generate array for cr output so the bit to write to can be - # selected by a signal - cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)]) - for i in range(32): - comb += cr_o[31-i].eq(cr_out_arr[i]) - comb += cr_out_arr[i].eq(cr_arr[i]) - - # Generate the mask for mtcrf, mtocrf, and mfocrf - # replicate every fxm field in the insn to 4-bit, as a mask - FXM = xfx_fields.FXM[0:-1] - mask = Signal(32, reset_less=True) - comb += mask.eq(Cat(*[Repl(FXM[i], 4) for i in range(8)])) - - ################################# - ##### main switch statement ##### - - with m.Switch(op.insn_type): - ##### mcrf ##### - with m.Case(InternalOp.OP_MCRF): - # MCRF copies the 4 bits of crA to crB (for instance - # copying cr2 to cr1) - BF = xl_fields.BF[0:-1] # destination CR - BFA = xl_fields.BFA[0:-1] # source CR - - for i in range(4): - comb += 
cr_out_arr[BF*4 + i].eq(cr_arr[BFA*4 + i]) - - ##### crand, cror, crnor etc. ##### - with m.Case(InternalOp.OP_CROP): - # crand/cror and friends get decoded to the same opcode, but - # one of the fields inside the instruction is a 4 bit lookup - # table. This lookup table gets indexed by bits a and b from - # the CR to determine what the resulting bit should be. - - # Grab the lookup table for cr_op type instructions - lut = Array([Signal(name=f"lut{i}") for i in range(4)]) - # There's no field, just have to grab it directly from the insn - for i in range(4): - comb += lut[i].eq(self.i.ctx.op.insn[6+i]) - - # Get the bit selector fields from the instruction - BT = xl_fields.BT[0:-1] - BA = xl_fields.BA[0:-1] - BB = xl_fields.BB[0:-1] - - # Use the two input bits to look up the result in the LUT - comb += cr_out_arr[BT].eq(lut[Cat(cr_arr[BB], cr_arr[BA])]) - - ##### mtcrf ##### - with m.Case(InternalOp.OP_MTCRF): - # mtocrf and mtcrf are essentially identical - # put input (RA) - mask-selected - into output CR, leave - # rest of CR alone. - comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask)) - - ##### mfcr ##### - with m.Case(InternalOp.OP_MFCR): - # Ugh. mtocrf and mtcrf have one random bit differentiating - # them. 
This bit is not in any particular field, so this - # extracts that bit from the instruction - move_one = Signal(reset_less=True) - comb += move_one.eq(self.i.ctx.op.insn[20]) - - # mfocrf - with m.If(move_one): - comb += self.o.o.eq(self.i.cr & mask) - # mfcrf - with m.Else(): - comb += self.o.o.eq(self.i.cr) - - # output and context - comb += self.o.cr.eq(cr_o) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/cr/pipe_data.py b/src/soc/pipe/cr/pipe_data.py deleted file mode 100644 index d56c8f3f..00000000 --- a/src/soc/pipe/cr/pipe_data.py +++ /dev/null @@ -1,36 +0,0 @@ -from nmigen import Signal, Const -from ieee754.fpcommon.getop import FPPipeContext -from soc.alu.pipe_data import IntegerData - - -class CRInputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.a = Signal(64, reset_less=True) # RA - self.cr = Signal(64, reset_less=True) # CR in - - def __iter__(self): - yield from super().__iter__() - yield self.a - yield self.cr - - def eq(self, i): - lst = super().eq(i) - return lst + [self.a.eq(i.a), - self.cr.eq(i.cr)] - -class CROutputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.o = Signal(64, reset_less=True) # RA - self.cr = Signal(64, reset_less=True) # CR in - - def __iter__(self): - yield from super().__iter__() - yield self.o - yield self.cr - - def eq(self, i): - lst = super().eq(i) - return lst + [self.o.eq(i.o), - self.cr.eq(i.cr)] diff --git a/src/soc/pipe/cr/pipeline.py b/src/soc/pipe/cr/pipeline.py deleted file mode 100644 index 121cdf8d..00000000 --- a/src/soc/pipe/cr/pipeline.py +++ /dev/null @@ -1,21 +0,0 @@ -from nmutil.singlepipe import ControlBase -from nmutil.pipemodbase import PipeModBaseChain -from soc.cr.main_stage import CRMainStage - -class CRStages(PipeModBaseChain): - def get_chain(self): - main = CRMainStage(self.pspec) - return [main] - - -class CRBasePipe(ControlBase): - def __init__(self, pspec): - ControlBase.__init__(self) - self.pipe1 = 
CRStages(pspec) - self._eqs = self.connect([self.pipe1]) - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - m.submodules.pipe = self.pipe1 - m.d.comb += self._eqs - return m diff --git a/src/soc/pipe/cr/test/test_pipe_caller.py b/src/soc/pipe/cr/test/test_pipe_caller.py deleted file mode 100644 index fa08fb66..00000000 --- a/src/soc/pipe/cr/test/test_pipe_caller.py +++ /dev/null @@ -1,232 +0,0 @@ -from nmigen import Module, Signal -from nmigen.back.pysim import Simulator, Delay, Settle -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil -import unittest -from soc.decoder.isa.caller import ISACaller, special_sprs -from soc.decoder.power_decoder import (create_pdecode) -from soc.decoder.power_decoder2 import (PowerDecode2) -from soc.decoder.power_enums import (XER_bits, Function) -from soc.decoder.selectable_int import SelectableInt -from soc.simulator.program import Program -from soc.decoder.isa.all import ISA - - -from soc.cr.pipeline import CRBasePipe -from soc.alu.alu_input_record import CompALUOpSubset -from soc.alu.pipe_data import ALUPipeSpec -import random - - -class TestCase: - def __init__(self, program, regs, sprs, cr, name): - self.program = program - self.regs = regs - self.sprs = sprs - self.name = name - self.cr = cr - -def get_rec_width(rec): - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - return recwidth - - -# This test bench is a bit different than is usual. Initially when I -# was writing it, I had all of the tests call a function to create a -# device under test and simulator, initialize the dut, run the -# simulation for ~2 cycles, and assert that the dut output what it -# should have. However, this was really slow, since it needed to -# create and tear down the dut and simulator for every test case. 
- -# Now, instead of doing that, every test case in ALUTestCase puts some -# data into the test_data list below, describing the instructions to -# be tested and the initial state. Once all the tests have been run, -# test_data gets passed to TestRunner which then sets up the DUT and -# simulator once, runs all the data through it, and asserts that the -# results match the pseudocode sim at every cycle. - -# By doing this, I've reduced the time it takes to run the test suite -# massively. Before, it took around 1 minute on my computer, now it -# takes around 3 seconds - -test_data = [] - - -class CRTestCase(FHDLTestCase): - def __init__(self, name): - super().__init__(name) - self.test_name = name - def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}, - initial_cr=0): - tc = TestCase(prog, initial_regs, initial_sprs, initial_cr, - self.test_name) - test_data.append(tc) - - def test_crop(self): - insns = ["crand", "cror", "crnand", "crnor", "crxor", "creqv", - "crandc", "crorc"] - for i in range(40): - choice = random.choice(insns) - ba = random.randint(0, 31) - bb = random.randint(0, 31) - bt = random.randint(0, 31) - lst = [f"{choice} {ba}, {bb}, {bt}"] - cr = random.randint(0, 7) - self.run_tst_program(Program(lst), initial_cr=cr) - - def test_mcrf(self): - lst = ["mcrf 0, 5"] - cr = 0xffff0000 - self.run_tst_program(Program(lst), initial_cr=cr) - - def test_mtcrf(self): - for i in range(20): - mask = random.randint(0, 255) - lst = [f"mtcrf {mask}, 2"] - cr = random.randint(0, (1<<32)-1) - initial_regs = [0] * 32 - initial_regs[2] = random.randint(0, (1<<32)-1) - self.run_tst_program(Program(lst), initial_regs=initial_regs, - initial_cr=cr) - def test_mtocrf(self): - for i in range(20): - mask = 1< - -from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, - signed) -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.logical.bperm import 
Bpermd - -import unittest - - -# So formal verification is a little different than writing a test -# case, as you're actually generating logic around your module to -# check that it behaves a certain way. So here, I'm going to create a -# module to put my formal assertions in -class Driver(Elaboratable): - def __init__(self): - # We don't need any inputs and outputs here, so I won't - # declare any - pass - - def elaborate(self, platform): - # standard stuff - m = Module() - comb = m.d.comb - - # instantiate the device under test as a submodule - m.submodules.bperm = bperm = Bpermd(64) - - # Grab the inputs and outputs of the DUT to make them more - # convenient to access - rs = bperm.rs - rb = bperm.rb - ra = bperm.ra - - # Before we prove any properties about the DUT, we need to set - # up its inputs. There's a couple ways to do this, you could - # define some inputs and outputs for the driver module and - # wire them up to the DUT, but that's kind of a pain. The - # other option is to use AnyConst/AnySeq, which tells yosys - # that those inputs can take on any value. - - # AnyConst should be used when the input should take on a - # random value, but that value should be constant throughout - # the test. - # AnySeq should be used when the input can change on every - # cycle - - # Since this is a combinatorial circuit, it really doesn't - # matter which one you choose, so I chose AnyConst. If this - # was a sequential circuit, (especially a state machine) you'd - # want to use AnySeq - comb += [rs.eq(AnyConst(64)), - rb.eq(AnyConst(64))] - - - # The pseudocode in the Power ISA manual (v3.1) is as follows: - # do i = 0 to 7 - # index <- RS[8*i:8*i+8] - # if index < 64: - # perm[i] <- RB[index] - # else: - # perm[i] <- 0 - # RA <- 56'b0 || perm[0:8] # big endian though - - # Looking at this, I can identify 3 properties that the bperm - # module should keep: - # 1. RA[8:64] should always equal 0 - # 2. If RB[i*8:i*8+8] >= 64 then RA[i] should equal 0 - # 3. 
If RB[i*8:i*8+8] < 64 then RA[i] should RS[index] - - # Now we need to Assert that the properties above hold: - - # Property 1: RA[8:64] should always equal 0 - comb += Assert(ra[8:] == 0) - # Notice how we're adding Assert to comb like it's a circuit? - # That's because it kind of is. If you run this proof and have - # yosys graph the ilang, you'll be able to see an equals - # comparison cell feeding into an assert cell - - # Now we need to prove property #2. I'm going to leave this to - # you Cole. I'd start by writing a for loop and extracting the - # 8 indices into signals. Then I'd write an if statement - # checking if the index is >= 64 (it's hardware, so use an - # m.If()). Finally, I'd add an assert that checks whether - # ra[i] is equal to 0 - - - - return m - - -class TestCase(FHDLTestCase): - # This bit here is actually in charge of running the formal - # proof. It has nmigen spit out the ilang, and feeds it to - # SymbiYosys to run the proof. If the proof fails, yosys will - # generate a .vcd file showing how it was able to violate your - # assertions in proof_bperm_formal/engine_0/trace.vcd. From that - # you should be able to figure out what went wrong, and either - # correct the assertion or fix the DUT - def test_formal(self): - module = Driver() - # This runs a Bounded Model Check on the driver module - # above. What that does is it starts at some initial state, - # and steps it through `depth` cycles, checking that the - # assertions hold at every cycle. Since this is a - # combinatorial module, it only needs 1 cycle to prove - # everything. 
- self.assertFormal(module, mode="bmc", depth=2) - self.assertFormal(module, mode="cover", depth=2) - - # As mentioned above, you can look at the graph in yosys and see - # all the assertion cells - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("bperm.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/logical/formal/proof_input_stage.py b/src/soc/pipe/logical/formal/proof_input_stage.py deleted file mode 100644 index bb62fb67..00000000 --- a/src/soc/pipe/logical/formal/proof_input_stage.py +++ /dev/null @@ -1,85 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import Module, Signal, Elaboratable, Mux -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.alu.input_stage import ALUInputStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = ALUInputStage(pspec) - - a = Signal(64) - b = Signal(64) - comb += [dut.i.a.eq(a), - dut.i.b.eq(b), - a.eq(AnyConst(64)), - b.eq(AnyConst(64))] - - - comb += dut.i.ctx.op.eq(rec) - - - # Assert that op gets copied from the input to output - for p in rec.ports(): - name = p.name - rec_sig = p - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - 
with m.If(rec.invert_a): - comb += Assert(dut.o.a == ~a) - with m.Else(): - comb += Assert(dut.o.a == a) - - with m.If(rec.imm_data.imm_ok & - ~(rec.insn_type == InternalOp.OP_RLC)): - comb += Assert(dut.o.b == rec.imm_data.imm) - with m.Else(): - comb += Assert(dut.o.b == b) - - - - - return m - -class GTCombinerTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=4) - self.assertFormal(module, mode="cover", depth=4) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("input_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/logical/formal/proof_main_stage.py b/src/soc/pipe/logical/formal/proof_main_stage.py deleted file mode 100644 index 5ca9481d..00000000 --- a/src/soc/pipe/logical/formal/proof_main_stage.py +++ /dev/null @@ -1,92 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, - signed) -from nmigen.asserts import Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.logical.main_stage import LogicalMainStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = LogicalMainStage(pspec) - - # convenience variables - a = 
dut.i.a - b = dut.i.b - carry_in = dut.i.carry_in - so_in = dut.i.so - carry_out = dut.o.carry_out - o = dut.o.o - - # setup random inputs - comb += [a.eq(AnyConst(64)), - b.eq(AnyConst(64)), - carry_in.eq(AnyConst(1)), - so_in.eq(AnyConst(1))] - - comb += dut.i.ctx.op.eq(rec) - - # Assert that op gets copied from the input to output - for rec_sig in rec.ports(): - name = rec_sig.name - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - # signed and signed/32 versions of input a - a_signed = Signal(signed(64)) - a_signed_32 = Signal(signed(32)) - comb += a_signed.eq(a) - comb += a_signed_32.eq(a[0:32]) - - # main assertion of arithmetic operations - with m.Switch(rec.insn_type): - with m.Case(InternalOp.OP_AND): - comb += Assert(dut.o.o == a & b) - with m.Case(InternalOp.OP_OR): - comb += Assert(dut.o.o == a | b) - with m.Case(InternalOp.OP_XOR): - comb += Assert(dut.o.o == a ^ b) - - return m - - -class LogicalTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=2) - self.assertFormal(module, mode="cover", depth=2) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("main_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/logical/input_stage.py b/src/soc/pipe/logical/input_stage.py deleted file mode 100644 index e6ab48ea..00000000 --- a/src/soc/pipe/logical/input_stage.py +++ /dev/null @@ -1,63 +0,0 @@ -# This stage is intended to adjust the input data before sending it to -# the acutal ALU. 
Things like handling inverting the input, carry_in -# generation for subtraction, and handling of immediates should happen -# here -from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, - unsigned) -from nmutil.pipemodbase import PipeModBase -from soc.decoder.power_enums import InternalOp -from soc.alu.pipe_data import ALUInputData -from soc.decoder.power_enums import CryIn - - -class ALUInputStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "input") - - def ispec(self): - return ALUInputData(self.pspec) - - def ospec(self): - return ALUInputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - ##### operand A ##### - - # operand a to be as-is or inverted - a = Signal.like(self.i.a) - - with m.If(self.i.ctx.op.invert_a): - comb += a.eq(~self.i.a) - with m.Else(): - comb += a.eq(self.i.a) - - comb += self.o.a.eq(a) - - ##### operand B ##### - - # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 - # remove this, just do self.o.b.eq(self.i.b) and move the - # immediate-detection into set_alu_inputs in the unit test - # If there's an immediate, set the B operand to that - comb += self.o.b.eq(self.i.b) - - ##### carry-in ##### - - # either copy incoming carry or set to 1/0 as defined by op - with m.Switch(self.i.ctx.op.input_carry): - with m.Case(CryIn.ZERO): - comb += self.o.carry_in.eq(0) - with m.Case(CryIn.ONE): - comb += self.o.carry_in.eq(1) - with m.Case(CryIn.CA): - comb += self.o.carry_in.eq(self.i.carry_in) - - ##### sticky overflow and context (both pass-through) ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/logical/main_stage.py b/src/soc/pipe/logical/main_stage.py deleted file mode 100644 index e740d07a..00000000 --- a/src/soc/pipe/logical/main_stage.py +++ /dev/null @@ -1,127 +0,0 @@ -# This stage is intended to do most of the work of executing Logical -# instructions. 
This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ -# however input and output stages also perform bit-negation on input(s) -# and output, as well as carry and overflow generation. -# This module however should not gate the carry or overflow, that's up -# to the output stage - -from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) -from nmutil.pipemodbase import PipeModBase -from soc.logical.pipe_data import ALUInputData -from soc.alu.pipe_data import ALUOutputData -from ieee754.part.partsig import PartitionedSignal -from soc.decoder.power_enums import InternalOp -from soc.countzero.countzero import ZeroCounter - -from soc.decoder.power_fields import DecodeFields -from soc.decoder.power_fieldsn import SignalBitRange - - -def array_of(count, bitwidth): - res = [] - for i in range(count): - res.append(Signal(bitwidth, reset_less=True)) - return res - - -class LogicalMainStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "main") - self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) - self.fields.create_specs() - - def ispec(self): - return ALUInputData(self.pspec) - - def ospec(self): - return ALUOutputData(self.pspec) # TODO: ALUIntermediateData - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o - - ########################## - # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount - - with m.Switch(op.insn_type): - - ###### AND, OR, XOR ####### - with m.Case(InternalOp.OP_AND): - comb += o.eq(a & b) - with m.Case(InternalOp.OP_OR): - comb += o.eq(a | b) - with m.Case(InternalOp.OP_XOR): - comb += o.eq(a ^ b) - - ###### cmpb ####### - with m.Case(InternalOp.OP_CMPB): - l = [] - for i in range(8): - slc = slice(i*8, (i+1)*8) - l.append(Repl(a[slc] == b[slc], 8)) - comb += o.eq(Cat(*l)) - - ###### popcount ####### - with m.Case(InternalOp.OP_POPCNT): - # starting from a, perform successive addition-reductions - # creating 
arrays big enough to store the sum, each time - pc = [a] - # QTY32 2-bit (to take 2x 1-bit sums) etc. - work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)] - for l, b in work: - pc.append(array_of(l, b)) - pc8 = pc[3] # array of 8 8-bit counts (popcntb) - pc32 = pc[5] # array of 2 32-bit counts (popcntw) - popcnt = pc[-1] # array of 1 64-bit count (popcntd) - # cascade-tree of adds - for idx, (l, b) in enumerate(work): - for i in range(l): - stt, end = i*2, i*2+1 - src, dst = pc[idx], pc[idx+1] - comb += dst[i].eq(Cat(src[stt], Const(0, 1)) + - Cat(src[end], Const(0, 1))) - # decode operation length - with m.If(op.data_len[2:4] == 0b00): - # popcntb - pack 8x 4-bit answers into output - for i in range(8): - comb += o[i*8:i*8+4].eq(pc8[i]) - with m.Elif(op.data_len[3] == 0): - # popcntw - pack 2x 5-bit answers into output - for i in range(2): - comb += o[i*32:i*32+5].eq(pc32[i]) - with m.Else(): - # popcntd - put 1x 6-bit answer into output - comb += o.eq(popcnt[0]) - - ###### parity ####### - with m.Case(InternalOp.OP_PRTY): - # strange instruction which XORs together the LSBs of each byte - par0 = Signal(reset_less=True) - par1 = Signal(reset_less=True) - comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor()) - comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor()) - with m.If(op.data_len[3] == 1): - comb += o.eq(par0 ^ par1) - with m.Else(): - comb += o[0].eq(par0) - comb += o[32].eq(par1) - - ###### cntlz ####### - with m.Case(InternalOp.OP_CNTZ): - XO = self.fields.FormX.XO[0:-1] - m.submodules.countz = countz = ZeroCounter() - comb += countz.rs_i.eq(a) - comb += countz.is_32bit_i.eq(op.is_32bit) - comb += countz.count_right_i.eq(XO[-1]) - comb += o.eq(countz.result_o) - - ###### bpermd ####### - # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt - - ###### sticky overflow and context, both pass-through ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/logical/pipe_data.py 
b/src/soc/pipe/logical/pipe_data.py deleted file mode 100644 index 4bf064fe..00000000 --- a/src/soc/pipe/logical/pipe_data.py +++ /dev/null @@ -1,25 +0,0 @@ -from nmigen import Signal, Const -from ieee754.fpcommon.getop import FPPipeContext -from soc.alu.pipe_data import IntegerData - - -class ALUInputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.a = Signal(64, reset_less=True) # RA - self.b = Signal(64, reset_less=True) # RB/immediate - self.so = Signal(reset_less=True) - self.carry_in = Signal(reset_less=True) - - def __iter__(self): - yield from super().__iter__() - yield self.a - yield self.b - yield self.carry_in - yield self.so - - def eq(self, i): - lst = super().eq(i) - return lst + [self.a.eq(i.a), self.b.eq(i.b), - self.carry_in.eq(i.carry_in), - self.so.eq(i.so)] diff --git a/src/soc/pipe/logical/pipeline.py b/src/soc/pipe/logical/pipeline.py deleted file mode 100644 index f3c83276..00000000 --- a/src/soc/pipe/logical/pipeline.py +++ /dev/null @@ -1,25 +0,0 @@ -from nmutil.singlepipe import ControlBase -from nmutil.pipemodbase import PipeModBaseChain -from soc.alu.input_stage import ALUInputStage -from soc.logical.main_stage import LogicalMainStage -from soc.alu.output_stage import ALUOutputStage - -class LogicalStages(PipeModBaseChain): - def get_chain(self): - inp = ALUInputStage(self.pspec) - main = LogicalMainStage(self.pspec) - out = ALUOutputStage(self.pspec) - return [inp, main, out] - - -class LogicalBasePipe(ControlBase): - def __init__(self, pspec): - ControlBase.__init__(self) - self.pipe1 = LogicalStages(pspec) - self._eqs = self.connect([self.pipe1]) - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - m.submodules.pipe = self.pipe1 - m.d.comb += self._eqs - return m diff --git a/src/soc/pipe/logical/test/test_bperm.py b/src/soc/pipe/logical/test/test_bperm.py deleted file mode 100644 index 7a742b0b..00000000 --- a/src/soc/pipe/logical/test/test_bperm.py +++ /dev/null @@ -1 +0,0 
@@ -'''Empty until I write the unit test''' diff --git a/src/soc/pipe/logical/test/test_pipe_caller.py b/src/soc/pipe/logical/test/test_pipe_caller.py deleted file mode 100644 index 79c1e291..00000000 --- a/src/soc/pipe/logical/test/test_pipe_caller.py +++ /dev/null @@ -1,262 +0,0 @@ -from nmigen import Module, Signal -from nmigen.back.pysim import Simulator, Delay, Settle -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil -import unittest -from soc.decoder.isa.caller import ISACaller, special_sprs -from soc.decoder.power_decoder import (create_pdecode) -from soc.decoder.power_decoder2 import (PowerDecode2) -from soc.decoder.power_enums import (XER_bits, Function) -from soc.decoder.selectable_int import SelectableInt -from soc.simulator.program import Program -from soc.decoder.isa.all import ISA - - -from soc.logical.pipeline import LogicalBasePipe -from soc.alu.alu_input_record import CompALUOpSubset -from soc.alu.pipe_data import ALUPipeSpec -import random - - -class TestCase: - def __init__(self, program, regs, sprs, name): - self.program = program - self.regs = regs - self.sprs = sprs - self.name = name - -def get_rec_width(rec): - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - return recwidth - -def set_alu_inputs(alu, dec2, sim): - # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 - # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) - # and place it into data_i.b - - reg3_ok = yield dec2.e.read_reg3.ok - reg1_ok = yield dec2.e.read_reg1.ok - assert reg3_ok != reg1_ok - if reg3_ok: - data1 = yield dec2.e.read_reg3.data - data1 = sim.gpr(data1).value - elif reg1_ok: - data1 = yield dec2.e.read_reg1.data - data1 = sim.gpr(data1).value - else: - data1 = 0 - - yield alu.p.data_i.a.eq(data1) - - # If there's an immediate, set the B operand to that - reg2_ok = yield dec2.e.read_reg2.ok - imm_ok = yield dec2.e.imm_data.imm_ok - if imm_ok: - data2 = 
yield dec2.e.imm_data.imm - elif reg2_ok: - data2 = yield dec2.e.read_reg2.data - data2 = sim.gpr(data2).value - else: - data2 = 0 - yield alu.p.data_i.b.eq(data2) - - - -def set_extra_alu_inputs(alu, dec2, sim): - carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 - yield alu.p.data_i.carry_in.eq(carry) - so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 - yield alu.p.data_i.so.eq(so) - - -# This test bench is a bit different than is usual. Initially when I -# was writing it, I had all of the tests call a function to create a -# device under test and simulator, initialize the dut, run the -# simulation for ~2 cycles, and assert that the dut output what it -# should have. However, this was really slow, since it needed to -# create and tear down the dut and simulator for every test case. - -# Now, instead of doing that, every test case in ALUTestCase puts some -# data into the test_data list below, describing the instructions to -# be tested and the initial state. Once all the tests have been run, -# test_data gets passed to TestRunner which then sets up the DUT and -# simulator once, runs all the data through it, and asserts that the -# results match the pseudocode sim at every cycle. - -# By doing this, I've reduced the time it takes to run the test suite -# massively. 
Before, it took around 1 minute on my computer, now it -# takes around 3 seconds - -test_data = [] - - -class LogicalTestCase(FHDLTestCase): - def __init__(self, name): - super().__init__(name) - self.test_name = name - def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): - tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) - test_data.append(tc) - - def test_rand(self): - insns = ["and", "or", "xor"] - for i in range(40): - choice = random.choice(insns) - lst = [f"{choice} 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rand_imm_logical(self): - insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"] - for i in range(10): - choice = random.choice(insns) - imm = random.randint(0, (1<<16)-1) - lst = [f"{choice} 3, 1, {imm}"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - @unittest.skip("broken") - def test_cntz(self): - insns = ["cntlzd", "cnttzd"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_parity(self): - insns = ["prtyw", "prtyd"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_popcnt(self): - insns = ["popcntb", "popcntw", "popcntd"] - for i in range(10): - choice = random.choice(insns) - lst = [f"{choice} 3, 1"] - print(lst) - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_cmpb(self): - lst = ["cmpb 3, 1, 2"] - initial_regs = 
[0] * 32 - initial_regs[1] = 0xdeadbeefcafec0de - initial_regs[2] = 0xd0adb0000afec1de - self.run_tst_program(Program(lst), initial_regs) - - def test_ilang(self): - rec = CompALUOpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - alu = LogicalBasePipe(pspec) - vl = rtlil.convert(alu, ports=alu.ports()) - with open("logical_pipeline.il", "w") as f: - f.write(vl) - - -class TestRunner(FHDLTestCase): - def __init__(self, test_data): - super().__init__("run_all") - self.test_data = test_data - - def run_all(self): - m = Module() - comb = m.d.comb - instruction = Signal(32) - - pdecode = create_pdecode() - - m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) - - rec = CompALUOpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - m.submodules.alu = alu = LogicalBasePipe(pspec) - - comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) - comb += alu.p.valid_i.eq(1) - comb += alu.n.ready_i.eq(1) - comb += pdecode2.dec.raw_opcode_in.eq(instruction) - sim = Simulator(m) - - sim.add_clock(1e-6) - def process(): - for test in self.test_data: - print(test.name) - program = test.program - self.subTest(test.name) - simulator = ISA(pdecode2, test.regs, test.sprs, 0) - gen = program.generate_instructions() - instructions = list(zip(gen, program.assembly.splitlines())) - - index = simulator.pc.CIA.value//4 - while index < len(instructions): - ins, code = instructions[index] - - print("0x{:X}".format(ins & 0xffffffff)) - print(code) - - # ask the decoder to decode this binary data (endian'd) - yield pdecode2.dec.bigendian.eq(0) # little / big? - yield instruction.eq(ins) # raw binary instr. 
- yield Settle() - fn_unit = yield pdecode2.e.fn_unit - self.assertEqual(fn_unit, Function.LOGICAL.value, code) - yield from set_alu_inputs(alu, pdecode2, simulator) - yield from set_extra_alu_inputs(alu, pdecode2, simulator) - yield - opname = code.split(' ')[0] - yield from simulator.call(opname) - index = simulator.pc.CIA.value//4 - - vld = yield alu.n.valid_o - while not vld: - yield - vld = yield alu.n.valid_o - yield - alu_out = yield alu.n.data_o.o - out_reg_valid = yield pdecode2.e.write_reg.ok - if out_reg_valid: - write_reg_idx = yield pdecode2.e.write_reg.data - expected = simulator.gpr(write_reg_idx).value - print(f"expected {expected:x}, actual: {alu_out:x}") - self.assertEqual(expected, alu_out, code) - yield from self.check_extra_alu_outputs(alu, pdecode2, - simulator) - - sim.add_sync_process(process) - with sim.write_vcd("simulator.vcd", "simulator.gtkw", - traces=[]): - sim.run() - def check_extra_alu_outputs(self, alu, dec2, sim): - rc = yield dec2.e.rc.data - if rc: - cr_expected = sim.crl[0].get_range().value - cr_actual = yield alu.n.data_o.cr0 - self.assertEqual(cr_expected, cr_actual) - - -if __name__ == "__main__": - unittest.main(exit=False) - suite = unittest.TestSuite() - suite.addTest(TestRunner(test_data)) - - runner = unittest.TextTestRunner() - runner.run(suite) diff --git a/src/soc/pipe/shift_rot/formal/.gitignore b/src/soc/pipe/shift_rot/formal/.gitignore deleted file mode 100644 index 150f68c8..00000000 --- a/src/soc/pipe/shift_rot/formal/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*/* diff --git a/src/soc/pipe/shift_rot/formal/proof_main_stage.py b/src/soc/pipe/shift_rot/formal/proof_main_stage.py deleted file mode 100644 index 50264d5c..00000000 --- a/src/soc/pipe/shift_rot/formal/proof_main_stage.py +++ /dev/null @@ -1,108 +0,0 @@ -# Proof of correctness for partitioned equal signal combiner -# Copyright (C) 2020 Michael Nolan - -from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl, - signed) -from nmigen.asserts import 
Assert, AnyConst, Assume, Cover -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil - -from soc.shift_rot.main_stage import ShiftRotMainStage -from soc.alu.pipe_data import ALUPipeSpec -from soc.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp -import unittest - - -# This defines a module to drive the device under test and assert -# properties about its outputs -class Driver(Elaboratable): - def __init__(self): - # inputs and outputs - pass - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - rec = CompALUOpSubset() - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - comb += p.eq(AnyConst(width)) - - pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth) - m.submodules.dut = dut = ShiftRotMainStage(pspec) - - # convenience variables - a = dut.i.rs - b = dut.i.rb - ra = dut.i.ra - carry_in = dut.i.carry_in - so_in = dut.i.so - carry_out = dut.o.carry_out - o = dut.o.o - - # setup random inputs - comb += [a.eq(AnyConst(64)), - b.eq(AnyConst(64)), - carry_in.eq(AnyConst(1)), - so_in.eq(AnyConst(1))] - - comb += dut.i.ctx.op.eq(rec) - - # Assert that op gets copied from the input to output - for rec_sig in rec.ports(): - name = rec_sig.name - dut_sig = getattr(dut.o.ctx.op, name) - comb += Assert(dut_sig == rec_sig) - - # signed and signed/32 versions of input a - a_signed = Signal(signed(64)) - a_signed_32 = Signal(signed(32)) - comb += a_signed.eq(a) - comb += a_signed_32.eq(a[0:32]) - - # main assertion of arithmetic operations - with m.Switch(rec.insn_type): - with m.Case(InternalOp.OP_SHL): - comb += Assume(ra == 0) - with m.If(rec.is_32bit): - comb += Assert(o[0:32] == ((a << b[0:6]) & 0xffffffff)) - comb += Assert(o[32:64] == 0) - with m.Else(): - comb += Assert(o == ((a << b[0:7]) & ((1 << 64)-1))) - with m.Case(InternalOp.OP_SHR): - comb += Assume(ra == 0) - with m.If(~rec.is_signed): - with m.If(rec.is_32bit): - comb += 
Assert(o[0:32] == (a[0:32] >> b[0:6])) - comb += Assert(o[32:64] == 0) - with m.Else(): - comb += Assert(o == (a >> b[0:7])) - with m.Else(): - with m.If(rec.is_32bit): - comb += Assert(o[0:32] == (a_signed_32 >> b[0:6])) - comb += Assert(o[32:64] == Repl(a[31], 32)) - with m.Else(): - comb += Assert(o == (a_signed >> b[0:7])) - - return m - - -class ALUTestCase(FHDLTestCase): - def test_formal(self): - module = Driver() - self.assertFormal(module, mode="bmc", depth=2) - self.assertFormal(module, mode="cover", depth=2) - def test_ilang(self): - dut = Driver() - vl = rtlil.convert(dut, ports=[]) - with open("main_stage.il", "w") as f: - f.write(vl) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/shift_rot/input_stage.py b/src/soc/pipe/shift_rot/input_stage.py deleted file mode 100644 index 72e4c925..00000000 --- a/src/soc/pipe/shift_rot/input_stage.py +++ /dev/null @@ -1,58 +0,0 @@ -# This stage is intended to adjust the input data before sending it to -# the acutal ALU. 
Things like handling inverting the input, carry_in -# generation for subtraction, and handling of immediates should happen -# here -from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed, - unsigned) -from nmutil.pipemodbase import PipeModBase -from soc.decoder.power_enums import InternalOp -from soc.shift_rot.pipe_data import ShiftRotInputData -from soc.decoder.power_enums import CryIn - - -class ShiftRotInputStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "input") - - def ispec(self): - return ShiftRotInputData(self.pspec) - - def ospec(self): - return ShiftRotInputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - ##### operand A ##### - - # operand a to be as-is or inverted - a = Signal.like(self.i.ra) - - with m.If(self.i.ctx.op.invert_a): - comb += a.eq(~self.i.ra) - with m.Else(): - comb += a.eq(self.i.ra) - - comb += self.o.ra.eq(a) - comb += self.o.rb.eq(self.i.rb) - comb += self.o.rs.eq(self.i.rs) - - - ##### carry-in ##### - - # either copy incoming carry or set to 1/0 as defined by op - with m.Switch(self.i.ctx.op.input_carry): - with m.Case(CryIn.ZERO): - comb += self.o.carry_in.eq(0) - with m.Case(CryIn.ONE): - comb += self.o.carry_in.eq(1) - with m.Case(CryIn.CA): - comb += self.o.carry_in.eq(self.i.carry_in) - - ##### sticky overflow and context (both pass-through) ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/shift_rot/main_stage.py b/src/soc/pipe/shift_rot/main_stage.py deleted file mode 100644 index f2375283..00000000 --- a/src/soc/pipe/shift_rot/main_stage.py +++ /dev/null @@ -1,78 +0,0 @@ -# This stage is intended to do most of the work of executing shift -# instructions, as well as carry and overflow generation. 
This module -# however should not gate the carry or overflow, that's up to the -# output stage -from nmigen import (Module, Signal, Cat, Repl, Mux, Const) -from nmutil.pipemodbase import PipeModBase -from soc.alu.pipe_data import ALUOutputData -from soc.shift_rot.pipe_data import ShiftRotInputData -from ieee754.part.partsig import PartitionedSignal -from soc.decoder.power_enums import InternalOp -from soc.shift_rot.rotator import Rotator - -from soc.decoder.power_fields import DecodeFields -from soc.decoder.power_fieldsn import SignalBitRange - - -class ShiftRotMainStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "main") - self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) - self.fields.create_specs() - - def ispec(self): - return ShiftRotInputData(self.pspec) - - def ospec(self): - return ALUOutputData(self.pspec) # TODO: ALUIntermediateData - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - # obtain me and mb fields from instruction. 
- m_fields = self.fields.instrs['M'] - md_fields = self.fields.instrs['MD'] - mb = Signal(m_fields['MB'][0:-1].shape()) - me = Signal(m_fields['ME'][0:-1].shape()) - mb_extra = Signal(1, reset_less=True) - comb += mb.eq(m_fields['MB'][0:-1]) - comb += me.eq(m_fields['ME'][0:-1]) - comb += mb_extra.eq(md_fields['mb'][0:-1][0]) - - # set up microwatt rotator module - m.submodules.rotator = rotator = Rotator() - comb += [ - rotator.me.eq(me), - rotator.mb.eq(mb), - rotator.mb_extra.eq(mb_extra), - rotator.rs.eq(self.i.rs), - rotator.ra.eq(self.i.ra), - rotator.shift.eq(self.i.rb), - rotator.is_32bit.eq(self.i.ctx.op.is_32bit), - rotator.arith.eq(self.i.ctx.op.is_signed), - ] - - # instruction rotate type - mode = Signal(3, reset_less=True) - with m.Switch(self.i.ctx.op.insn_type): - with m.Case(InternalOp.OP_SHL): comb += mode.eq(0b000) - with m.Case(InternalOp.OP_SHR): comb += mode.eq(0b001) # R-shift - with m.Case(InternalOp.OP_RLC): comb += mode.eq(0b110) # clear LR - with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L - with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R - - comb += Cat(rotator.right_shift, - rotator.clear_left, - rotator.clear_right).eq(mode) - - # outputs from the microwatt rotator module - comb += [self.o.o.eq(rotator.result_o), - self.o.carry_out.eq(rotator.carry_out_o)] - - ###### sticky overflow and context, both pass-through ##### - - comb += self.o.so.eq(self.i.so) - comb += self.o.ctx.eq(self.i.ctx) - - return m diff --git a/src/soc/pipe/shift_rot/maskgen.py b/src/soc/pipe/shift_rot/maskgen.py deleted file mode 100644 index 89246e0b..00000000 --- a/src/soc/pipe/shift_rot/maskgen.py +++ /dev/null @@ -1,47 +0,0 @@ -from nmigen import (Elaboratable, Signal, Module) -import math - -class MaskGen(Elaboratable): - """MaskGen - create a diff mask - - example: x=5 --> a=0b11111 - y=3 --> b=0b00111 - o: 0b11000 - x=2 --> a=0b00011 - y=4 --> b=0b01111 - o: 0b10011 - """ - def __init__(self, width): - self.width = width - 
self.shiftwidth = math.ceil(math.log2(width)) - self.mb = Signal(self.shiftwidth, reset_less=True) - self.me = Signal(self.shiftwidth, reset_less=True) - - self.o = Signal(width, reset_less=True) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - x = Signal.like(self.mb) - y = Signal.like(self.mb) - - comb += x.eq(64 - self.mb) - comb += y.eq(63 - self.me) - - mask_a = Signal.like(self.o) - mask_b = Signal.like(self.o) - - comb += mask_a.eq((1< y): - comb += self.o.eq(mask_a ^ mask_b) - with m.Else(): - comb += self.o.eq(mask_a ^ ~mask_b) - - - return m - - def ports(self): - return [self.mb, self.me, self.o] diff --git a/src/soc/pipe/shift_rot/pipe_data.py b/src/soc/pipe/shift_rot/pipe_data.py deleted file mode 100644 index 7f98d16b..00000000 --- a/src/soc/pipe/shift_rot/pipe_data.py +++ /dev/null @@ -1,30 +0,0 @@ -from nmigen import Signal, Const -from nmutil.dynamicpipe import SimpleHandshakeRedir -from soc.alu.alu_input_record import CompALUOpSubset -from ieee754.fpcommon.getop import FPPipeContext -from soc.alu.pipe_data import IntegerData - - -class ShiftRotInputData(IntegerData): - def __init__(self, pspec): - super().__init__(pspec) - self.ra = Signal(64, reset_less=True) # RA - self.rs = Signal(64, reset_less=True) # RS - self.rb = Signal(64, reset_less=True) # RB/immediate - self.so = Signal(reset_less=True) - self.carry_in = Signal(reset_less=True) - - def __iter__(self): - yield from super().__iter__() - yield self.ra - yield self.rs - yield self.rb - yield self.carry_in - yield self.so - - def eq(self, i): - lst = super().eq(i) - return lst + [self.rs.eq(i.rs), self.ra.eq(i.ra), - self.rb.eq(i.rb), - self.carry_in.eq(i.carry_in), - self.so.eq(i.so)] diff --git a/src/soc/pipe/shift_rot/pipeline.py b/src/soc/pipe/shift_rot/pipeline.py deleted file mode 100644 index 1080aa8d..00000000 --- a/src/soc/pipe/shift_rot/pipeline.py +++ /dev/null @@ -1,25 +0,0 @@ -from nmutil.singlepipe import ControlBase -from nmutil.pipemodbase import 
PipeModBaseChain -from soc.shift_rot.input_stage import ShiftRotInputStage -from soc.shift_rot.main_stage import ShiftRotMainStage -from soc.alu.output_stage import ALUOutputStage - -class ShiftRotStages(PipeModBaseChain): - def get_chain(self): - inp = ShiftRotInputStage(self.pspec) - main = ShiftRotMainStage(self.pspec) - out = ALUOutputStage(self.pspec) - return [inp, main, out] - - -class ShiftRotBasePipe(ControlBase): - def __init__(self, pspec): - ControlBase.__init__(self) - self.pipe1 = ShiftRotStages(pspec) - self._eqs = self.connect([self.pipe1]) - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - m.submodules.pipe = self.pipe1 - m.d.comb += self._eqs - return m diff --git a/src/soc/pipe/shift_rot/rotator.py b/src/soc/pipe/shift_rot/rotator.py deleted file mode 100644 index 23aa0e43..00000000 --- a/src/soc/pipe/shift_rot/rotator.py +++ /dev/null @@ -1,156 +0,0 @@ -# Manual translation and adaptation of rotator.vhdl from microwatt into nmigen -# - -from nmigen import (Elaboratable, Signal, Module, Const, Cat, - unsigned, signed) -from soc.shift_rot.rotl import ROTL - -# note BE bit numbering -def right_mask(m, mask_begin): - ret = Signal(64, name="right_mask", reset_less=True) - with m.If(mask_begin <= 64): - m.d.comb += ret.eq((1<<(64-mask_begin)) - 1) - return ret - -def left_mask(m, mask_end): - ret = Signal(64, name="left_mask", reset_less=True) - m.d.comb += ret.eq(~((1<<(63-mask_end)) - 1)) - return ret - - -class Rotator(Elaboratable): - """Rotator: covers multiple POWER9 rotate functions - - supported modes: - - * sl[wd] - * rlw*, rldic, rldicr, rldimi - * rldicl, sr[wd] - * sra[wd][i] - - use as follows: - - * shift = RB[0:7] - * arith = 1 when is_signed - * right_shift = 1 when insn_type is OP_SHR - * clear_left = 1 when insn_type is OP_RLC or OP_RLCL - * clear_right = 1 when insn_type is OP_RLC or OP_RLCR - """ - def __init__(self): - # input - self.me = Signal(5, reset_less=True) # ME field - self.mb = Signal(5, 
reset_less=True) # MB field - self.mb_extra = Signal(1, reset_less=True) # extra bit of mb in MD-form - self.ra = Signal(64, reset_less=True) # RA - self.rs = Signal(64, reset_less=True) # RS - self.ra = Signal(64, reset_less=True) # RA - self.shift = Signal(7, reset_less=True) # RB[0:7] - self.is_32bit = Signal(reset_less=True) - self.right_shift = Signal(reset_less=True) - self.arith = Signal(reset_less=True) - self.clear_left = Signal(reset_less=True) - self.clear_right = Signal(reset_less=True) - # output - self.result_o = Signal(64, reset_less=True) - self.carry_out_o = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - ra, rs = self.ra, self.rs - - # temporaries - rot_in = Signal(64, reset_less=True) - rot_count = Signal(6, reset_less=True) - rot = Signal(64, reset_less=True) - sh = Signal(7, reset_less=True) - mb = Signal(7, reset_less=True) - me = Signal(7, reset_less=True) - mr = Signal(64, reset_less=True) - ml = Signal(64, reset_less=True) - output_mode = Signal(2, reset_less=True) - - # First replicate bottom 32 bits to both halves if 32-bit - comb += rot_in[0:32].eq(rs[0:32]) - with m.If(self.is_32bit): - comb += rot_in[32:64].eq(rs[0:32]) - with m.Else(): - comb += rot_in[32:64].eq(rs[32:64]) - - shift_signed = Signal(signed(6)) - comb += shift_signed.eq(self.shift[0:6]) - - # Negate shift count for right shifts - with m.If(self.right_shift): - comb += rot_count.eq(-shift_signed) - with m.Else(): - comb += rot_count.eq(self.shift[0:6]) - - # ROTL submodule - m.submodules.rotl = rotl = ROTL(64) - comb += rotl.a.eq(rot_in) - comb += rotl.b.eq(rot_count) - comb += rot.eq(rotl.o) - - # Trim shift count to 6 bits for 32-bit shifts - comb += sh.eq(Cat(self.shift[0:6], self.shift[6] & ~self.is_32bit)) - - # XXX errr... we should already have these, in Fields? 
oh well - # Work out mask begin/end indexes (caution, big-endian bit numbering) - - # mask-begin (mb) - with m.If(self.clear_left): - comb += mb.eq(self.mb) - with m.If(self.is_32bit): - comb += mb[5:7].eq(Const(0b01, 2)) - with m.Else(): - comb += mb[5:7].eq(Cat(self.mb_extra, Const(0b0, 1))) - with m.Elif(self.right_shift): - # this is basically mb = sh + (is_32bit? 32: 0); - comb += mb.eq(sh) - with m.If(self.is_32bit): - comb += mb[5:7].eq(Cat(~sh[5], sh[5])) - with m.Else(): - comb += mb.eq(Cat(Const(0b0, 5), self.is_32bit, Const(0b0, 1))) - - # mask-end (me) - with m.If(self.clear_right & self.is_32bit): - # TODO: track down where this is. have to use fields. - comb += me.eq(Cat(self.me, Const(0b01, 2))) - with m.Elif(self.clear_right & ~self.clear_left): - # this is me, have to use fields - comb += me.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1))) - with m.Else(): - # effectively, 63 - sh - comb += me.eq(Cat(~sh[0:6], sh[6])) - - # Calculate left and right masks - comb += mr.eq(right_mask(m, mb)) - comb += ml.eq(left_mask(m, me)) - - # Work out output mode - # 00 for sl[wd] - # 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me - # 10 for rldicl, sr[wd] - # 1z for sra[wd][i], z = 1 if rs is negative - with m.If((self.clear_left & ~self.clear_right) | self.right_shift): - comb += output_mode.eq(Cat(self.arith & rot_in[63], Const(1, 1))) - with m.Else(): - mbgt = self.clear_right & (mb[0:6] > me[0:6]) - comb += output_mode.eq(Cat(mbgt, Const(0, 1))) - - # Generate output from rotated input and masks - with m.Switch(output_mode): - with m.Case(0b00): - comb += self.result_o.eq((rot & (mr & ml)) | (ra & ~(mr & ml))) - with m.Case(0b01): - comb += self.result_o.eq((rot & (mr | ml)) | (ra & ~(mr | ml))) - with m.Case(0b10): - comb += self.result_o.eq(rot & mr) - with m.Case(0b11): - comb += self.result_o.eq(rot | ~mr) - # Generate carry output for arithmetic shift right of -ve value - comb += self.carry_out_o.eq(rs & ~ml) - - return m - diff --git 
a/src/soc/pipe/shift_rot/rotl.py b/src/soc/pipe/shift_rot/rotl.py deleted file mode 100644 index d2ebfcf7..00000000 --- a/src/soc/pipe/shift_rot/rotl.py +++ /dev/null @@ -1,24 +0,0 @@ -from nmigen import (Elaboratable, Signal, Module) -import math - -class ROTL(Elaboratable): - def __init__(self, width): - self.width = width - self.shiftwidth = math.ceil(math.log2(width)) - self.a = Signal(width, reset_less=True) - self.b = Signal(self.shiftwidth, reset_less=True) - - self.o = Signal(width, reset_less=True) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - - shl = Signal.like(self.a) - shr = Signal.like(self.a) - - comb += shl.eq(self.a << self.b) - comb += shr.eq(self.a >> (self.width - self.b)) - - comb += self.o.eq(shl | shr) - return m diff --git a/src/soc/pipe/shift_rot/test/test_maskgen.py b/src/soc/pipe/shift_rot/test/test_maskgen.py deleted file mode 100644 index 1a4d34e6..00000000 --- a/src/soc/pipe/shift_rot/test/test_maskgen.py +++ /dev/null @@ -1,48 +0,0 @@ -from nmigen import Signal, Module -from nmigen.back.pysim import Simulator, Delay, Settle -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil -from soc.alu.maskgen import MaskGen -from soc.decoder.helpers import MASK -import random -import unittest - -class MaskGenTestCase(FHDLTestCase): - def test_maskgen(self): - m = Module() - comb = m.d.comb - m.submodules.dut = dut = MaskGen(64) - mb = Signal.like(dut.mb) - me = Signal.like(dut.me) - o = Signal.like(dut.o) - - comb += [ - dut.mb.eq(mb), - dut.me.eq(me), - o.eq(dut.o)] - - sim = Simulator(m) - - def process(): - for x in range(0, 64): - for y in range(0, 64): - yield mb.eq(x) - yield me.eq(y) - yield Delay(1e-6) - - expected = MASK(x, y) - result = yield o - self.assertEqual(expected, result) - - sim.add_process(process) # or sim.add_sync_process(process), see below - with sim.write_vcd("maskgen.vcd", "maskgen.gtkw", traces=dut.ports()): - sim.run() - - def test_ilang(self): - dut = MaskGen(64) - vl = 
rtlil.convert(dut, ports=dut.ports()) - with open("maskgen.il", "w") as f: - f.write(vl) - -if __name__ == '__main__': - unittest.main() diff --git a/src/soc/pipe/shift_rot/test/test_pipe_caller.py b/src/soc/pipe/shift_rot/test/test_pipe_caller.py deleted file mode 100644 index dbd40923..00000000 --- a/src/soc/pipe/shift_rot/test/test_pipe_caller.py +++ /dev/null @@ -1,279 +0,0 @@ -from nmigen import Module, Signal -from nmigen.back.pysim import Simulator, Delay, Settle -from nmigen.test.utils import FHDLTestCase -from nmigen.cli import rtlil -import unittest -from soc.decoder.isa.caller import ISACaller, special_sprs -from soc.decoder.power_decoder import (create_pdecode) -from soc.decoder.power_decoder2 import (PowerDecode2) -from soc.decoder.power_enums import (XER_bits, Function) -from soc.decoder.selectable_int import SelectableInt -from soc.simulator.program import Program -from soc.decoder.isa.all import ISA - - -from soc.shift_rot.pipeline import ShiftRotBasePipe -from soc.alu.alu_input_record import CompALUOpSubset -from soc.alu.pipe_data import ALUPipeSpec -import random - -class TestCase: - def __init__(self, program, regs, sprs, name): - self.program = program - self.regs = regs - self.sprs = sprs - self.name = name - -def get_rec_width(rec): - recwidth = 0 - # Setup random inputs for dut.op - for p in rec.ports(): - width = p.width - recwidth += width - return recwidth - -def set_alu_inputs(alu, dec2, sim): - inputs = [] - # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43 - # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok)) - # and place it into data_i.b - - reg3_ok = yield dec2.e.read_reg3.ok - if reg3_ok: - reg3_sel = yield dec2.e.read_reg3.data - data3 = sim.gpr(reg3_sel).value - else: - data3 = 0 - reg1_ok = yield dec2.e.read_reg1.ok - if reg1_ok: - reg1_sel = yield dec2.e.read_reg1.data - data1 = sim.gpr(reg1_sel).value - else: - data1 = 0 - reg2_ok = yield dec2.e.read_reg2.ok - imm_ok = yield dec2.e.imm_data.ok - 
if reg2_ok: - reg2_sel = yield dec2.e.read_reg2.data - data2 = sim.gpr(reg2_sel).value - elif imm_ok: - data2 = yield dec2.e.imm_data.imm - else: - data2 = 0 - - yield alu.p.data_i.ra.eq(data1) - yield alu.p.data_i.rb.eq(data2) - yield alu.p.data_i.rs.eq(data3) - - -def set_extra_alu_inputs(alu, dec2, sim): - carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0 - yield alu.p.data_i.carry_in.eq(carry) - so = 1 if sim.spr['XER'][XER_bits['SO']] else 0 - yield alu.p.data_i.so.eq(so) - - -# This test bench is a bit different than is usual. Initially when I -# was writing it, I had all of the tests call a function to create a -# device under test and simulator, initialize the dut, run the -# simulation for ~2 cycles, and assert that the dut output what it -# should have. However, this was really slow, since it needed to -# create and tear down the dut and simulator for every test case. - -# Now, instead of doing that, every test case in ALUTestCase puts some -# data into the test_data list below, describing the instructions to -# be tested and the initial state. Once all the tests have been run, -# test_data gets passed to TestRunner which then sets up the DUT and -# simulator once, runs all the data through it, and asserts that the -# results match the pseudocode sim at every cycle. - -# By doing this, I've reduced the time it takes to run the test suite -# massively. 
Before, it took around 1 minute on my computer, now it -# takes around 3 seconds - -test_data = [] - - -class ALUTestCase(FHDLTestCase): - def __init__(self, name): - super().__init__(name) - self.test_name = name - def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}): - tc = TestCase(prog, initial_regs, initial_sprs, self.test_name) - test_data.append(tc) - - - def test_shift(self): - insns = ["slw", "sld", "srw", "srd", "sraw", "srad"] - for i in range(20): - choice = random.choice(insns) - lst = [f"{choice} 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, 63) - print(initial_regs[1], initial_regs[2]) - self.run_tst_program(Program(lst), initial_regs) - - - def test_shift_arith(self): - lst = ["sraw 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, 63) - print(initial_regs[1], initial_regs[2]) - self.run_tst_program(Program(lst), initial_regs) - - def test_shift_once(self): - lst = ["slw 3, 1, 4", - "slw 3, 1, 2"] - initial_regs = [0] * 32 - initial_regs[1] = 0x80000000 - initial_regs[2] = 0x40 - initial_regs[4] = 0x00 - self.run_tst_program(Program(lst), initial_regs) - - def test_rlwinm(self): - for i in range(10): - mb = random.randint(0,31) - me = random.randint(0,31) - sh = random.randint(0,31) - lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rlwimi(self): - lst = ["rlwimi 3, 1, 5, 20, 6"] - initial_regs = [0] * 32 - initial_regs[1] = 0xdeadbeef - initial_regs[3] = 0x12345678 - self.run_tst_program(Program(lst), initial_regs) - - def test_rlwnm(self): - lst = ["rlwnm 3, 1, 2, 20, 6"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - initial_regs[2] = random.randint(0, 63) - self.run_tst_program(Program(lst), initial_regs) - - def 
test_rldicl(self): - lst = ["rldicl 3, 1, 5, 20"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rldicr(self): - lst = ["rldicr 3, 1, 5, 20"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_rlc(self): - insns = ["rldic", "rldicl", "rldicr"] - for i in range(20): - choice = random.choice(insns) - sh = random.randint(0, 63) - m = random.randint(0, 63) - lst = [f"{choice} 3, 1, {sh}, {m}"] - initial_regs = [0] * 32 - initial_regs[1] = random.randint(0, (1<<64)-1) - self.run_tst_program(Program(lst), initial_regs) - - def test_ilang(self): - rec = CompALUOpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - alu = ShiftRotBasePipe(pspec) - vl = rtlil.convert(alu, ports=alu.ports()) - with open("pipeline.il", "w") as f: - f.write(vl) - - -class TestRunner(FHDLTestCase): - def __init__(self, test_data): - super().__init__("run_all") - self.test_data = test_data - - def run_all(self): - m = Module() - comb = m.d.comb - instruction = Signal(32) - - pdecode = create_pdecode() - - m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) - - rec = CompALUOpSubset() - - pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec)) - m.submodules.alu = alu = ShiftRotBasePipe(pspec) - - comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e) - comb += alu.p.valid_i.eq(1) - comb += alu.n.ready_i.eq(1) - comb += pdecode2.dec.raw_opcode_in.eq(instruction) - sim = Simulator(m) - - sim.add_clock(1e-6) - def process(): - for test in self.test_data: - print(test.name) - program = test.program - self.subTest(test.name) - simulator = ISA(pdecode2, test.regs, test.sprs, 0) - gen = program.generate_instructions() - instructions = list(zip(gen, program.assembly.splitlines())) - - index = simulator.pc.CIA.value//4 - while index < len(instructions): - ins, code = instructions[index] - - 
print("0x{:X}".format(ins & 0xffffffff)) - print(code) - - # ask the decoder to decode this binary data (endian'd) - yield pdecode2.dec.bigendian.eq(0) # little / big? - yield instruction.eq(ins) # raw binary instr. - yield Settle() - fn_unit = yield pdecode2.e.fn_unit - self.assertEqual(fn_unit, Function.SHIFT_ROT.value) - yield from set_alu_inputs(alu, pdecode2, simulator) - yield from set_extra_alu_inputs(alu, pdecode2, simulator) - yield - opname = code.split(' ')[0] - yield from simulator.call(opname) - index = simulator.pc.CIA.value//4 - - vld = yield alu.n.valid_o - while not vld: - yield - vld = yield alu.n.valid_o - yield - alu_out = yield alu.n.data_o.o - out_reg_valid = yield pdecode2.e.write_reg.ok - if out_reg_valid: - write_reg_idx = yield pdecode2.e.write_reg.data - expected = simulator.gpr(write_reg_idx).value - msg = f"expected {expected:x}, actual: {alu_out:x}" - self.assertEqual(expected, alu_out, msg) - yield from self.check_extra_alu_outputs(alu, pdecode2, - simulator) - - sim.add_sync_process(process) - with sim.write_vcd("simulator.vcd", "simulator.gtkw", - traces=[]): - sim.run() - def check_extra_alu_outputs(self, alu, dec2, sim): - rc = yield dec2.e.rc.data - if rc: - cr_expected = sim.crl[0].get_range().value - cr_actual = yield alu.n.data_o.cr0 - self.assertEqual(cr_expected, cr_actual) - - -if __name__ == "__main__": - unittest.main(exit=False) - suite = unittest.TestSuite() - suite.addTest(TestRunner(test_data)) - - runner = unittest.TextTestRunner() - runner.run(suite)