--- /dev/null
+from nmigen.hdl.rec import Record, Layout
+
+from soc.decoder.power_enums import InternalOp, Function, CryIn
+
+
+class CompALUOpSubset(Record):
+    """CompALUOpSubset
+
+    Record carrying the part of Decode2Execute1Type that ALU operations
+    need.  Fill it from a decoder record with eq_from_execute1 (below).
+    """
+    def __init__(self, name=None):
+        # cr and xerc are deliberately absent: they come from the CR and
+        # XER SPRs, not from this record
+        fields = (
+            ('insn_type', InternalOp),
+            ('fn_unit', Function),
+            ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+            ('lk', 1),
+            ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
+            ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
+            ('invert_a', 1),
+            ('invert_out', 1),
+            ('input_carry', CryIn),
+            ('output_carry', 1),
+            ('input_cr', 1),
+            ('output_cr', 1),
+            ('is_32bit', 1),
+            ('is_signed', 1),
+            ('data_len', 4),  # TODO: should be in separate CompLDSTSubset
+            ('insn', 32),
+            ('byte_reverse', 1),
+            ('sign_extend', 1),
+        )
+
+        # keep this call as a bare statement (result not assigned)
+        Record.__init__(self, Layout(fields), name=name)
+
+        # grrr. Record does not have kwargs, so reset_less has to be set
+        # on each plain (non-nested) field after construction
+        for fname in ('insn_type', 'fn_unit', 'lk', 'invert_a',
+                      'invert_out', 'input_carry', 'output_carry',
+                      'input_cr', 'output_cr', 'is_32bit', 'is_signed',
+                      'data_len', 'byte_reverse', 'sign_extend'):
+            getattr(self, fname).reset_less = True
+
+    def eq_from_execute1(self, other):
+        """ use this to copy in from Decode2Execute1Type
+
+        Returns one assignment per field of this record, each taking its
+        value from the field of the same name in other.
+        """
+        return [sig.eq(other.fields[fname])
+                for fname, sig in self.fields.items()]
+
+    def ports(self):
+        """ list of field signals exposed as ports.
+
+        fn_unit, imm_data, rc, oe and insn are not part of this list.
+        """
+        names = ('insn_type', 'lk', 'invert_a', 'invert_out',
+                 'input_carry', 'output_carry', 'input_cr', 'output_cr',
+                 'is_32bit', 'is_signed', 'data_len', 'byte_reverse',
+                 'sign_extend')
+        return [getattr(self, fname) for fname in names]
--- /dev/null
+# Proof of correctness for the ALU input stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    """Formal harness: feeds ALUInputStage unconstrained inputs and
+    asserts properties of its outputs."""
+    def __init__(self):
+        # no ports of its own: everything is created in elaborate()
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # operation record: every field listed by ports() is driven by an
+        # unconstrained constant; the field widths add up to op_wid
+        rec = CompALUOpSubset()
+        op_fields = rec.ports()
+        comb += [field.eq(AnyConst(field.width)) for field in op_fields]
+        recwidth = sum(field.width for field in op_fields)
+
+        dut = ALUInputStage(ALUPipeSpec(id_wid=2, op_wid=recwidth))
+        m.submodules.dut = dut
+
+        # unconstrained operands, wired to the DUT inputs
+        a = Signal(64)
+        b = Signal(64)
+        comb += [a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64)),
+                 dut.i.a.eq(a),
+                 dut.i.b.eq(b)]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        comb += [Assert(getattr(dut.o.ctx.op, field.name) == field)
+                 for field in op_fields]
+
+        # operand A comes out inverted exactly when invert_a is set
+        comb += Assert(dut.o.a == Mux(rec.invert_a, ~a, a))
+
+        # operand B passes straight through
+        comb += Assert(dut.o.b == b)
+
+        return m
+
+
+class GTCombinerTestCase(FHDLTestCase):
+    """Formal run and RTLIL export of the input-stage Driver."""
+
+    def test_formal(self):
+        """Run assertFormal in "bmc" and then "cover" mode, depth 4."""
+        module = Driver()
+        for mode in ("bmc", "cover"):
+            self.assertFormal(module, mode=mode, depth=4)
+
+    def test_ilang(self):
+        """Convert a Driver to RTLIL and write it to input_stage.il."""
+        il_text = rtlil.convert(Driver(), ports=[])
+        with open("input_stage.il", "w") as il_file:
+            il_file.write(il_text)
+
+
+# Run the test cases defined above when this file is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Proof of correctness for the ALU main stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+ signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.main_stage import ALUMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    """Formal harness: feeds ALUMainStage unconstrained inputs and
+    asserts properties of its outputs."""
+    def __init__(self):
+        # no ports of its own: everything is created in elaborate()
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # operation record: every field listed by ports() is driven by an
+        # unconstrained constant; the field widths add up to op_wid
+        rec = CompALUOpSubset()
+        op_fields = rec.ports()
+        comb += [field.eq(AnyConst(field.width)) for field in op_fields]
+        recwidth = sum(field.width for field in op_fields)
+
+        dut = ALUMainStage(ALUPipeSpec(id_wid=2, op_wid=recwidth))
+        m.submodules.dut = dut
+
+        # shorthand for the DUT signals used below
+        a, b = dut.i.a, dut.i.b
+        carry_in, so_in = dut.i.carry_in, dut.i.so
+        carry_out, o = dut.o.carry_out, dut.o.o
+
+        # unconstrained operand, carry and sticky-overflow inputs
+        comb += [a.eq(AnyConst(64)),
+                 b.eq(AnyConst(64)),
+                 carry_in.eq(AnyConst(1)),
+                 so_in.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # Assert that op gets copied from the input to output
+        comb += [Assert(getattr(dut.o.ctx.op, field.name) == field)
+                 for field in op_fields]
+
+        # signed and signed/32 versions of input a
+        a_signed = Signal(signed(64))
+        a_signed_32 = Signal(signed(32))
+        comb += [a_signed.eq(a),
+                 a_signed_32.eq(a[0:32])]
+
+        # main assertion of arithmetic operations: for an add, the
+        # result with carry_out on top equals a + b + carry_in
+        with m.If(rec.insn_type == InternalOp.OP_ADD):
+            comb += Assert(Cat(o, carry_out) == (a + b + carry_in))
+
+        return m
+
+
+class ALUTestCase(FHDLTestCase):
+    """Formal run and RTLIL export of the main-stage Driver."""
+
+    def test_formal(self):
+        """Run assertFormal in "bmc" and then "cover" mode, depth 2."""
+        module = Driver()
+        for mode in ("bmc", "cover"):
+            self.assertFormal(module, mode=mode, depth=2)
+
+    def test_ilang(self):
+        """Convert a Driver to RTLIL and write it to main_stage.il."""
+        il_text = rtlil.convert(Driver(), ports=[])
+        with open("main_stage.il", "w") as il_file:
+            il_file.write(il_text)
+
+
+# Run the test cases defined above when this file is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Proof of correctness for the ALU output stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux, Cat, signed
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.output_stage import ALUOutputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    """Formal harness: feeds ALUOutputStage unconstrained inputs and
+    asserts properties of its outputs.
+
+    Checked properties: optional inversion of the result, the CR0 bits
+    produced for the result, and the op record reaching the output
+    unchanged.
+    """
+    def __init__(self):
+        # no ports of its own: everything is created in elaborate()
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        rec = CompALUOpSubset()
+        recwidth = 0
+        # Setup random inputs for dut.op
+        for p in rec.ports():
+            width = p.width
+            recwidth += width
+            comb += p.eq(AnyConst(width))
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ALUOutputStage(pspec)
+
+        # unconstrained values for every data input of the stage
+        o = Signal(64)
+        carry_out = Signal()
+        carry_out32 = Signal()
+        ov = Signal()
+        ov32 = Signal()
+        cr0 = Signal(4)
+        so = Signal()
+        comb += [dut.i.o.eq(o),
+                 dut.i.carry_out.eq(carry_out),
+                 dut.i.so.eq(so),
+                 dut.i.carry_out32.eq(carry_out32),
+                 dut.i.cr0.eq(cr0),
+                 dut.i.ov.eq(ov),
+                 dut.i.ov32.eq(ov32),
+                 o.eq(AnyConst(64)),
+                 carry_out.eq(AnyConst(1)),
+                 carry_out32.eq(AnyConst(1)),
+                 ov.eq(AnyConst(1)),
+                 ov32.eq(AnyConst(1)),
+                 cr0.eq(AnyConst(4)),
+                 so.eq(AnyConst(1))]
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # the result is inverted exactly when invert_out is set
+        with m.If(dut.i.ctx.op.invert_out):
+            comb += Assert(dut.o.o == ~o)
+        with m.Else():
+            comb += Assert(dut.o.o == o)
+
+        cr_out = Signal.like(cr0)
+        comb += cr_out.eq(dut.o.cr0)
+
+        o_signed = Signal(signed(64))
+        comb += o_signed.eq(dut.o.o)
+
+        with m.If(rec.insn_type == InternalOp.OP_CMPEQB):
+            # for OP_CMPEQB the output stage forwards the incoming cr0
+            # unchanged, so the one-hot / sign checks below do not apply
+            # to it (the incoming cr0 here is an unconstrained value)
+            comb += Assert(cr_out == cr0)
+        with m.Else():
+            # Assert only one of the comparison bits is set
+            comb += Assert(cr_out[3] + cr_out[2] + cr_out[1] == 1)
+            with m.If(o_signed == 0):
+                comb += Assert(cr_out[1] == 1)
+            with m.Elif(o_signed > 0):
+                # sigh. see https://bugs.libre-soc.org/show_bug.cgi?id=305#c61
+                # for OP_CMP we do b-a rather than a-b (just like ADD) and
+                # then invert the *test condition*.
+                with m.If(rec.insn_type == InternalOp.OP_CMP):
+                    comb += Assert(cr_out[3] == 1)
+                with m.Else():
+                    comb += Assert(cr_out[2] == 1)
+            with m.Elif(o_signed < 0):
+                # ditto as above
+                with m.If(rec.insn_type == InternalOp.OP_CMP):
+                    comb += Assert(cr_out[2] == 1)
+                with m.Else():
+                    comb += Assert(cr_out[3] == 1)
+
+        # Assert that op gets copied from the input to output
+        for p in rec.ports():
+            name = p.name
+            rec_sig = p
+            dut_sig = getattr(dut.o.ctx.op, name)
+            comb += Assert(dut_sig == rec_sig)
+
+        return m
+
+class GTCombinerTestCase(FHDLTestCase):
+    """Formal run and RTLIL export of the output-stage Driver."""
+
+    def test_formal(self):
+        """Run assertFormal in "bmc" and then "cover" mode, depth 4."""
+        module = Driver()
+        for mode in ("bmc", "cover"):
+            self.assertFormal(module, mode=mode, depth=4)
+
+    def test_ilang(self):
+        """Convert a Driver to RTLIL and write it to output_stage.il."""
+        il_text = rtlil.convert(Driver(), ports=[])
+        with open("output_stage.il", "w") as il_file:
+            il_file.write(il_text)
+
+
+# Run the test cases defined above when this file is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# This stage is intended to adjust the input data before sending it to
+# the actual ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+ unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.alu.pipe_data import ALUInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ALUInputStage(PipeModBase):
+    """Pipeline stage that prepares the operands for the main ALU stage.
+
+    Operand A is optionally inverted and the carry-in is chosen by the
+    op's input_carry field; operand B, sticky overflow and the context
+    are forwarded unchanged.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec, "input")
+
+    def ispec(self):
+        return ALUInputData(self.pspec)
+
+    def ospec(self):
+        return ALUInputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        ctx = self.i.ctx
+        op = ctx.op
+
+        ##### operand A #####
+
+        # as-is, or bitwise-inverted when the op requests it
+        a = Signal.like(self.i.a)
+        comb += a.eq(Mux(op.invert_a, ~self.i.a, self.i.a))
+
+        comb += [self.o.a.eq(a),
+                 self.o.b.eq(self.i.b)]
+
+        ##### carry-in #####
+
+        # constant 0, constant 1, or the incoming carry, as the op says
+        carry_sources = ((CryIn.ZERO, 0),
+                         (CryIn.ONE, 1),
+                         (CryIn.CA, self.i.carry_in))
+        with m.Switch(op.input_carry):
+            for selector, source in carry_sources:
+                with m.Case(selector):
+                    comb += self.o.carry_in.eq(source)
+
+        ##### sticky overflow and context (both pass-through) #####
+
+        comb += self.o.so.eq(self.i.so)
+        comb += self.o.ctx.eq(ctx)
+
+        return m
--- /dev/null
+# This stage is intended to do most of the work of executing the Arithmetic
+# instructions. This would be like the additions, compares, and sign-extension
+# as well as carry and overflow generation. This module
+# however should not gate the carry or overflow, that's up to the
+# output stage
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+from nmutil.pipemodbase import PipeModBase
+from soc.alu.pipe_data import ALUInputData, ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+# Main ALU pipeline stage: takes ALUInputData and produces ALUOutputData.
+# Handles OP_ADD, OP_CMP, OP_EXTS and OP_CMPEQB; sticky overflow and the
+# context are forwarded unchanged.
+class ALUMainStage(PipeModBase):
+ def __init__(self, pspec):
+ super().__init__(pspec, "main")
+
+ # input specification of this stage
+ def ispec(self):
+ return ALUInputData(self.pspec)
+
+ # output specification of this stage
+ def ospec(self):
+ return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ carry_out, o = self.o.carry_out, self.o.o
+
+ # check if op is 32-bit, and get sign bit from operand a
+ # (neither is_32bit nor sign_bit is referenced again in this method)
+ is_32bit = Signal(reset_less=True)
+ sign_bit = Signal(reset_less=True)
+ comb += is_32bit.eq(self.i.ctx.op.is_32bit)
+ comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63]))
+
+ # little trick: do the add using only one add (not 2)
+ # both addends are two bits wider than operand a: one extra bit below
+ # (carry_in for add_a, constant 1 for add_b) and one zero bit above
+ add_a = Signal(self.i.a.width + 2, reset_less=True)
+ add_b = Signal(self.i.a.width + 2, reset_less=True)
+ add_output = Signal(self.i.a.width + 2, reset_less=True)
+ with m.If((self.i.ctx.op.insn_type == InternalOp.OP_ADD) |
+ (self.i.ctx.op.insn_type == InternalOp.OP_CMP)):
+ # in bit 0, 1+carry_in creates carry into bit 1 and above
+ comb += add_a.eq(Cat(self.i.carry_in, self.i.a, Const(0, 1)))
+ comb += add_b.eq(Cat(Const(1, 1), self.i.b, Const(0, 1)))
+ comb += add_output.eq(add_a + add_b)
+
+ ##########################
+ # main switch-statement for handling arithmetic operations
+
+ with m.Switch(self.i.ctx.op.insn_type):
+ #### CMP, CMPL ####
+ with m.Case(InternalOp.OP_CMP):
+ # this is supposed to be inverted (b-a, not a-b)
+ # however we have a trick: instead of adding either 2x 64-bit
+ # MUXes to invert a and b, or messing with a 64-bit output,
+ # swap +ve and -ve test in the *output* stage using an XOR gate
+ comb += o.eq(add_output[1:-1])
+
+ #### add ####
+ with m.Case(InternalOp.OP_ADD):
+ # bit 0 is not part of the result, top bit is the carry-out
+ comb += o.eq(add_output[1:-1])
+ comb += carry_out.eq(add_output[-1])
+
+ #### exts (sign-extend) ####
+ with m.Case(InternalOp.OP_EXTS):
+ # data_len of 1, 2 or 4 selects the low 8, 16 or 32 bits of a, which
+ # are extended to 64 bits by replicating their top bit
+ with m.If(self.i.ctx.op.data_len == 1):
+ comb += o.eq(Cat(self.i.a[0:8], Repl(self.i.a[7], 64-8)))
+ with m.If(self.i.ctx.op.data_len == 2):
+ comb += o.eq(Cat(self.i.a[0:16], Repl(self.i.a[15], 64-16)))
+ with m.If(self.i.ctx.op.data_len == 4):
+ comb += o.eq(Cat(self.i.a[0:32], Repl(self.i.a[31], 64-32)))
+ #### cmpeqb: compare the low byte of a with each byte of b ####
+ with m.Case(InternalOp.OP_CMPEQB):
+ eqs = Signal(8, reset_less=True)
+ src1 = Signal(8, reset_less=True)
+ comb += src1.eq(self.i.a[0:8])
+ # eqs[i] is set when byte i of b equals src1
+ for i in range(8):
+ comb += eqs[i].eq(src1 == self.i.b[8*i:8*(i+1)])
+ # "any byte matched" lands in bit 2 of cr0; the other three bits are 0
+ comb += self.o.cr0.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
+
+ ###### sticky overflow and context, both pass-through #####
+
+ comb += self.o.so.eq(self.i.so)
+ comb += self.o.ctx.eq(self.i.ctx)
+
+ return m
--- /dev/null
+# This stage is intended to handle the gating of carry and overflow
+# out, summary overflow generation, and updating the condition
+# register
+from nmigen import (Module, Signal, Cat, Repl)
+from nmutil.pipemodbase import PipeModBase
+from soc.alu.pipe_data import ALUInputData, ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+class ALUOutputStage(PipeModBase):
+    """Final ALU pipeline stage.
+
+    Optionally inverts the result, derives the CR0 field and the
+    sticky-overflow bit from it, and forwards the context.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec, "output")
+
+    def ispec(self):
+        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def ospec(self):
+        return ALUOutputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        op = self.i.ctx.op
+
+        # result, bitwise-inverted when the op requests it
+        o = Signal.like(self.i.o)
+        with m.If(op.invert_out):
+            comb += o.eq(~self.i.o)
+        with m.Else():
+            comb += o.eq(self.i.o)
+
+        # ingredients of condition register cr0, and sticky-overflow
+        is_zero = Signal(reset_less=True)
+        is_positive = Signal(reset_less=True)
+        is_negative = Signal(reset_less=True)
+        msb_test = Signal(reset_less=True) # set equal to MSB, invert if OP=CMP
+        is_cmp = Signal(reset_less=True)   # true if OP=CMP
+        so = Signal(reset_less=True)
+
+        # TODO: if o[63] is XORed with "operand == OP_CMP"
+        # that can be used as a test
+        # see https://bugs.libre-soc.org/show_bug.cgi?id=305#c60
+
+        comb += [is_cmp.eq(op.insn_type == InternalOp.OP_CMP),
+                 msb_test.eq(o[-1] ^ is_cmp),
+                 is_zero.eq(o == 0),
+                 is_positive.eq(~(is_zero | msb_test)),
+                 is_negative.eq(~is_zero & msb_test),
+                 so.eq(self.i.so | self.i.ov)]
+
+        comb += self.o.o.eq(o)
+
+        # OP_CMPEQB arrives with its cr0 already computed: forward it.
+        # every other op gets cr0 built here (so in the lowest bit).
+        with m.If(op.insn_type == InternalOp.OP_CMPEQB):
+            comb += self.o.cr0.eq(self.i.cr0)
+        with m.Else():
+            comb += self.o.cr0.eq(Cat(so, is_zero, is_positive, is_negative))
+
+        comb += self.o.so.eq(so)
+
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
--- /dev/null
+from nmigen import Signal, Const
+from nmutil.dynamicpipe import SimpleHandshakeRedir
+from soc.alu.alu_input_record import CompALUOpSubset
+from ieee754.fpcommon.getop import FPPipeContext
+
+
+class IntegerData:
+    """Base for pipeline data: owns an FPPipeContext and its muxid."""
+
+    def __init__(self, pspec):
+        context = FPPipeContext(pspec)
+        self.ctx = context
+        self.muxid = context.muxid
+
+    def __iter__(self):
+        # iterating the data iterates its context
+        for item in self.ctx:
+            yield item
+
+    def eq(self, i):
+        """Return a one-element list: the context copied from i."""
+        ctx_assign = self.ctx.eq(i.ctx)
+        return [ctx_assign]
+
+    def ports(self):
+        """Return the ports of the context."""
+        return self.ctx.ports()
+
+
+class ALUInputData(IntegerData):
+    """Context plus the two operands, sticky overflow and carry-in."""
+
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.a = Signal(64, reset_less=True) # RA
+        self.b = Signal(64, reset_less=True) # RB/immediate
+        self.so = Signal(reset_less=True)
+        self.carry_in = Signal(reset_less=True)
+
+    def __iter__(self):
+        # context first, then a, b, carry_in, so
+        yield from super().__iter__()
+        yield from (self.a, self.b, self.carry_in, self.so)
+
+    def eq(self, i):
+        """Return assignments copying the context and every signal from i."""
+        return [*super().eq(i),
+                self.a.eq(i.a),
+                self.b.eq(i.b),
+                self.carry_in.eq(i.carry_in),
+                self.so.eq(i.so)]
+
+# TODO: ALUIntermediateData which does not have
+# cr0, ov, ov32 in it (because they are generated as outputs by
+# the final output stage, not by the intermediate stage)
+# https://bugs.libre-soc.org/show_bug.cgi?id=305#c19
+
+class ALUOutputData(IntegerData):
+    """Context plus result, carry, overflow, cr0 and sticky overflow."""
+
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.o = Signal(64, reset_less=True, name="stage_o")
+        self.carry_out = Signal(reset_less=True)
+        self.carry_out32 = Signal(reset_less=True)
+        self.cr0 = Signal(4, reset_less=True)
+        self.ov = Signal(reset_less=True)
+        self.ov32 = Signal(reset_less=True)
+        self.so = Signal(reset_less=True)
+
+    def __iter__(self):
+        # context first, then the signals in declaration order
+        yield from super().__iter__()
+        yield from (self.o, self.carry_out, self.carry_out32,
+                    self.cr0, self.ov, self.ov32, self.so)
+
+    def eq(self, i):
+        """Return assignments copying the context and every signal from i."""
+        return [*super().eq(i),
+                self.o.eq(i.o),
+                self.carry_out.eq(i.carry_out),
+                self.carry_out32.eq(i.carry_out32),
+                self.cr0.eq(i.cr0),
+                self.ov.eq(i.ov),
+                self.ov32.eq(i.ov32),
+                self.so.eq(i.so)]
+
+
+class IntPipeSpec:
+    """Pipeline specification: id width, op width and op-record factory."""
+
+    def __init__(self, id_wid=2, op_wid=1):
+        def make_op(_):
+            # the argument is ignored; the record is always named "op"
+            return CompALUOpSubset(name="op")
+
+        self.id_wid = id_wid
+        self.op_wid = op_wid
+        self.opkls = make_op
+        self.stage = None
+
+
+class ALUPipeSpec(IntPipeSpec):
+    """IntPipeSpec whose pipekls is SimpleHandshakeRedir."""
+
+    def __init__(self, id_wid, op_wid):
+        super().__init__(id_wid=id_wid, op_wid=op_wid)
+        self.pipekls = SimpleHandshakeRedir
--- /dev/null
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.main_stage import ALUMainStage
+from soc.alu.output_stage import ALUOutputStage
+
+class ALUStages(PipeModBaseChain):
+    """Chain of the three ALU stages: input, main, output."""
+
+    def get_chain(self):
+        # built in pipeline order, each from the same pspec
+        stage_classes = (ALUInputStage, ALUMainStage, ALUOutputStage)
+        return [stage(self.pspec) for stage in stage_classes]
+
+
+class ALUBasePipe(ControlBase):
+    """ControlBase wrapped around a single ALUStages chain."""
+
+    def __init__(self, pspec):
+        ControlBase.__init__(self)
+        stages = ALUStages(pspec)
+        self.pipe1 = stages
+        # statements returned by connect(); added in elaborate()
+        self._eqs = self.connect([stages])
+
+    def elaborate(self, platform):
+        module = ControlBase.elaborate(self, platform)
+        module.submodules.pipe = self.pipe1
+        module.d.comb += self._eqs
+        return module
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function, InternalOp)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.alu.pipeline import ALUBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+class TestCase:
+    """One queued test: a program, its initial state and a name."""
+
+    def __init__(self, program, regs, sprs, name):
+        self.program, self.regs = program, regs
+        self.sprs, self.name = sprs, name
+
+def get_rec_width(rec):
+    """Return the summed width of every signal in rec.ports()."""
+    return sum(port.width for port in rec.ports())
+
+def set_alu_inputs(alu, dec2, sim):
+    """Simulation helper: load operands A and B of the ALU input.
+
+    A comes from the register selected by read_reg3 or read_reg1 (exactly
+    one of the two must be flagged ok).  B is the immediate when imm_ok is
+    set, otherwise the register selected by read_reg2, otherwise 0.
+    Register values are looked up with sim.gpr().
+    """
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    decoded = dec2.e
+
+    reg3_ok = yield decoded.read_reg3.ok
+    reg1_ok = yield decoded.read_reg1.ok
+    assert reg3_ok != reg1_ok
+    if reg3_ok:
+        source_a = decoded.read_reg3
+    elif reg1_ok:
+        source_a = decoded.read_reg1
+    else:
+        source_a = None
+
+    if source_a is None:
+        operand_a = 0
+    else:
+        regnum = yield source_a.data
+        operand_a = sim.gpr(regnum).value
+
+    yield alu.p.data_i.a.eq(operand_a)
+
+    # If there's an immediate, set the B operand to that
+    reg2_ok = yield decoded.read_reg2.ok
+    imm_ok = yield decoded.imm_data.imm_ok
+    if imm_ok:
+        operand_b = yield decoded.imm_data.imm
+    elif reg2_ok:
+        regnum = yield decoded.read_reg2.data
+        operand_b = sim.gpr(regnum).value
+    else:
+        operand_b = 0
+    yield alu.p.data_i.b.eq(operand_b)
+
+
+
+def set_extra_alu_inputs(alu, dec2, sim):
+    """Simulation helper: load carry_in and so from the XER bits CA and SO
+    of sim.spr['XER'], each as 0 or 1."""
+    targets = (('CA', alu.p.data_i.carry_in),
+               ('SO', alu.p.data_i.so))
+    for bit_name, port in targets:
+        flag = int(bool(sim.spr['XER'][XER_bits[bit_name]]))
+        yield port.eq(flag)
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+# Queue of TestCase objects: ALUTestCase.run_tst_program appends to it and
+# the __main__ block hands it to TestRunner.
+test_data = []
+
+
+class ALUTestCase(FHDLTestCase):
+    """Queues ALU test programs instead of simulating them directly.
+
+    Each test_* method builds one or more programs with an initial
+    register/SPR state and records them in the module-level test_data
+    list through run_tst_program.
+    """
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        """Append a TestCase for prog to test_data.
+
+        initial_regs defaults to 32 zeroed registers and initial_sprs to
+        an empty dict.  Fresh objects are created on every call so that
+        queued test cases never share (and cannot corrupt) one another's
+        default state.
+        """
+        if initial_regs is None:
+            initial_regs = [0] * 32
+        if initial_sprs is None:
+            initial_sprs = {}
+        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+        test_data.append(tc)
+
+    def test_rand(self):
+        insns = ["add", "add.", "subf"]
+        for i in range(40):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rand_imm(self):
+        insns = ["addi", "addis", "subfic"]
+        for i in range(10):
+            choice = random.choice(insns)
+            # signed 16-bit immediate
+            imm = random.randint(-(1<<15), (1<<15)-1)
+            lst = [f"{choice} 3, 1, {imm}"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_adde(self):
+        lst = ["adde. 5, 6, 7"]
+        initial_regs = [0] * 32
+        initial_regs[6] = random.randint(0, (1<<64)-1)
+        initial_regs[7] = random.randint(0, (1<<64)-1)
+        initial_sprs = {}
+        # start with the CA bit of XER set
+        xer = SelectableInt(0, 64)
+        xer[XER_bits['CA']] = 1
+        initial_sprs[special_sprs['XER']] = xer
+        self.run_tst_program(Program(lst), initial_regs, initial_sprs)
+
+    def test_cmp(self):
+        lst = ["subf. 1, 6, 7",
+               "cmp cr2, 1, 6, 7"]
+        initial_regs = [0] * 32
+        initial_regs[6] = 0x10
+        initial_regs[7] = 0x05
+        self.run_tst_program(Program(lst), initial_regs, {})
+
+    def test_extsb(self):
+        insns = ["extsb", "extsh", "extsw"]
+        for i in range(10):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1"]
+            print(lst)
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_cmpeqb(self):
+        lst = ["cmpeqb cr0, 1, 2"]
+        for i in range(20):
+            initial_regs = [0] * 32
+            initial_regs[1] = i
+            # eight distinct bytes; this is the low 64 bits of the earlier
+            # 9-byte literal 0x01030507090b0d0f11, which did not fit a
+            # 64-bit register
+            initial_regs[2] = 0x030507090b0d0f11
+            self.run_tst_program(Program(lst), initial_regs, {})
+
+    def test_ilang(self):
+        """Convert the ALU pipeline to RTLIL and write it to pipeline.il."""
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = ALUBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("pipeline.il", "w") as f:
+            f.write(vl)
+
+
+# Runs every queued TestCase through one decoder + ALU pipeline simulation
+# and compares the ALU outputs against the ISA simulator.
+class TestRunner(FHDLTestCase):
+ def __init__(self, test_data):
+ # "run_all" is the method this test case executes
+ super().__init__("run_all")
+ self.test_data = test_data
+
+ def run_all(self):
+ m = Module()
+ comb = m.d.comb
+ instruction = Signal(32)
+
+ pdecode = create_pdecode()
+
+ m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+ # rec is only used to compute op_wid for the pipeline spec
+ rec = CompALUOpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ m.submodules.alu = alu = ALUBasePipe(pspec)
+
+ # decoder output feeds the ALU op; valid_i and ready_i are held at 1
+ comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+ comb += alu.p.valid_i.eq(1)
+ comb += alu.n.ready_i.eq(1)
+ comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+ sim = Simulator(m)
+
+ sim.add_clock(1e-6)
+ def process():
+ for test in self.test_data:
+ print(test.name)
+ program = test.program
+ # NOTE(review): subTest() is called without `with`, so it presumably
+ # has no effect on how failures are reported - confirm
+ self.subTest(test.name)
+ simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+ gen = program.generate_instructions()
+ # pair each generated instruction with its line of assembly text
+ instructions = list(zip(gen, program.assembly.splitlines()))
+
+ # instruction index is the simulator's current address divided by 4
+ index = simulator.pc.CIA.value//4
+ while index < len(instructions):
+ ins, code = instructions[index]
+
+ print("0x{:X}".format(ins & 0xffffffff))
+ print(code)
+
+ # ask the decoder to decode this binary data (endian'd)
+ yield pdecode2.dec.bigendian.eq(0) # little / big?
+ yield instruction.eq(ins) # raw binary instr.
+ yield Settle()
+ # every queued instruction is expected to decode to the ALU unit
+ fn_unit = yield pdecode2.e.fn_unit
+ self.assertEqual(fn_unit, Function.ALU.value)
+ yield from set_alu_inputs(alu, pdecode2, simulator)
+ yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+ yield
+ # run the same instruction in the ISA simulator; the mnemonic is the
+ # first space-separated word of the assembly line
+ opname = code.split(' ')[0]
+ yield from simulator.call(opname)
+ index = simulator.pc.CIA.value//4
+
+ # poll valid_o, advancing one clock at a time until it is set
+ vld = yield alu.n.valid_o
+ while not vld:
+ yield
+ vld = yield alu.n.valid_o
+ yield
+ alu_out = yield alu.n.data_o.o
+ # compare against the ISA simulator's destination register, if any
+ out_reg_valid = yield pdecode2.e.write_reg.ok
+ if out_reg_valid:
+ write_reg_idx = yield pdecode2.e.write_reg.data
+ expected = simulator.gpr(write_reg_idx).value
+ print(f"expected {expected:x}, actual: {alu_out:x}")
+ self.assertEqual(expected, alu_out)
+ yield from self.check_extra_alu_outputs(alu, pdecode2,
+ simulator, code)
+
+ sim.add_sync_process(process)
+ with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+ traces=[]):
+ sim.run()
+
+ # Compares the ALU's cr0 output with the ISA simulator's CR field:
+ # field 0 when rc is set, field BF for OP_CMP and OP_CMPEQB.
+ def check_extra_alu_outputs(self, alu, dec2, sim, code):
+ rc = yield dec2.e.rc.data
+ if rc:
+ cr_expected = sim.crl[0].get_range().value
+ cr_actual = yield alu.n.data_o.cr0
+ self.assertEqual(cr_expected, cr_actual, code)
+
+ op = yield dec2.e.insn_type
+ if op == InternalOp.OP_CMP.value or \
+ op == InternalOp.OP_CMPEQB.value:
+ bf = yield dec2.dec.BF
+ cr_actual = yield alu.n.data_o.cr0
+ cr_expected = sim.crl[bf].get_range().value
+ self.assertEqual(cr_expected, cr_actual, code)
+
+
+
+# Two passes: unittest.main(exit=False) runs the ordinary test cases, which
+# fill test_data; a TestRunner built from test_data is then run on its own.
+if __name__ == "__main__":
+ unittest.main(exit=False)
+ suite = unittest.TestSuite()
+ suite.addTest(TestRunner(test_data))
+
+ runner = unittest.TextTestRunner()
+ runner.run(suite)
--- /dev/null
+from nmigen.hdl.rec import Record, Layout
+
+from soc.decoder.power_enums import InternalOp, Function, CryIn
+
+
+class CompBROpSubset(Record):
+    """CompBROpSubset
+
+    TODO: remove anything not needed by the Branch pipeline (determine this
+    after all branch operations have been written.  see
+    https://bugs.libre-soc.org/show_bug.cgi?id=313#c3)
+
+    Record carrying the part of Decode2Execute1Type that Branch operations
+    need.  Fill it from a decoder record with eq_from_execute1 (below).
+    """
+    def __init__(self, name=None):
+        # cr and xerc are deliberately absent: they come from the CR and
+        # XER SPRs, not from this record
+        fields = (
+            ('insn_type', InternalOp),
+            ('fn_unit', Function),
+            ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+            ('lk', 1),
+            ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
+            ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
+            ('invert_a', 1),
+            ('invert_out', 1),
+            ('input_carry', CryIn),
+            ('output_carry', 1),
+            ('input_cr', 1),
+            ('output_cr', 1),
+            ('is_32bit', 1),
+            ('is_signed', 1),
+            ('insn', 32),
+            ('byte_reverse', 1),
+            ('sign_extend', 1),
+        )
+
+        # keep this call as a bare statement (result not assigned)
+        Record.__init__(self, Layout(fields), name=name)
+
+        # grrr. Record does not have kwargs, so reset_less has to be set
+        # on each plain (non-nested) field after construction
+        for fname in ('insn_type', 'fn_unit', 'lk', 'invert_a',
+                      'invert_out', 'input_carry', 'output_carry',
+                      'input_cr', 'output_cr', 'is_32bit', 'is_signed',
+                      'byte_reverse', 'sign_extend'):
+            getattr(self, fname).reset_less = True
+
+    def eq_from_execute1(self, other):
+        """ use this to copy in from Decode2Execute1Type
+
+        Returns one assignment per field of this record, each taking its
+        value from the field of the same name in other.
+        """
+        return [sig.eq(other.fields[fname])
+                for fname, sig in self.fields.items()]
+
+    def ports(self):
+        """ list of field signals exposed as ports.
+
+        fn_unit, imm_data, rc, oe and insn are not part of this list.
+        """
+        names = ('insn_type', 'lk', 'invert_a', 'invert_out',
+                 'input_carry', 'output_carry', 'input_cr', 'output_cr',
+                 'is_32bit', 'is_signed', 'byte_reverse', 'sign_extend')
+        return [getattr(self, fname) for fname in names]
--- /dev/null
+# Proof of correctness for the ALU input stage, driven with a CompBROpSubset
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.branch.br_input_record import CompBROpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+# Formal harness: feeds ALUInputStage unconstrained inputs, using a
+# CompBROpSubset as the operation record, and asserts properties of the
+# stage outputs.
+class Driver(Elaboratable):
+ def __init__(self):
+ # inputs and outputs
+ pass
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+
+ rec = CompBROpSubset()
+ recwidth = 0
+ # Setup random inputs for dut.op
+ # (only the fields listed by rec.ports(); their widths sum to recwidth)
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ comb += p.eq(AnyConst(width))
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+ m.submodules.dut = dut = ALUInputStage(pspec)
+
+ # unconstrained operands, wired to the DUT inputs
+ a = Signal(64)
+ b = Signal(64)
+ comb += [dut.i.a.eq(a),
+ dut.i.b.eq(b),
+ a.eq(AnyConst(64)),
+ b.eq(AnyConst(64))]
+
+ # NOTE(review): ALUPipeSpec presumably gives the DUT a CompALUOpSubset op
+ # record, whose layout differs from CompBROpSubset - confirm that this
+ # assignment lines the fields up as intended
+ comb += dut.i.ctx.op.eq(rec)
+
+ # Assert that op gets copied from the input to output
+ for p in rec.ports():
+ name = p.name
+ rec_sig = p
+ dut_sig = getattr(dut.o.ctx.op, name)
+ comb += Assert(dut_sig == rec_sig)
+
+ # operand A comes out inverted exactly when invert_a is set
+ with m.If(rec.invert_a):
+ comb += Assert(dut.o.a == ~a)
+ with m.Else():
+ comb += Assert(dut.o.a == a)
+
+ # operand B: the immediate when imm_ok is set (except for OP_RLC),
+ # otherwise the incoming b
+ # NOTE(review): imm_data is not in rec.ports(), so it is not given an
+ # AnyConst above and the immediate branch is presumably never exercised
+ with m.If(rec.imm_data.imm_ok &
+ ~(rec.insn_type == InternalOp.OP_RLC)):
+ comb += Assert(dut.o.b == rec.imm_data.imm)
+ with m.Else():
+ comb += Assert(dut.o.b == b)
+
+ return m
+
+class GTCombinerTestCase(FHDLTestCase):
+    """Formal run and RTLIL export of the input-stage Driver."""
+
+    def test_formal(self):
+        """Run assertFormal in "bmc" and then "cover" mode, depth 4."""
+        module = Driver()
+        for mode in ("bmc", "cover"):
+            self.assertFormal(module, mode=mode, depth=4)
+
+    def test_ilang(self):
+        """Convert a Driver to RTLIL and write it to input_stage.il."""
+        il_text = rtlil.convert(Driver(), ports=[])
+        with open("input_stage.il", "w") as il_file:
+            il_file.write(il_text)
+
+
+# Run the test cases defined above when this file is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Proof of correctness for the logical main stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+ signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.logical.main_stage import LogicalMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+ def __init__(self):
+ # inputs and outputs
+ pass
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+
+ rec = CompALUOpSubset()
+ recwidth = 0
+ # Setup random inputs for dut.op
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ comb += p.eq(AnyConst(width))
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+ m.submodules.dut = dut = LogicalMainStage(pspec)
+
+ # convenience variables
+ a = dut.i.a
+ b = dut.i.b
+ carry_in = dut.i.carry_in
+ so_in = dut.i.so
+ carry_out = dut.o.carry_out
+ o = dut.o.o
+
+ # setup random inputs
+ comb += [a.eq(AnyConst(64)),
+ b.eq(AnyConst(64)),
+ carry_in.eq(AnyConst(1)),
+ so_in.eq(AnyConst(1))]
+
+ comb += dut.i.ctx.op.eq(rec)
+
+ # Assert that op gets copied from the input to output
+ for rec_sig in rec.ports():
+ name = rec_sig.name
+ dut_sig = getattr(dut.o.ctx.op, name)
+ comb += Assert(dut_sig == rec_sig)
+
+ # signed and signed/32 versions of input a
+ a_signed = Signal(signed(64))
+ a_signed_32 = Signal(signed(32))
+ comb += a_signed.eq(a)
+ comb += a_signed_32.eq(a[0:32])
+
+ # main assertion of arithmetic operations
+ with m.Switch(rec.insn_type):
+ with m.Case(InternalOp.OP_AND):
+ comb += Assert(dut.o.o == a & b)
+ with m.Case(InternalOp.OP_OR):
+ comb += Assert(dut.o.o == a | b)
+ with m.Case(InternalOp.OP_XOR):
+ comb += Assert(dut.o.o == a ^ b)
+
+ return m
+
+
+class LogicalTestCase(FHDLTestCase):
+ def test_formal(self):
+ module = Driver()
+ self.assertFormal(module, mode="bmc", depth=2)
+ self.assertFormal(module, mode="cover", depth=2)
+ def test_ilang(self):
+ dut = Driver()
+ vl = rtlil.convert(dut, ports=[])
+ with open("main_stage.il", "w") as f:
+ f.write(vl)
+
+
+# Run the tests defined above when this file is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# This stage is intended to adjust the input data before sending it to
+# the actual ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+ unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.alu.pipe_data import ALUInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ALUInputStage(PipeModBase):
+ def __init__(self, pspec):
+ super().__init__(pspec, "input")
+
+ def ispec(self):
+ return ALUInputData(self.pspec)
+
+ def ospec(self):
+ return ALUInputData(self.pspec)
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+
+ ##### operand A #####
+
+ # operand a to be as-is or inverted
+ a = Signal.like(self.i.a)
+
+ with m.If(self.i.ctx.op.invert_a):
+ comb += a.eq(~self.i.a)
+ with m.Else():
+ comb += a.eq(self.i.a)
+
+ comb += self.o.a.eq(a)
+
+ ##### operand B #####
+
+ # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+ # remove this, just do self.o.b.eq(self.i.b) and move the
+ # immediate-detection into set_alu_inputs in the unit test
+ # If there's an immediate, set the B operand to that
+ comb += self.o.b.eq(self.i.b)
+
+ ##### carry-in #####
+
+ # either copy incoming carry or set to 1/0 as defined by op
+ with m.Switch(self.i.ctx.op.input_carry):
+ with m.Case(CryIn.ZERO):
+ comb += self.o.carry_in.eq(0)
+ with m.Case(CryIn.ONE):
+ comb += self.o.carry_in.eq(1)
+ with m.Case(CryIn.CA):
+ comb += self.o.carry_in.eq(self.i.carry_in)
+
+ ##### sticky overflow and context (both pass-through) #####
+
+ comb += self.o.so.eq(self.i.so)
+ comb += self.o.ctx.eq(self.i.ctx)
+
+ return m
--- /dev/null
+# This stage is intended to do most of the work of executing Branch
+# instructions: OP_B, OP_BC and OP_BCREG. It works out the branch target
+# (next instruction address), whether the branch is taken, the
+# decremented CTR for counter-based conditional branches, and the value
+# to be written to the link register.
+# NOTE: this header previously described the Logical pipeline, from
+# which it appears to have been copied.
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.branch.pipe_data import BranchInputData, BranchOutputData
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+def br_ext(bd):
+ return Cat(Const(0, 2), bd, Repl(bd[-1], 64-(bd.shape().width + 2)))
+
+"""
+Notes on BO Field:
+
+BO Description
+0000z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=0
+0001z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=0
+001at Branch if CR[BI]=0
+0100z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=1
+0101z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=1
+011at Branch if CR[BI]=1
+1a00t Decrement the CTR, then branch if decremented CTR[M:63]!=0
+1a01t Decrement the CTR, then branch if decremented CTR[M:63]=0
+1z1zz Branch always
+"""
+
+class BranchMainStage(PipeModBase):
+ def __init__(self, pspec):
+ super().__init__(pspec, "main")
+ self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+ self.fields.create_specs()
+
+ def ispec(self):
+ return BranchInputData(self.pspec)
+
+ def ospec(self):
+ return BranchOutputData(self.pspec) # TODO: ALUIntermediateData
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ op = self.i.ctx.op
+ lk = op.lk # see PowerDecode2 as to why this is done
+ nia_o, lr_o = self.o.nia, self.o.lr
+
+ # obtain relevant instruction fields
+ i_fields = self.fields.FormI
+ aa = Signal(i_fields.AA[0:-1].shape())
+ comb += aa.eq(i_fields.AA[0:-1])
+
+ br_imm_addr = Signal(64, reset_less=True)
+ br_addr = Signal(64, reset_less=True)
+ br_taken = Signal(reset_less=True)
+
+ # Handle absolute or relative branches
+ with m.If(aa):
+ comb += br_addr.eq(br_imm_addr)
+ with m.Else():
+ comb += br_addr.eq(br_imm_addr + self.i.cia)
+
+ # fields for conditional branches (BO and BI are same for BC and BCREG)
+ # NOTE: here, BO and BI we would like be treated as CR regfile
+ # selectors (similar to RA, RB, RS, RT). see comment here:
+ # https://bugs.libre-soc.org/show_bug.cgi?id=313#c2
+ b_fields = self.fields.FormB
+ BO = b_fields.BO[0:-1]
+ BI = b_fields.BI[0:-1]
+
+ # The bit of CR selected by BI
+ cr_bit = Signal(reset_less=True)
+ comb += cr_bit.eq((self.i.cr & (1<<(31-BI))) != 0)
+
+ # Whether the conditional branch should be taken
+ bc_taken = Signal(reset_less=True)
+ with m.If(BO[2]):
+ comb += bc_taken.eq((cr_bit == BO[3]) | BO[4])
+ with m.Else():
+ # decrement the counter and place into output
+ ctr = Signal(64, reset_less=True)
+ comb += ctr.eq(self.i.ctr - 1)
+ comb += self.o.ctr.data.eq(ctr)
+ comb += self.o.ctr.ok.eq(1)
+ # take either all 64 bits or only 32 of post-incremented counter
+ ctr_m = Signal(64, reset_less=True)
+ with m.If((op.is_32bit):
+ comb += ctr_m.eq(ctr[:32])
+ with m.Else():
+ comb += ctr_m.eq(ctr)
+ # check CTR zero/non-zero against BO[1]
+ ctr_zero_bo1 = Signal(reset_less=True) # BO[1] == (ctr==0)
+ comb += ctr_zero_bo1.eq(BO[1] ^ ctr_m.any())
+ with m.If(BO[3:5] == 0b00):
+ comb += bc_taken.eq(ctr_zero_bo1 & ~cr_bit)
+ with m.Elif(BO[3:5] == 0b01):
+ comb += bc_taken.eq(ctr_zero_bo1 & cr_bit)
+ with m.Elif(BO[4] == 1):
+ comb += bc_taken.eq(ctr_zero_bo1)
+
+ ### Main Switch Statement ###
+ with m.Switch(op.insn_type):
+ #### branch ####
+ with m.Case(InternalOp.OP_B):
+ LI = i_fields.LI[0:-1]
+ comb += br_imm_addr.eq(br_ext(LI))
+ comb += br_taken.eq(1)
+ #### branch conditional ####
+ with m.Case(InternalOp.OP_BC):
+ BD = b_fields.BD[0:-1]
+ comb += br_imm_addr.eq(br_ext(BD))
+ comb += br_taken.eq(bc_taken)
+ #### branch conditional reg ####
+ with m.Case(InternalOp.OP_BCREG):
+ comb += br_imm_addr.eq(self.i.spr1) # SPR1 is set by decode unit
+ comb += br_taken.eq(bc_taken)
+
+ ###### output next instruction address #####
+
+ comb += nia_o.data.eq(br_addr)
+ comb += nia_o.ok.eq(br_taken)
+
+ ###### link register - only activate on operations marked as "lk" #####
+
+ with m.If(lk):
+ # ctx.op.lk is the AND of the insn LK field *and* whether the
+ # op is to "listen" to the link field
+ comb += lr_o.data.eq(self.i.cia + 4)
+ comb += lr_o.ok.eq(1)
+
+ ###### and context #####
+ comb += self.o.ctx.eq(self.i.ctx)
+
+ return m
--- /dev/null
+"""
+ Optional Register allocation listed below. mandatory input
+ (CompBROpSubset, CIA) not included.
+
+ * CR is Condition Register (not an SPR)
+ * SPR1, SPR2 and SPR3 are all from the SPR regfile. 3 ports are needed
+
+ insn CR SPR1 SPR2 SPR3
+ ---- -- ---- ---- ----
+ op_b xx xx xx xx
+ op_ba xx xx xx xx
+ op_bl xx xx xx xx
+ op_bla xx xx xx xx
+ op_bc CR, xx, CTR xx
+ op_bca CR, xx, CTR xx
+ op_bcl CR, xx, CTR xx
+ op_bcla CR, xx, CTR xx
+ op_bclr CR, LR, CTR xx
+ op_bclrl CR, LR, CTR xx
+ op_bcctr CR, xx, CTR xx
+ op_bcctrl CR, xx, CTR xx
+ op_bctar CR, TAR, CTR, xx
+ op_bctarl CR, TAR, CTR, xx
+
+ op_sc xx xx xx MSR
+ op_scv xx LR, SRR1, MSR
+ op_rfscv xx LR, CTR, MSR
+ op_rfid xx SRR0, SRR1, MSR
+ op_hrfid xx HSRR0, HSRR1, MSR
+"""
+
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.decoder.power_decoder2 import Data
+from soc.alu.pipe_data import IntegerData
+
+
+class BranchInputData(IntegerData):
+ def __init__(self, pspec):
+ super().__init__(pspec)
+ # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR
+ # this involves the *decode* unit selecting the register, based
+ # on detecting the operand being bcctr, bclr or bctar
+
+ self.spr1 = Signal(64, reset_less=True) # see table above, SPR1
+ self.spr2 = Signal(64, reset_less=True) # see table above, SPR2
+ self.spr3 = Signal(64, reset_less=True) # see table above, SPR3
+ self.cr = Signal(32, reset_less=True) # Condition Register(s) CR0-7
+ self.cia = Signal(64, reset_less=True) # Current Instruction Address
+
+ # convenience variables. not all of these are used at once
+ self.ctr = self.srr0 = self.hsrr0 = self.spr2
+ self.lr = self.tar = self.srr1 = self.hsrr1 = self.spr1
+ self.msr = self.spr3
+
+ def __iter__(self):
+ yield from super().__iter__()
+ yield self.spr1
+ yield self.spr2
+ yield self.spr3
+ yield self.cr
+ yield self.cia
+
+ def eq(self, i):
+ lst = super().eq(i)
+ return lst + [self.spr1.eq(i.spr1), self.spr2.eq(i.spr2),
+ self.spr3.eq(i.spr3),
+ self.cr.eq(i.cr), self.cia.eq(i.cia)]
+
+
+class BranchOutputData(IntegerData):
+ def __init__(self, pspec):
+ super().__init__(pspec)
+ self.lr = Data(64, name="lr")
+ self.spr = Data(64, name="spr")
+ self.nia = Data(64, name="nia")
+
+ # convenience variables.
+ self.ctr = self.spr
+
+ def __iter__(self):
+ yield from super().__iter__()
+ yield from self.lr
+ yield from self.spr
+ yield from self.nia
+
+ def eq(self, i):
+ lst = super().eq(i)
+ return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr),
+ self.nia.eq(i.nia)]
--- /dev/null
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.branch.main_stage import BranchMainStage
+
+class BranchStages(PipeModBaseChain):
+ def get_chain(self):
+ main = BranchMainStage(self.pspec)
+ return [main]
+
+
+class BranchBasePipe(ControlBase):
+ def __init__(self, pspec):
+ ControlBase.__init__(self)
+ self.pipe1 = BranchStages(pspec)
+ self._eqs = self.connect([self.pipe1])
+
+ def elaborate(self, platform):
+ m = ControlBase.elaborate(self, platform)
+ m.submodules.pipe = self.pipe1
+ m.d.comb += self._eqs
+ return m
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.branch.pipeline import BranchBasePipe
+from soc.branch.br_input_record import CompBROpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+ def __init__(self, program, regs, sprs, cr, name):
+ self.program = program
+ self.regs = regs
+ self.sprs = sprs
+ self.name = name
+ self.cr = cr
+
+def get_rec_width(rec):
+ recwidth = 0
+ # Setup random inputs for dut.op
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ return recwidth
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in BranchTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class BranchTestCase(FHDLTestCase):
+ def __init__(self, name):
+ super().__init__(name)
+ self.test_name = name
+ def run_tst_program(self, prog, initial_regs=[0] * 32,
+ initial_sprs={}, initial_cr=0):
+ tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
+ self.test_name)
+ test_data.append(tc)
+
+ def test_unconditional(self):
+ choices = ["b", "ba", "bl", "bla"]
+ for i in range(20):
+ choice = random.choice(choices)
+ imm = random.randrange(-1<<23, (1<<23)-1) * 4
+ lst = [f"{choice} {imm}"]
+ initial_regs = [0] * 32
+ self.run_tst_program(Program(lst), initial_regs)
+
+ def test_bc_cr(self):
+ for i in range(20):
+ bc = random.randrange(-1<<13, (1<<13)-1) * 4
+ bo = random.choice([0b01100, 0b00100, 0b10100])
+ bi = random.randrange(0, 31)
+ cr = random.randrange(0, (1<<32)-1)
+ lst = [f"bc {bo}, {bi}, {bc}"]
+ initial_regs = [0] * 32
+ self.run_tst_program(Program(lst), initial_cr=cr)
+
+ def test_bc_ctr(self):
+ for i in range(20):
+ bc = random.randrange(-1<<13, (1<<13)-1) * 4
+ bo = random.choice([0, 2, 8, 10, 16, 18])
+ bi = random.randrange(0, 31)
+ cr = random.randrange(0, (1<<32)-1)
+ ctr = random.randint(0, (1<<32)-1)
+ lst = [f"bc {bo}, {bi}, {bc}"]
+ initial_sprs={9: SelectableInt(ctr, 64)}
+ self.run_tst_program(Program(lst),
+ initial_sprs=initial_sprs,
+ initial_cr=cr)
+
+ def test_ilang(self):
+ rec = CompBROpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ alu = BranchBasePipe(pspec)
+ vl = rtlil.convert(alu, ports=alu.ports())
+ with open("logical_pipeline.il", "w") as f:
+ f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+ """Runs every queued TestCase through one decoder + branch pipeline.
+
+ The hardware and the simulator are built once in run_all(); each
+ instruction is then executed both by the hardware and by the ISA
+ simulator and the results are compared in assert_outputs().
+ """
+ def __init__(self, test_data):
+ # "run_all" is the name of the method unittest will execute
+ super().__init__("run_all")
+ self.test_data = test_data
+
+ def run_all(self):
+ """Build the design, then simulate all entries of self.test_data."""
+ m = Module()
+ comb = m.d.comb
+ instruction = Signal(32)
+
+ pdecode = create_pdecode()
+
+ m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+ rec = CompBROpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ m.submodules.branch = branch = BranchBasePipe(pspec)
+
+ # feed the decoded operation into the pipeline, hold the pipeline
+ # handshake signals permanently asserted, and feed the raw
+ # instruction word into the decoder
+ comb += branch.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+ comb += branch.p.valid_i.eq(1)
+ comb += branch.n.ready_i.eq(1)
+ comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+ sim = Simulator(m)
+
+ sim.add_clock(1e-6)
+ def process():
+ for test in self.test_data:
+ print(test.name)
+ program = test.program
+ # NOTE(review): subTest() returns a context manager; it is not
+ # entered with "with" here, so presumably this call has no
+ # effect -- confirm
+ self.subTest(test.name)
+ simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
+ initial_cia = 0x2000
+ simulator.set_pc(initial_cia)
+ gen = program.generate_instructions()
+ instructions = list(zip(gen, program.assembly.splitlines()))
+
+ # index of the instruction at the simulator's current address;
+ # the loop ends once execution leaves the program
+ index = (simulator.pc.CIA.value - initial_cia)//4
+ while index < len(instructions) and index >= 0:
+ print(index)
+ ins, code = instructions[index]
+
+ print("0x{:X}".format(ins & 0xffffffff))
+ print(code)
+
+ # ask the decoder to decode this binary data (endian'd)
+ yield pdecode2.dec.bigendian.eq(0) # little / big?
+ yield instruction.eq(ins) # raw binary instr.
+ yield branch.p.data_i.cia.eq(simulator.pc.CIA.value)
+ yield branch.p.data_i.cr.eq(simulator.cr.get_range().value)
+ # note, here, the op will need further decoding in order
+ # to set the correct SPRs on SPR1/2/3. op_bc* require
+ # spr2 to be set to CTR, op_bctar require spr1 to be
+ # set to TAR, op_bclr* require spr1 to be set to LR.
+ # if op_sc*, op_rf* and op_hrfid are to be added here
+ # then additional op-decoding is required, accordingly
+ yield branch.p.data_i.spr2.eq(simulator.spr['CTR'].value)
+ print(f"cr0: {simulator.crl[0].get_range()}")
+ yield Settle()
+ # every queued instruction must decode to the BRANCH unit
+ fn_unit = yield pdecode2.e.fn_unit
+ self.assertEqual(fn_unit, Function.BRANCH.value, code)
+ yield
+ yield
+ # run the same instruction in the ISA simulator, remembering
+ # the NIA it had before the call
+ opname = code.split(' ')[0]
+ prev_nia = simulator.pc.NIA.value
+ yield from simulator.call(opname)
+ index = (simulator.pc.CIA.value - initial_cia)//4
+
+ yield from self.assert_outputs(branch, pdecode2,
+ simulator, prev_nia, code)
+
+
+ sim.add_sync_process(process)
+ with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+ traces=[]):
+ sim.run()
+
+ def assert_outputs(self, branch, dec2, sim, prev_nia, code):
+ """Compare the pipeline outputs with the ISA simulator state.
+
+ branch is the pipeline under test, dec2 the decoder, sim the ISA
+ simulator (already advanced by one instruction), prev_nia the
+ simulator NIA before that instruction and code the assembly text
+ used as the assertion message.
+ """
+ branch_taken = yield branch.n.data_o.nia.ok
+ # NOTE(review): prev_nia was read with .value but sim.pc.CIA is
+ # compared here without .value -- confirm the comparison behaves
+ # as intended for that type
+ sim_branch_taken = prev_nia != sim.pc.CIA
+ self.assertEqual(branch_taken, sim_branch_taken, code)
+ if branch_taken:
+ branch_addr = yield branch.n.data_o.nia.data
+ self.assertEqual(branch_addr, sim.pc.CIA.value, code)
+
+ # the link register must be written exactly when the decoder says so
+ lk = yield dec2.e.lk
+ branch_lk = yield branch.n.data_o.lr.ok
+ self.assertEqual(lk, branch_lk, code)
+ if lk:
+ branch_lr = yield branch.n.data_o.lr.data
+ self.assertEqual(sim.spr['LR'], branch_lr, code)
+
+
+if __name__ == "__main__":
+ # First let unittest run the test methods of this module, which queue
+ # their programs in test_data; exit=False stops unittest.main() from
+ # exiting the interpreter afterwards.
+ unittest.main(exit=False)
+ # Then run TestRunner once over everything that was queued.
+ suite = unittest.TestSuite()
+ suite.addTest(TestRunner(test_data))
+
+ runner = unittest.TextTestRunner()
+ runner.run(suite)
--- /dev/null
+# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl
+from nmigen import Memory, Module, Signal, Cat, Elaboratable
+from nmigen.hdl.rec import Record, Layout
+from nmigen.cli import main
+
+
+def or4(a, b, c, d):
+ return Cat(a.any(), b.any(), c.any(), d.any())
+
+
+class IntermediateResult(Record):
+ def __init__(self, name=None):
+ layout = (('v16', 15),
+ ('sel_hi', 2),
+ ('is_32bit', 1),
+ ('count_right', 1))
+ Record.__init__(self, Layout(layout), name=name)
+
+
+class ZeroCounter(Elaboratable):
+ """Combinatorial bit-position counter over a 64-bit (or 32-bit) input.
+
+ Inputs: rs_i (64-bit operand), count_right_i (direction select) and
+ is_32bit_i (restrict to the low 32 bits). Output: result_o.
+ The search narrows 64 bits -> 16-bit group -> 4-bit group -> bit.
+ """
+ def __init__(self):
+ self.rs_i = Signal(64, reset_less=True)
+ self.count_right_i = Signal(1, reset_less=True)
+ self.is_32bit_i = Signal(1, reset_less=True)
+ self.result_o = Signal(64, reset_less=True)
+
+ def ports(self):
+ """Return the input and output signals of this module."""
+ return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o]
+
+ def elaborate(self, platform):
+ m = Module()
+
+ # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4)
+ # m.submodule.pe2 = PriorityEncoder(4)
+ # m.submodule.pe3 = PriorityEncoder(4)
+ # etc.
+ # and where right will assign input to v and !right will assign v[::-1]
+ # so as to reverse the order of the input bits.
+
+ def encoder(v, right):
+ """
+ Return the index of the leftmost or rightmost 1 in a set of 4 bits.
+ Assumes v is not "0000"; if it is, return (right ? "11" : "00").
+ """
+ # each call creates a new 2-bit signal and adds its logic to m
+ ret = Signal(2, reset_less=True)
+ with m.If(right):
+ # lowest-numbered set bit wins
+ with m.If(v[0]):
+ m.d.comb += ret.eq(0)
+ with m.Elif(v[1]):
+ m.d.comb += ret.eq(1)
+ with m.Elif(v[2]):
+ m.d.comb += ret.eq(2)
+ with m.Else():
+ m.d.comb += ret.eq(3)
+ with m.Else():
+ # highest-numbered set bit wins
+ with m.If(v[3]):
+ m.d.comb += ret.eq(3)
+ with m.Elif(v[2]):
+ m.d.comb += ret.eq(2)
+ with m.Elif(v[1]):
+ m.d.comb += ret.eq(1)
+ with m.Else():
+ m.d.comb += ret.eq(0)
+ return ret
+
+ r = IntermediateResult()
+ r_in = IntermediateResult()
+
+ m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now
+
+ v = IntermediateResult()
+ y = Signal(4, reset_less=True)
+ z = Signal(4, reset_less=True)
+ sel = Signal(6, reset_less=True)
+ v4 = Signal(4, reset_less=True)
+
+ # Test 4 groups of 16 bits each.
+ # The top 2 groups are considered to be zero in 32-bit mode.
+ m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32],
+ self.rs_i[32:48], self.rs_i[48:64]))
+ with m.If(self.is_32bit_i):
+ # only groups 0 and 1 can be selected
+ m.d.comb += v.sel_hi[1].eq(0)
+ with m.If(self.count_right_i):
+ m.d.comb += v.sel_hi[0].eq(~z[0])
+ with m.Else():
+ m.d.comb += v.sel_hi[0].eq(z[1])
+ with m.Else():
+ m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i))
+
+ # Select the leftmost/rightmost non-zero group of 16 bits
+ with m.Switch(v.sel_hi):
+ with m.Case(0):
+ m.d.comb += v.v16.eq(self.rs_i[0:16])
+ with m.Case(1):
+ m.d.comb += v.v16.eq(self.rs_i[16:32])
+ with m.Case(2):
+ m.d.comb += v.v16.eq(self.rs_i[32:48])
+ with m.Case(3):
+ m.d.comb += v.v16.eq(self.rs_i[48:64])
+
+ # Latch this and do the rest in the next cycle, for the sake of timing
+ # (in this version r follows r_in combinatorially, see above, so no
+ # cycle boundary actually exists here)
+ m.d.comb += v.is_32bit.eq(self.is_32bit_i)
+ m.d.comb += v.count_right.eq(self.count_right_i)
+ m.d.comb += r_in.eq(v)
+ m.d.comb += sel[4:6].eq(r.sel_hi)
+
+ # Test 4 groups of 4 bits
+ m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8],
+ r.v16[8:12], r.v16[12:16]))
+ m.d.comb += sel[2:4].eq(encoder(y, r.count_right))
+
+ # Select the leftmost/rightmost non-zero group of 4 bits
+ with m.Switch(sel[2:4]):
+ with m.Case(0):
+ m.d.comb += v4.eq(r.v16[0:4])
+ with m.Case(1):
+ m.d.comb += v4.eq(r.v16[4:8])
+ with m.Case(2):
+ m.d.comb += v4.eq(r.v16[8:12])
+ with m.Case(3):
+ m.d.comb += v4.eq(r.v16[12:16])
+
+ m.d.comb += sel[0:2].eq(encoder(v4, r.count_right))
+
+ # sel is now the index of the leftmost/rightmost 1 bit in rs
+ o = self.result_o
+ with m.If(v4 == 0):
+ # operand is zero, return 32 for 32-bit, else 64
+ # (only bits 5 and 6 are driven; the rest keep their default)
+ m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit))
+ with m.Elif(r.count_right):
+ # return (63 - sel), trimmed to 5 bits in 32-bit mode
+ m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit)))
+ with m.Else():
+ m.d.comb += o.eq(sel)
+
+ return m
--- /dev/null
+# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl
+from nmigen import Module, Signal
+from nmigen.cli import rtlil
+from nmigen.back.pysim import Simulator, Delay
+from nmigen.test.utils import FHDLTestCase
+import unittest
+from soc.countzero.countzero import ZeroCounter
+
+
+class ZeroCounterTestCase(FHDLTestCase):
+ """Directed simulation test of ZeroCounter."""
+ def test_zerocounter(self):
+ """Apply fixed inputs to the DUT and check result_o after each."""
+ m = Module()
+ comb = m.d.comb
+ m.submodules.dut = dut = ZeroCounter()
+
+ sim = Simulator(m)
+ # sim.add_clock(1e-6)
+
+ def process():
+ # each step sets inputs, waits 1us, then reads the result
+ print("test zero input")
+ yield dut.rs_i.eq(0)
+ yield dut.is_32bit_i.eq(0)
+ yield dut.count_right_i.eq(0)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ assert result == 0x40
+ # report "bad cntlzd 0 = " & to_hstring(result);
+ assert(result == 0x40)
+ yield dut.count_right_i.eq(1)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ # report "bad cntlzd 0 = " & to_hstring(result);
+ assert(result == 0x40)
+ yield dut.is_32bit_i.eq(1)
+ yield dut.count_right_i.eq(0)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ # report "bad cntlzw 0 = " & to_hstring(result);
+ assert(result == 0x20)
+ yield dut.count_right_i.eq(1)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ # report "bad cntlzw 0 = " & to_hstring(result);
+ assert(result == 0x20)
+ # TODO next tests
+
+ # single set bit at position 4
+ yield dut.rs_i.eq(0b00010000)
+ yield dut.is_32bit_i.eq(0)
+ yield dut.count_right_i.eq(0)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ assert result == 4, "result %d" % result
+
+ yield dut.count_right_i.eq(1)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ assert result == 59, "result %d" % result
+
+ yield dut.is_32bit_i.eq(1)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ assert result == 27, "result %d" % result
+
+ # bits 15, 14 and 8 set
+ yield dut.rs_i.eq(0b1100000100000000)
+ yield dut.is_32bit_i.eq(0)
+ yield dut.count_right_i.eq(0)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ # NOTE(review): the highest set bit of this input is bit 15; the
+ # expected value 14 presumably relies on the DUT's v16 field
+ # being 15 bits wide. If v16 is 16 bits wide this should be 15
+ # -- confirm.
+ assert result == 14, "result %d" % result
+
+ yield dut.count_right_i.eq(1)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ assert result == 55, "result %d" % result
+
+ yield dut.is_32bit_i.eq(1)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ assert result == 23, "result %d" % result
+
+ yield dut.count_right_i.eq(0)
+ yield Delay(1e-6)
+ result = yield dut.result_o
+ # NOTE(review): same dependence on the v16 width as noted above
+ assert result == 14, "result %d" % result
+
+
+ sim.add_process(process) # or sim.add_sync_process(process), see below
+
+ # run test and write vcd
+ fn = "genullnau"
+ with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()):
+ sim.run()
+
+ # cntlzd_w
+ # cnttzd_w
+
+
+if __name__ == "__main__":
+
+ # write the RTLIL of a stand-alone ZeroCounter to countzero.il
+ dut = ZeroCounter()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("countzero.il", "w") as f:
+ f.write(vl)
+
+ # then run the unit tests of this module
+ unittest.main()
--- /dev/null
+# This stage is intended to do Condition Register instructions
+# and output, as well as carry and overflow generation.
+# NOTE: with the exception of mtcrf and mfcr, we really should be doing
+# the field decoding which
+# selects which bits of CR are to be read / written, back in the
+# decoder / insn-issue, have both self.i.cr and self.o.cr
+# be broken down into 4-bit-wide "registers", with their
+# own "Register File" (indexed by bt, ba and bb),
+# exactly how INT regs are done (by RA, RB, RS and RT)
+# however we are pushed for time so do it as *one* register.
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.cr.pipe_data import CRInputData, CROutputData
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+class CRMainStage(PipeModBase):
+ """Main combinatorial stage of the Condition Register pipeline.
+
+ Handles OP_MCRF, OP_CROP, OP_MTCRF and OP_MFCR. Outputs the new CR
+ value on o.cr (equal to the input CR unless the operation modifies
+ it), the integer result on o.o, and passes the context through.
+ """
+ def __init__(self, pspec):
+ super().__init__(pspec, "main")
+ # field extractors operating directly on the instruction word
+ self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+ self.fields.create_specs()
+
+ def ispec(self):
+ """Data layout accepted by this stage."""
+ return CRInputData(self.pspec)
+
+ def ospec(self):
+ """Data layout produced by this stage."""
+ return CROutputData(self.pspec)
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ op = self.i.ctx.op
+ xl_fields = self.fields.FormXL
+ xfx_fields = self.fields.FormXFX
+ # default: cr_o remains same as cr input unless modified, below
+ cr_o = Signal.like(self.i.cr)
+ comb += cr_o.eq(self.i.cr)
+
+ ##### prepare inputs / temp #####
+
+ # Generate array for cr input so bits can be selected
+ # (index i of the array holds bit 31-i of the CR signal)
+ cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)])
+ for i in range(32):
+ comb += cr_arr[i].eq(self.i.cr[31-i])
+
+ # Generate array for cr output so the bit to write to can be
+ # selected by a signal
+ # (each output bit defaults to the matching input bit; the cases
+ # below override individual entries)
+ cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)])
+ for i in range(32):
+ comb += cr_o[31-i].eq(cr_out_arr[i])
+ comb += cr_out_arr[i].eq(cr_arr[i])
+
+ # Generate the mask for mtcrf, mtocrf, and mfocrf
+ # replicate every fxm field in the insn to 4-bit, as a mask
+ FXM = xfx_fields.FXM[0:-1]
+ mask = Signal(32, reset_less=True)
+ comb += mask.eq(Cat(*[Repl(FXM[i], 4) for i in range(8)]))
+
+ #################################
+ ##### main switch statement #####
+
+ with m.Switch(op.insn_type):
+ ##### mcrf #####
+ with m.Case(InternalOp.OP_MCRF):
+ # MCRF copies the 4 bits of crA to crB (for instance
+ # copying cr2 to cr1)
+ BF = xl_fields.BF[0:-1] # destination CR
+ BFA = xl_fields.BFA[0:-1] # source CR
+
+ for i in range(4):
+ comb += cr_out_arr[BF*4 + i].eq(cr_arr[BFA*4 + i])
+
+ ##### crand, cror, crnor etc. #####
+ with m.Case(InternalOp.OP_CROP):
+ # crand/cror and friends get decoded to the same opcode, but
+ # one of the fields inside the instruction is a 4 bit lookup
+ # table. This lookup table gets indexed by bits a and b from
+ # the CR to determine what the resulting bit should be.
+
+ # Grab the lookup table for cr_op type instructions
+ lut = Array([Signal(name=f"lut{i}") for i in range(4)])
+ # There's no field, just have to grab it directly from the insn
+ for i in range(4):
+ comb += lut[i].eq(self.i.ctx.op.insn[6+i])
+
+ # Get the bit selector fields from the instruction
+ BT = xl_fields.BT[0:-1]
+ BA = xl_fields.BA[0:-1]
+ BB = xl_fields.BB[0:-1]
+
+ # Use the two input bits to look up the result in the LUT
+ comb += cr_out_arr[BT].eq(lut[Cat(cr_arr[BB], cr_arr[BA])])
+
+ ##### mtcrf #####
+ with m.Case(InternalOp.OP_MTCRF):
+ # mtocrf and mtcrf are essentially identical
+ # put input (RA) - mask-selected - into output CR, leave
+ # rest of CR alone.
+ # (this whole-signal assignment comes after, and so overrides,
+ # the per-bit cr_o assignments made above)
+ comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask))
+
+ ##### mfcr #####
+ with m.Case(InternalOp.OP_MFCR):
+ # Ugh. mfocrf and mfcr have one random bit differentiating
+ # them. This bit is not in any particular field, so this
+ # extracts that bit from the instruction
+ move_one = Signal(reset_less=True)
+ comb += move_one.eq(self.i.ctx.op.insn[20])
+
+ # mfocrf
+ with m.If(move_one):
+ comb += self.o.o.eq(self.i.cr & mask)
+ # mfcr
+ with m.Else():
+ comb += self.o.o.eq(self.i.cr)
+
+ # output and context
+ comb += self.o.cr.eq(cr_o)
+ comb += self.o.ctx.eq(self.i.ctx)
+
+ return m
--- /dev/null
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.alu.pipe_data import IntegerData
+
+
+class CRInputData(IntegerData):
+ def __init__(self, pspec):
+ super().__init__(pspec)
+ self.a = Signal(64, reset_less=True) # RA
+ self.cr = Signal(64, reset_less=True) # CR in
+
+ def __iter__(self):
+ yield from super().__iter__()
+ yield self.a
+ yield self.cr
+
+ def eq(self, i):
+ lst = super().eq(i)
+ return lst + [self.a.eq(i.a),
+ self.cr.eq(i.cr)]
+
+class CROutputData(IntegerData):
+ def __init__(self, pspec):
+ super().__init__(pspec)
+ self.o = Signal(64, reset_less=True) # RA
+ self.cr = Signal(64, reset_less=True) # CR in
+
+ def __iter__(self):
+ yield from super().__iter__()
+ yield self.o
+ yield self.cr
+
+ def eq(self, i):
+ lst = super().eq(i)
+ return lst + [self.o.eq(i.o),
+ self.cr.eq(i.cr)]
--- /dev/null
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.cr.main_stage import CRMainStage
+
+class CRStages(PipeModBaseChain):
+ def get_chain(self):
+ main = CRMainStage(self.pspec)
+ return [main]
+
+
+class CRBasePipe(ControlBase):
+ def __init__(self, pspec):
+ ControlBase.__init__(self)
+ self.pipe1 = CRStages(pspec)
+ self._eqs = self.connect([self.pipe1])
+
+ def elaborate(self, platform):
+ m = ControlBase.elaborate(self, platform)
+ m.submodules.pipe = self.pipe1
+ m.d.comb += self._eqs
+ return m
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.cr.pipeline import CRBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+ def __init__(self, program, regs, sprs, cr, name):
+ self.program = program
+ self.regs = regs
+ self.sprs = sprs
+ self.name = name
+ self.cr = cr
+
+def get_rec_width(rec):
+ recwidth = 0
+ # Setup random inputs for dut.op
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ return recwidth
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in CRTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class CRTestCase(FHDLTestCase):
+ def __init__(self, name):
+ super().__init__(name)
+ self.test_name = name
+ def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={},
+ initial_cr=0):
+ tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
+ self.test_name)
+ test_data.append(tc)
+
+ def test_crop(self):
+ insns = ["crand", "cror", "crnand", "crnor", "crxor", "creqv",
+ "crandc", "crorc"]
+ for i in range(40):
+ choice = random.choice(insns)
+ ba = random.randint(0, 31)
+ bb = random.randint(0, 31)
+ bt = random.randint(0, 31)
+ lst = [f"{choice} {ba}, {bb}, {bt}"]
+ cr = random.randint(0, 7)
+ self.run_tst_program(Program(lst), initial_cr=cr)
+
+ def test_mcrf(self):
+ lst = ["mcrf 0, 5"]
+ cr = 0xffff0000
+ self.run_tst_program(Program(lst), initial_cr=cr)
+
+ def test_mtcrf(self):
+ for i in range(20):
+ mask = random.randint(0, 255)
+ lst = [f"mtcrf {mask}, 2"]
+ cr = random.randint(0, (1<<32)-1)
+ initial_regs = [0] * 32
+ initial_regs[2] = random.randint(0, (1<<32)-1)
+ self.run_tst_program(Program(lst), initial_regs=initial_regs,
+ initial_cr=cr)
+ def test_mtocrf(self):
+ for i in range(20):
+ mask = 1<<random.randint(0, 7)
+ lst = [f"mtocrf {mask}, 2"]
+ cr = random.randint(0, (1<<32)-1)
+ initial_regs = [0] * 32
+ initial_regs[2] = random.randint(0, (1<<32)-1)
+ self.run_tst_program(Program(lst), initial_regs=initial_regs,
+ initial_cr=cr)
+
+ def test_mfcr(self):
+ for i in range(5):
+ lst = ["mfcr 2"]
+ cr = random.randint(0, (1<<32)-1)
+ self.run_tst_program(Program(lst), initial_cr=cr)
+
+ def test_mfocrf(self):
+ for i in range(20):
+ mask = 1<<random.randint(0, 7)
+ lst = [f"mfocrf 2, {mask}"]
+ cr = random.randint(0, (1<<32)-1)
+ self.run_tst_program(Program(lst), initial_cr=cr)
+
+
+ def test_ilang(self):
+ rec = CompALUOpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ alu = CRBasePipe(pspec)
+ ports = alu.ports()
+ vl = rtlil.convert(alu, ports=alu.ports())
+ with open("logical_pipeline.il", "w") as f:
+ f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+ def __init__(self, test_data):
+ super().__init__("run_all")
+ self.test_data = test_data
+
+ def set_inputs(self, alu, dec2, simulator):
+ yield alu.p.data_i.cr.eq(simulator.cr.get_range().value)
+
+ reg3_ok = yield dec2.e.read_reg3.ok
+ if reg3_ok:
+ reg3_sel = yield dec2.e.read_reg3.data
+ reg3 = simulator.gpr(reg3_sel).value
+ yield alu.p.data_i.a.eq(reg3)
+
+ def run_all(self):
+ m = Module()
+ comb = m.d.comb
+ instruction = Signal(32)
+
+ pdecode = create_pdecode()
+
+ m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+ rec = CompALUOpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ m.submodules.alu = alu = CRBasePipe(pspec)
+
+ comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+ comb += alu.p.valid_i.eq(1)
+ comb += alu.n.ready_i.eq(1)
+ comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+ sim = Simulator(m)
+
+ sim.add_clock(1e-6)
+ def process():
+ for test in self.test_data:
+ print(test.name)
+ program = test.program
+ self.subTest(test.name)
+ simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
+ gen = program.generate_instructions()
+ instructions = list(zip(gen, program.assembly.splitlines()))
+
+ index = simulator.pc.CIA.value//4
+ while index < len(instructions):
+ ins, code = instructions[index]
+
+ print("0x{:X}".format(ins & 0xffffffff))
+ print(code)
+
+ # ask the decoder to decode this binary data (endian'd)
+ yield pdecode2.dec.bigendian.eq(0) # little / big?
+ yield instruction.eq(ins) # raw binary instr.
+ yield Settle()
+ yield from self.set_inputs(alu, pdecode2, simulator)
+ fn_unit = yield pdecode2.e.fn_unit
+ self.assertEqual(fn_unit, Function.CR.value, code)
+ yield
+ opname = code.split(' ')[0]
+ yield from simulator.call(opname)
+ index = simulator.pc.CIA.value//4
+
+ vld = yield alu.n.valid_o
+ while not vld:
+ yield
+ vld = yield alu.n.valid_o
+ yield
+ cr_out = yield pdecode2.e.output_cr
+ if cr_out:
+ cr_expected = simulator.cr.get_range().value
+ cr_real = yield alu.n.data_o.cr
+ msg = f"real: {cr_expected:x}, actual: {cr_real:x}"
+ msg += " code: %s" % code
+ self.assertEqual(cr_expected, cr_real, msg)
+
+ reg_out = yield pdecode2.e.write_reg.ok
+ if reg_out:
+ reg_sel = yield pdecode2.e.write_reg.data
+ reg_data = simulator.gpr(reg_sel).value
+ output = yield alu.n.data_o.o
+ msg = f"real: {reg_data:x}, actual: {output:x}"
+ self.assertEqual(reg_data, output)
+
+ sim.add_sync_process(process)
+ with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+ traces=[]):
+ sim.run()
+ def check_extra_alu_outputs(self, alu, dec2, sim):
+ rc = yield dec2.e.rc.data
+ if rc:
+ cr_expected = sim.crl[0].get_range().value
+ cr_actual = yield alu.n.data_o.cr0
+ self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+ unittest.main(exit=False)
+ suite = unittest.TestSuite()
+ suite.addTest(TestRunner(test_data))
+
+ runner = unittest.TextTestRunner()
+ runner.run(suite)
--- /dev/null
+from nmigen import Elaboratable, Signal, Module, Repl, Cat, Const, Array
+from nmigen.cli import main
+
+
+class Bpermd(Elaboratable):
+ """This class does a Bit Permute on a Doubleword
+
+ X-form: bpermd RA,RS,RB
+
+ Eight permuted bits are produced. For each permuted bit i where i ranges
+ from 0 to 7 and for each byte i of RS, do the following. If byte i of RS
+ is less than 64, permuted bit i is set to the bit of RB specified by byte
+ i of RS; otherwise permuted bit i is set to 0. The permuted bits are
+ placed in the least-significant byte of RA, and the remaining bits are
+ filled with 0s.
+ Special Registers Altered: None
+
+ Programming note:
+ The fact that the permuted bit is 0 if the corresponding index value
+ exceeds 63 permits the permuted bits to be selected from a 128-bit
+ quantity, using a single index register. For example, assume that the
+ 128-bit quantity Q, from which the permuted bits are to be selected, is
+ in registers r2 (high-order 64 bits of Q) and r3 (low-order 64 bits of Q),
+ that the index values are in register r1, with each byte of r1 containing
+ a value in the range 0:127, and that each byte of register r4 contains
+ the value 64. The following code sequence selects eight permuted bits
+ from Q and places them into the low-order byte of r6.
+ (The code sequence itself is not reproduced here.)
+ """
+
+ def __init__(self, width):
+ # width is applied to all three ports; elaborate() hard-codes 64
+ # RB bits and 8 index bytes regardless of width
+ self.width = width
+ self.rs = Signal(width, reset_less=True)
+ self.ra = Signal(width, reset_less=True)
+ self.rb = Signal(width, reset_less=True)
+
+ def elaborate(self, platform):
+ m = Module()
+ # only bits 0..7 of perm are ever assigned or read below
+ perm = Signal(self.width, reset_less=True)
+ # one named 1-bit signal per RB bit, wrapped in an Array so that it
+ # can be indexed by a Signal (idx) rather than a Python int
+ rb64 = [Signal(1, reset_less=True, name=f"rb64_{i}") for i in range(64)]
+ for i in range(64):
+ m.d.comb += rb64[i].eq(self.rb[i])
+ rb64 = Array(rb64)
+ for i in range(8):
+ # byte i of RS is the index for permuted bit i
+ index = self.rs[8*i:8*i+8]
+ idx = Signal(8, name=f"idx_{i}", reset_less=True)
+ m.d.comb += idx.eq(index)
+ # no Else: perm[i] is simply not driven here when idx >= 64
+ with m.If(idx < 64):
+ m.d.comb += perm[i].eq(rb64[idx])
+ # only the low byte of RA is driven; perm is truncated to 8 bits
+ m.d.comb += self.ra[0:8].eq(perm)
+ return m
+
+
+if __name__ == "__main__":
+ bperm = Bpermd(width=64)
+ main(bperm, ports=[bperm.rs, bperm.ra, bperm.rb])
--- /dev/null
+# Proof of correctness for bit permute module
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+ signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.logical.bperm import Bpermd
+
+import unittest
+
+
+# So formal verification is a little different than writing a test
+# case, as you're actually generating logic around your module to
+# check that it behaves a certain way. So here, I'm going to create a
+# module to put my formal assertions in
+class Driver(Elaboratable):
+ def __init__(self):
+ # We don't need any inputs and outputs here, so I won't
+ # declare any
+ pass
+
+ def elaborate(self, platform):
+ # standard stuff
+ m = Module()
+ comb = m.d.comb
+
+ # instantiate the device under test as a submodule
+ m.submodules.bperm = bperm = Bpermd(64)
+
+ # Grab the inputs and outputs of the DUT to make them more
+ # convenient to access
+ rs = bperm.rs
+ rb = bperm.rb
+ ra = bperm.ra
+
+ # Before we prove any properties about the DUT, we need to set
+ # up its inputs. There's a couple ways to do this, you could
+ # define some inputs and outputs for the driver module and
+ # wire them up to the DUT, but that's kind of a pain. The
+ # other option is to use AnyConst/AnySeq, which tells yosys
+ # that those inputs can take on any value.
+
+ # AnyConst should be used when the input should take on a
+ # random value, but that value should be constant throughout
+ # the test.
+ # AnySeq should be used when the input can change on every
+ # cycle
+
+ # Since this is a combinatorial circuit, it really doesn't
+ # matter which one you choose, so I chose AnyConst. If this
+ # was a sequential circuit, (especially a state machine) you'd
+ # want to use AnySeq
+ comb += [rs.eq(AnyConst(64)),
+ rb.eq(AnyConst(64))]
+
+
+ # The pseudocode in the Power ISA manual (v3.1) is as follows:
+ # do i = 0 to 7
+ # index <- RS[8*i:8*i+8]
+ # if index < 64:
+ # perm[i] <- RB[index]
+ # else:
+ # perm[i] <- 0
+ # RA <- 56'b0 || perm[0:8] # big endian though
+
+ # Looking at this, I can identify 3 properties that the bperm
+ # module should keep:
+ # 1. RA[8:64] should always equal 0
+ # 2. If RS[i*8:i*8+8] >= 64 then RA[i] should equal 0
+ # 3. If RS[i*8:i*8+8] < 64 then RA[i] should equal RB[index]
+
+ # Now we need to Assert that the properties above hold:
+
+ # Property 1: RA[8:64] should always equal 0
+ comb += Assert(ra[8:] == 0)
+ # Notice how we're adding Assert to comb like it's a circuit?
+ # That's because it kind of is. If you run this proof and have
+ # yosys graph the ilang, you'll be able to see an equals
+ # comparison cell feeding into an assert cell
+
+ # Now we need to prove property #2. I'm going to leave this to
+ # you Cole. I'd start by writing a for loop and extracting the
+ # 8 indices into signals. Then I'd write an if statement
+ # checking if the index is >= 64 (it's hardware, so use an
+ # m.If()). Finally, I'd add an assert that checks whether
+ # ra[i] is equal to 0
+
+
+
+ return m
+
+
+class TestCase(FHDLTestCase):
+ # This bit here is actually in charge of running the formal
+ # proof. It has nmigen spit out the ilang, and feeds it to
+ # SymbiYosys to run the proof. If the proof fails, yosys will
+ # generate a .vcd file showing how it was able to violate your
+ # assertions in proof_bperm_formal/engine_0/trace.vcd. From that
+ # you should be able to figure out what went wrong, and either
+ # correct the assertion or fix the DUT
+ def test_formal(self):
+ module = Driver()
+ # This runs a Bounded Model Check on the driver module
+ # above. What that does is it starts at some initial state,
+ # and steps it through `depth` cycles, checking that the
+ # assertions hold at every cycle. Since this is a
+ # combinatorial module, it only needs 1 cycle to prove
+ # everything.
+ self.assertFormal(module, mode="bmc", depth=2)
+ self.assertFormal(module, mode="cover", depth=2)
+
+ # As mentioned above, you can look at the graph in yosys and see
+ # all the assertion cells
+ def test_ilang(self):
+ dut = Driver()
+ vl = rtlil.convert(dut, ports=[])
+ with open("bperm.il", "w") as f:
+ f.write(vl)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Proof of correctness for the ALU input stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import Module, Signal, Elaboratable, Mux
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.alu.input_stage import ALUInputStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+ def __init__(self):
+ # inputs and outputs
+ pass
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+
+ rec = CompALUOpSubset()
+ recwidth = 0
+ # Setup random inputs for dut.op
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ comb += p.eq(AnyConst(width))
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+ m.submodules.dut = dut = ALUInputStage(pspec)
+
+ a = Signal(64)
+ b = Signal(64)
+ comb += [dut.i.a.eq(a),
+ dut.i.b.eq(b),
+ a.eq(AnyConst(64)),
+ b.eq(AnyConst(64))]
+
+
+ comb += dut.i.ctx.op.eq(rec)
+
+
+ # Assert that op gets copied from the input to output
+ for p in rec.ports():
+ name = p.name
+ rec_sig = p
+ dut_sig = getattr(dut.o.ctx.op, name)
+ comb += Assert(dut_sig == rec_sig)
+
+ with m.If(rec.invert_a):
+ comb += Assert(dut.o.a == ~a)
+ with m.Else():
+ comb += Assert(dut.o.a == a)
+
+ with m.If(rec.imm_data.imm_ok &
+ ~(rec.insn_type == InternalOp.OP_RLC)):
+ comb += Assert(dut.o.b == rec.imm_data.imm)
+ with m.Else():
+ comb += Assert(dut.o.b == b)
+
+
+
+
+ return m
+
+class GTCombinerTestCase(FHDLTestCase):
+ def test_formal(self):
+ module = Driver()
+ self.assertFormal(module, mode="bmc", depth=4)
+ self.assertFormal(module, mode="cover", depth=4)
+ def test_ilang(self):
+ dut = Driver()
+ vl = rtlil.convert(dut, ports=[])
+ with open("input_stage.il", "w") as f:
+ f.write(vl)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Proof of correctness for the Logical pipeline main stage
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+ signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.logical.main_stage import LogicalMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+ def __init__(self):
+ # inputs and outputs
+ pass
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+
+ rec = CompALUOpSubset()
+ recwidth = 0
+ # Setup random inputs for dut.op
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ comb += p.eq(AnyConst(width))
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+ m.submodules.dut = dut = LogicalMainStage(pspec)
+
+ # convenience variables
+ a = dut.i.a
+ b = dut.i.b
+ carry_in = dut.i.carry_in
+ so_in = dut.i.so
+ carry_out = dut.o.carry_out
+ o = dut.o.o
+
+ # setup random inputs
+ comb += [a.eq(AnyConst(64)),
+ b.eq(AnyConst(64)),
+ carry_in.eq(AnyConst(1)),
+ so_in.eq(AnyConst(1))]
+
+ comb += dut.i.ctx.op.eq(rec)
+
+ # Assert that op gets copied from the input to output
+ for rec_sig in rec.ports():
+ name = rec_sig.name
+ dut_sig = getattr(dut.o.ctx.op, name)
+ comb += Assert(dut_sig == rec_sig)
+
+ # signed and signed/32 versions of input a
+ a_signed = Signal(signed(64))
+ a_signed_32 = Signal(signed(32))
+ comb += a_signed.eq(a)
+ comb += a_signed_32.eq(a[0:32])
+
+ # main assertion of arithmetic operations
+ with m.Switch(rec.insn_type):
+ with m.Case(InternalOp.OP_AND):
+ comb += Assert(dut.o.o == a & b)
+ with m.Case(InternalOp.OP_OR):
+ comb += Assert(dut.o.o == a | b)
+ with m.Case(InternalOp.OP_XOR):
+ comb += Assert(dut.o.o == a ^ b)
+
+ return m
+
+
+class LogicalTestCase(FHDLTestCase):
+ def test_formal(self):
+ module = Driver()
+ self.assertFormal(module, mode="bmc", depth=2)
+ self.assertFormal(module, mode="cover", depth=2)
+ def test_ilang(self):
+ dut = Driver()
+ vl = rtlil.convert(dut, ports=[])
+ with open("main_stage.il", "w") as f:
+ f.write(vl)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# This stage is intended to adjust the input data before sending it to
+# the actual ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+ unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.alu.pipe_data import ALUInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ALUInputStage(PipeModBase):
+ """Input-conditioning pipeline stage.
+
+ Optionally inverts operand A, passes operand B through, selects the
+ carry-in according to op.input_carry, and copies the sticky-overflow
+ bit and the context to the output.
+ """
+ def __init__(self, pspec):
+ super().__init__(pspec, "input")
+
+ def ispec(self):
+ return ALUInputData(self.pspec)
+
+ def ospec(self):
+ # same data layout on output as on input
+ return ALUInputData(self.pspec)
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+
+ ##### operand A #####
+
+ # operand a to be as-is or inverted
+ a = Signal.like(self.i.a)
+
+ with m.If(self.i.ctx.op.invert_a):
+ comb += a.eq(~self.i.a)
+ with m.Else():
+ comb += a.eq(self.i.a)
+
+ comb += self.o.a.eq(a)
+
+ ##### operand B #####
+
+ # operand B is passed through unchanged: no immediate selection is
+ # done in this stage. See
+ # https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+ # NOTE(review): whoever drives self.i.b is presumably expected to
+ # have substituted any immediate already - confirm for all users
+ comb += self.o.b.eq(self.i.b)
+
+ ##### carry-in #####
+
+ # either copy incoming carry or set to 1/0 as defined by op
+ # (no default case: other input_carry values assign nothing here)
+ with m.Switch(self.i.ctx.op.input_carry):
+ with m.Case(CryIn.ZERO):
+ comb += self.o.carry_in.eq(0)
+ with m.Case(CryIn.ONE):
+ comb += self.o.carry_in.eq(1)
+ with m.Case(CryIn.CA):
+ comb += self.o.carry_in.eq(self.i.carry_in)
+
+ ##### sticky overflow and context (both pass-through) #####
+
+ comb += self.o.so.eq(self.i.so)
+ comb += self.o.ctx.eq(self.i.ctx)
+
+ return m
--- /dev/null
+# This stage is intended to do most of the work of executing Logical
+# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ
+# however input and output stages also perform bit-negation on input(s)
+# and output, as well as carry and overflow generation.
+# This module however should not gate the carry or overflow, that's up
+# to the output stage
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.logical.pipe_data import ALUInputData
+from soc.alu.pipe_data import ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+from soc.countzero.countzero import ZeroCounter
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+def array_of(count, bitwidth):
+ res = []
+ for i in range(count):
+ res.append(Signal(bitwidth, reset_less=True))
+ return res
+
+
+class LogicalMainStage(PipeModBase):
+ def __init__(self, pspec):
+ super().__init__(pspec, "main")
+ self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+ self.fields.create_specs()
+
+ def ispec(self):
+ return ALUInputData(self.pspec)
+
+ def ospec(self):
+ return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o
+
+ ##########################
+ # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount
+
+ with m.Switch(op.insn_type):
+
+ ###### AND, OR, XOR #######
+ with m.Case(InternalOp.OP_AND):
+ comb += o.eq(a & b)
+ with m.Case(InternalOp.OP_OR):
+ comb += o.eq(a | b)
+ with m.Case(InternalOp.OP_XOR):
+ comb += o.eq(a ^ b)
+
+ ###### cmpb #######
+ with m.Case(InternalOp.OP_CMPB):
+ l = []
+ for i in range(8):
+ slc = slice(i*8, (i+1)*8)
+ l.append(Repl(a[slc] == b[slc], 8))
+ comb += o.eq(Cat(*l))
+
+ ###### popcount #######
+ with m.Case(InternalOp.OP_POPCNT):
+ # starting from a, perform successive addition-reductions
+ # creating arrays big enough to store the sum, each time
+ pc = [a]
+ # QTY32 2-bit (to take 2x 1-bit sums) etc.
+ work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
+ for l, b in work:
+ pc.append(array_of(l, b))
+ pc8 = pc[3] # array of 8 8-bit counts (popcntb)
+ pc32 = pc[5] # array of 2 32-bit counts (popcntw)
+ popcnt = pc[-1] # array of 1 64-bit count (popcntd)
+ # cascade-tree of adds
+ for idx, (l, b) in enumerate(work):
+ for i in range(l):
+ stt, end = i*2, i*2+1
+ src, dst = pc[idx], pc[idx+1]
+ comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
+ Cat(src[end], Const(0, 1)))
+ # decode operation length
+ with m.If(op.data_len[2:4] == 0b00):
+ # popcntb - pack 8x 4-bit answers into output
+ for i in range(8):
+ comb += o[i*8:i*8+4].eq(pc8[i])
+ with m.Elif(op.data_len[3] == 0):
+ # popcntw - pack 2x 5-bit answers into output
+ for i in range(2):
+ comb += o[i*32:i*32+5].eq(pc32[i])
+ with m.Else():
+ # popcntd - put 1x 6-bit answer into output
+ comb += o.eq(popcnt[0])
+
+ ###### parity #######
+ with m.Case(InternalOp.OP_PRTY):
+ # strange instruction which XORs together the LSBs of each byte
+ par0 = Signal(reset_less=True)
+ par1 = Signal(reset_less=True)
+ comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
+ comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
+ with m.If(op.data_len[3] == 1):
+ comb += o.eq(par0 ^ par1)
+ with m.Else():
+ comb += o[0].eq(par0)
+ comb += o[32].eq(par1)
+
+ ###### cntlz #######
+ with m.Case(InternalOp.OP_CNTZ):
+ XO = self.fields.FormX.XO[0:-1]
+ m.submodules.countz = countz = ZeroCounter()
+ comb += countz.rs_i.eq(a)
+ comb += countz.is_32bit_i.eq(op.is_32bit)
+ comb += countz.count_right_i.eq(XO[-1])
+ comb += o.eq(countz.result_o)
+
+ ###### bpermd #######
+ # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
+
+ ###### sticky overflow and context, both pass-through #####
+
+ comb += self.o.so.eq(self.i.so)
+ comb += self.o.ctx.eq(self.i.ctx)
+
+ return m
--- /dev/null
+from nmigen import Signal, Const
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.alu.pipe_data import IntegerData
+
+
+class ALUInputData(IntegerData):
+ def __init__(self, pspec):
+ super().__init__(pspec)
+ self.a = Signal(64, reset_less=True) # RA
+ self.b = Signal(64, reset_less=True) # RB/immediate
+ self.so = Signal(reset_less=True)
+ self.carry_in = Signal(reset_less=True)
+
+ def __iter__(self):
+ yield from super().__iter__()
+ yield self.a
+ yield self.b
+ yield self.carry_in
+ yield self.so
+
+ def eq(self, i):
+ lst = super().eq(i)
+ return lst + [self.a.eq(i.a), self.b.eq(i.b),
+ self.carry_in.eq(i.carry_in),
+ self.so.eq(i.so)]
--- /dev/null
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.alu.input_stage import ALUInputStage
+from soc.logical.main_stage import LogicalMainStage
+from soc.alu.output_stage import ALUOutputStage
+
+class LogicalStages(PipeModBaseChain):
+ def get_chain(self):
+ inp = ALUInputStage(self.pspec)
+ main = LogicalMainStage(self.pspec)
+ out = ALUOutputStage(self.pspec)
+ return [inp, main, out]
+
+
+class LogicalBasePipe(ControlBase):
+ def __init__(self, pspec):
+ ControlBase.__init__(self)
+ self.pipe1 = LogicalStages(pspec)
+ self._eqs = self.connect([self.pipe1])
+
+ def elaborate(self, platform):
+ m = ControlBase.elaborate(self, platform)
+ m.submodules.pipe = self.pipe1
+ m.d.comb += self._eqs
+ return m
--- /dev/null
+'''Empty until I write the unit test'''
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.logical.pipeline import LogicalBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+
+class TestCase:
+ def __init__(self, program, regs, sprs, name):
+ self.program = program
+ self.regs = regs
+ self.sprs = sprs
+ self.name = name
+
+def get_rec_width(rec):
+ recwidth = 0
+ # Setup random inputs for dut.op
+ for p in rec.ports():
+ width = p.width
+ recwidth += width
+ return recwidth
+
+def set_alu_inputs(alu, dec2, sim):
+ """Simulator process fragment: drive the pipeline's a and b inputs.
+
+ Reads the decoder's register-read and immediate fields, looks the
+ register values up in the ISA simulator `sim`, and assigns them to
+ alu.p.data_i.a and alu.p.data_i.b. Use via `yield from`.
+ """
+ # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+ # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+ # and place it into data_i.b
+
+ reg3_ok = yield dec2.e.read_reg3.ok
+ reg1_ok = yield dec2.e.read_reg1.ok
+ # the two flags must differ, so at least one of them is set; while
+ # this assert is active the final "else" below cannot be reached
+ assert reg3_ok != reg1_ok
+ if reg3_ok:
+ data1 = yield dec2.e.read_reg3.data
+ data1 = sim.gpr(data1).value
+ elif reg1_ok:
+ data1 = yield dec2.e.read_reg1.data
+ data1 = sim.gpr(data1).value
+ else:
+ data1 = 0
+
+ yield alu.p.data_i.a.eq(data1)
+
+ # If there's an immediate, set the B operand to that
+ # (the immediate takes priority over a register read)
+ reg2_ok = yield dec2.e.read_reg2.ok
+ imm_ok = yield dec2.e.imm_data.imm_ok
+ if imm_ok:
+ data2 = yield dec2.e.imm_data.imm
+ elif reg2_ok:
+ data2 = yield dec2.e.read_reg2.data
+ data2 = sim.gpr(data2).value
+ else:
+ data2 = 0
+ yield alu.p.data_i.b.eq(data2)
+
+
+
+def set_extra_alu_inputs(alu, dec2, sim):
+ carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
+ yield alu.p.data_i.carry_in.eq(carry)
+ so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+ yield alu.p.data_i.so.eq(so)
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in LogicalTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class LogicalTestCase(FHDLTestCase):
+ def __init__(self, name):
+ super().__init__(name)
+ self.test_name = name
+ def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
+ tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+ test_data.append(tc)
+
+ def test_rand(self):
+ insns = ["and", "or", "xor"]
+ for i in range(40):
+ choice = random.choice(insns)
+ lst = [f"{choice} 3, 1, 2"]
+ initial_regs = [0] * 32
+ initial_regs[1] = random.randint(0, (1<<64)-1)
+ initial_regs[2] = random.randint(0, (1<<64)-1)
+ self.run_tst_program(Program(lst), initial_regs)
+
+ def test_rand_imm_logical(self):
+ insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
+ for i in range(10):
+ choice = random.choice(insns)
+ imm = random.randint(0, (1<<16)-1)
+ lst = [f"{choice} 3, 1, {imm}"]
+ print(lst)
+ initial_regs = [0] * 32
+ initial_regs[1] = random.randint(0, (1<<64)-1)
+ self.run_tst_program(Program(lst), initial_regs)
+
+ @unittest.skip("broken")
+ def test_cntz(self):
+ insns = ["cntlzd", "cnttzd"]
+ for i in range(10):
+ choice = random.choice(insns)
+ lst = [f"{choice} 3, 1"]
+ print(lst)
+ initial_regs = [0] * 32
+ initial_regs[1] = random.randint(0, (1<<64)-1)
+ self.run_tst_program(Program(lst), initial_regs)
+
+ def test_parity(self):
+ insns = ["prtyw", "prtyd"]
+ for i in range(10):
+ choice = random.choice(insns)
+ lst = [f"{choice} 3, 1"]
+ print(lst)
+ initial_regs = [0] * 32
+ initial_regs[1] = random.randint(0, (1<<64)-1)
+ self.run_tst_program(Program(lst), initial_regs)
+
+ def test_popcnt(self):
+ insns = ["popcntb", "popcntw", "popcntd"]
+ for i in range(10):
+ choice = random.choice(insns)
+ lst = [f"{choice} 3, 1"]
+ print(lst)
+ initial_regs = [0] * 32
+ initial_regs[1] = random.randint(0, (1<<64)-1)
+ self.run_tst_program(Program(lst), initial_regs)
+
+ def test_cmpb(self):
+ lst = ["cmpb 3, 1, 2"]
+ initial_regs = [0] * 32
+ initial_regs[1] = 0xdeadbeefcafec0de
+ initial_regs[2] = 0xd0adb0000afec1de
+ self.run_tst_program(Program(lst), initial_regs)
+
+ def test_ilang(self):
+ rec = CompALUOpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ alu = LogicalBasePipe(pspec)
+ vl = rtlil.convert(alu, ports=alu.ports())
+ with open("logical_pipeline.il", "w") as f:
+ f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+ def __init__(self, test_data):
+ super().__init__("run_all")
+ self.test_data = test_data
+
+ def run_all(self):
+ m = Module()
+ comb = m.d.comb
+ instruction = Signal(32)
+
+ pdecode = create_pdecode()
+
+ m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+ rec = CompALUOpSubset()
+
+ pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+ m.submodules.alu = alu = LogicalBasePipe(pspec)
+
+ comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+ comb += alu.p.valid_i.eq(1)
+ comb += alu.n.ready_i.eq(1)
+ comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+ sim = Simulator(m)
+
+ sim.add_clock(1e-6)
+ def process():
+ for test in self.test_data:
+ print(test.name)
+ program = test.program
+ self.subTest(test.name)
+ simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+ gen = program.generate_instructions()
+ instructions = list(zip(gen, program.assembly.splitlines()))
+
+ index = simulator.pc.CIA.value//4
+ while index < len(instructions):
+ ins, code = instructions[index]
+
+ print("0x{:X}".format(ins & 0xffffffff))
+ print(code)
+
+ # ask the decoder to decode this binary data (endian'd)
+ yield pdecode2.dec.bigendian.eq(0) # little / big?
+ yield instruction.eq(ins) # raw binary instr.
+ yield Settle()
+ fn_unit = yield pdecode2.e.fn_unit
+ self.assertEqual(fn_unit, Function.LOGICAL.value, code)
+ yield from set_alu_inputs(alu, pdecode2, simulator)
+ yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+ yield
+ opname = code.split(' ')[0]
+ yield from simulator.call(opname)
+ index = simulator.pc.CIA.value//4
+
+ vld = yield alu.n.valid_o
+ while not vld:
+ yield
+ vld = yield alu.n.valid_o
+ yield
+ alu_out = yield alu.n.data_o.o
+ out_reg_valid = yield pdecode2.e.write_reg.ok
+ if out_reg_valid:
+ write_reg_idx = yield pdecode2.e.write_reg.data
+ expected = simulator.gpr(write_reg_idx).value
+ print(f"expected {expected:x}, actual: {alu_out:x}")
+ self.assertEqual(expected, alu_out, code)
+ yield from self.check_extra_alu_outputs(alu, pdecode2,
+ simulator)
+
+ sim.add_sync_process(process)
+ with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+ traces=[]):
+ sim.run()
+ def check_extra_alu_outputs(self, alu, dec2, sim):
+ rc = yield dec2.e.rc.data
+ if rc:
+ cr_expected = sim.crl[0].get_range().value
+ cr_actual = yield alu.n.data_o.cr0
+ self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+    # exit=False: unittest.main() has to return rather than exit, so that
+    # the TestRunner suite built from test_data still gets to run
+    unittest.main(exit=False)
+    batch = unittest.TestSuite([TestRunner(test_data)])
+    unittest.TextTestRunner().run(batch)
--- /dev/null
+# Proof of correctness for the shift/rotate main stage (ShiftRotMainStage)
+# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+
+from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
+ signed)
+from nmigen.asserts import Assert, AnyConst, Assume, Cover
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+
+from soc.shift_rot.main_stage import ShiftRotMainStage
+from soc.alu.pipe_data import ALUPipeSpec
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.decoder.power_enums import InternalOp
+import unittest
+
+
+# This defines a module to drive the device under test and assert
+# properties about its outputs
+class Driver(Elaboratable):
+    """Formal harness: drives ShiftRotMainStage and asserts on its outputs.
+
+    The operation record and the rs/rb/carry/so inputs are tied to
+    AnyConst values; the OP_SHL and OP_SHR results are then checked
+    against plain shift expressions (with ra assumed to be zero).
+    """
+    def __init__(self):
+        # no ports of its own: all stimulus is created in elaborate()
+        pass
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # unconstrained operation record; the summed port widths size
+        # the op field of the pipeline spec
+        rec = CompALUOpSubset()
+        op_ports = rec.ports()
+        comb += [port.eq(AnyConst(port.width)) for port in op_ports]
+        recwidth = sum(port.width for port in op_ports)
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
+        m.submodules.dut = dut = ShiftRotMainStage(pspec)
+
+        # shorthand for the DUT ports used below
+        a, b, ra = dut.i.rs, dut.i.rb, dut.i.ra
+        carry_in, so_in = dut.i.carry_in, dut.i.so
+        carry_out = dut.o.carry_out
+        o = dut.o.o
+
+        # unconstrained data inputs (ra is not driven here)
+        for sig, width in ((a, 64), (b, 64), (carry_in, 1), (so_in, 1)):
+            comb += sig.eq(AnyConst(width))
+
+        comb += dut.i.ctx.op.eq(rec)
+
+        # the op record must come out of the stage unchanged
+        for rec_sig in op_ports:
+            comb += Assert(getattr(dut.o.ctx.op, rec_sig.name) == rec_sig)
+
+        # signed copies of a (full width and low word), used as the
+        # reference for the is_signed right shifts
+        a_signed = Signal(signed(64))
+        a_signed_32 = Signal(signed(32))
+        comb += [a_signed.eq(a), a_signed_32.eq(a[0:32])]
+
+        # sub-expressions shared by the assertions
+        sh32, sh64 = b[0:6], b[0:7]
+        o_lo, o_hi = o[0:32], o[32:64]
+
+        # main assertions, per operation type
+        with m.Switch(rec.insn_type):
+            with m.Case(InternalOp.OP_SHL):
+                comb += Assume(ra == 0)
+                with m.If(rec.is_32bit):
+                    comb += [Assert(o_lo == ((a << sh32) & 0xffffffff)),
+                             Assert(o_hi == 0)]
+                with m.Else():
+                    comb += Assert(o == ((a << sh64) & ((1 << 64)-1)))
+            with m.Case(InternalOp.OP_SHR):
+                comb += Assume(ra == 0)
+                with m.If(~rec.is_signed):
+                    with m.If(rec.is_32bit):
+                        comb += [Assert(o_lo == (a[0:32] >> sh32)),
+                                 Assert(o_hi == 0)]
+                    with m.Else():
+                        comb += Assert(o == (a >> sh64))
+                with m.Else():
+                    with m.If(rec.is_32bit):
+                        comb += [Assert(o_lo == (a_signed_32 >> sh32)),
+                                 Assert(o_hi == Repl(a[31], 32))]
+                    with m.Else():
+                        comb += Assert(o == (a_signed >> sh64))
+
+        return m
+
+
+class ALUTestCase(FHDLTestCase):
+    """Runs the Driver harness through formal checks and an RTLIL dump."""
+    def test_formal(self):
+        # one harness instance: bounded model check, then cover, depth 2
+        harness = Driver()
+        for mode in ("bmc", "cover"):
+            self.assertFormal(harness, mode=mode, depth=2)
+
+    def test_ilang(self):
+        # convert the harness to RTLIL and write it to main_stage.il
+        rtlil_text = rtlil.convert(Driver(), ports=[])
+        with open("main_stage.il", "w") as f:
+            f.write(rtlil_text)
+
+
+if __name__ == "__main__":
+    # run the formal test case when executed as a script
+    unittest.main()
--- /dev/null
+# This stage is intended to adjust the input data before sending it to
+# the actual ALU. Things like handling inverting the input, carry_in
+# generation for subtraction, and handling of immediates should happen
+# here
+from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
+ unsigned)
+from nmutil.pipemodbase import PipeModBase
+from soc.decoder.power_enums import InternalOp
+from soc.shift_rot.pipe_data import ShiftRotInputData
+from soc.decoder.power_enums import CryIn
+
+
+class ShiftRotInputStage(PipeModBase):
+    """Input-conditioning stage of the shift/rotate pipeline.
+
+    Optionally inverts ra, selects the carry-in according to the op's
+    input_carry setting, and passes rb, rs, so and ctx straight through.
+    Input and output both use ShiftRotInputData.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec, "input")
+
+    def ispec(self):
+        return ShiftRotInputData(self.pspec)
+
+    def ospec(self):
+        return ShiftRotInputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        i, o = self.i, self.o
+        op = i.ctx.op
+
+        ##### operand A #####
+
+        # ra as-is, or bitwise inverted when the op asks for it
+        a = Signal.like(i.ra)
+        comb += a.eq(Mux(op.invert_a, ~i.ra, i.ra))
+
+        comb += [o.ra.eq(a),
+                 o.rb.eq(i.rb),
+                 o.rs.eq(i.rs)]
+
+        ##### carry-in #####
+
+        # constant 0 / 1, or the incoming carry, as selected by the op
+        carry_source = {CryIn.ZERO: 0,
+                        CryIn.ONE: 1,
+                        CryIn.CA: i.carry_in}
+        with m.Switch(op.input_carry):
+            for select, value in carry_source.items():
+                with m.Case(select):
+                    comb += o.carry_in.eq(value)
+
+        ##### sticky overflow and context (both pass-through) #####
+
+        comb += o.so.eq(i.so)
+        comb += o.ctx.eq(i.ctx)
+
+        return m
--- /dev/null
+# This stage is intended to do most of the work of executing shift
+# instructions, as well as carry and overflow generation. This module
+# however should not gate the carry or overflow, that's up to the
+# output stage
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+from nmutil.pipemodbase import PipeModBase
+from soc.alu.pipe_data import ALUOutputData
+from soc.shift_rot.pipe_data import ShiftRotInputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+from soc.shift_rot.rotator import Rotator
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+
+
+class ShiftRotMainStage(PipeModBase):
+    """Main shift/rotate stage: a wrapper around the Rotator submodule.
+
+    Picks MB/ME (M-form) and one extra mb bit (MD-form) out of the
+    instruction word, turns insn_type into the Rotator's right_shift /
+    clear_left / clear_right controls, and forwards the Rotator's result
+    and carry.  so and ctx are passed through.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec, "main")
+        # instruction-field accessors over the incoming insn word
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        return ShiftRotInputData(self.pspec)
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        i, o = self.i, self.o
+        op = i.ctx.op
+
+        # me and mb fields from the instruction, plus the MD-form mb bit
+        m_form = self.fields.instrs['M']
+        md_form = self.fields.instrs['MD']
+        mb_field = m_form['MB'][0:-1]
+        me_field = m_form['ME'][0:-1]
+        mb = Signal(mb_field.shape())
+        me = Signal(me_field.shape())
+        mb_extra = Signal(1, reset_less=True)
+        comb += [mb.eq(mb_field),
+                 me.eq(me_field),
+                 mb_extra.eq(md_form['mb'][0:-1][0])]
+
+        # microwatt rotator module and its data / control inputs
+        m.submodules.rotator = rotator = Rotator()
+        comb += [
+            rotator.me.eq(me),
+            rotator.mb.eq(mb),
+            rotator.mb_extra.eq(mb_extra),
+            rotator.rs.eq(i.rs),
+            rotator.ra.eq(i.ra),
+            rotator.shift.eq(i.rb),
+            rotator.is_32bit.eq(op.is_32bit),
+            rotator.arith.eq(op.is_signed),
+        ]
+
+        # rotate type per instruction; bit 0 is right_shift, bit 1 is
+        # clear_left, bit 2 is clear_right (see the Cat below)
+        rotate_modes = (
+            (InternalOp.OP_SHL, 0b000),
+            (InternalOp.OP_SHR, 0b001),   # R-shift
+            (InternalOp.OP_RLC, 0b110),   # clear LR
+            (InternalOp.OP_RLCL, 0b010),  # clear L
+            (InternalOp.OP_RLCR, 0b100),  # clear R
+        )
+        mode = Signal(3, reset_less=True)
+        with m.Switch(op.insn_type):
+            for insn_type, bits in rotate_modes:
+                with m.Case(insn_type):
+                    comb += mode.eq(bits)
+
+        comb += Cat(rotator.right_shift,
+                    rotator.clear_left,
+                    rotator.clear_right).eq(mode)
+
+        # outputs from the microwatt rotator module
+        comb += [o.o.eq(rotator.result_o),
+                 o.carry_out.eq(rotator.carry_out_o)]
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += o.so.eq(i.so)
+        comb += o.ctx.eq(i.ctx)
+
+        return m
--- /dev/null
+from nmigen import (Elaboratable, Signal, Module)
+import math
+
+class MaskGen(Elaboratable):
+    """MaskGen - create a diff mask
+
+    example: x=5 --> a=0b11111
+             y=3 --> b=0b00111
+             o:        0b11000
+             x=2 --> a=0b00011
+             y=4 --> b=0b01111
+             o:        0b10011
+
+    where x = width - mb and y = (width - 1) - me.
+
+    Ports: mb and me (inputs, ceil(log2(width)) bits each) and o (output,
+    width bits).
+    """
+    def __init__(self, width):
+        self.width = width
+        self.shiftwidth = math.ceil(math.log2(width))
+        self.mb = Signal(self.shiftwidth, reset_less=True)
+        self.me = Signal(self.shiftwidth, reset_less=True)
+
+        self.o = Signal(width, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        x = Signal.like(self.mb)
+        y = Signal.like(self.mb)
+
+        # Derived from self.width (previously hard-coded to 64 / 63, which
+        # silently ignored the constructor argument).  x is only shiftwidth
+        # bits wide, so for a power-of-two width mb == 0 wraps x to 0:
+        # mask_a is then 0 and the Else branch below yields ~mask_b, which
+        # is the same value an unwrapped x would have produced.
+        comb += x.eq(self.width - self.mb)
+        comb += y.eq((self.width - 1) - self.me)
+
+        mask_a = Signal.like(self.o)
+        mask_b = Signal.like(self.o)
+
+        # low x (resp. y) bits set
+        comb += mask_a.eq((1<<x) - 1)
+        comb += mask_b.eq((1<<y) - 1)
+
+        # x > y: contiguous run of ones; otherwise the wrapped-around mask
+        with m.If(x > y):
+            comb += self.o.eq(mask_a ^ mask_b)
+        with m.Else():
+            comb += self.o.eq(mask_a ^ ~mask_b)
+
+        return m
+
+    def ports(self):
+        return [self.mb, self.me, self.o]
--- /dev/null
+from nmigen import Signal, Const
+from nmutil.dynamicpipe import SimpleHandshakeRedir
+from soc.alu.alu_input_record import CompALUOpSubset
+from ieee754.fpcommon.getop import FPPipeContext
+from soc.alu.pipe_data import IntegerData
+
+
+class ShiftRotInputData(IntegerData):
+    """Input record of the shift/rotate stages.
+
+    Extends IntegerData with three 64-bit operands (ra, rs, rb) and two
+    single-bit flags (so, carry_in).
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        self.ra = Signal(64, reset_less=True) # RA
+        self.rs = Signal(64, reset_less=True) # RS
+        self.rb = Signal(64, reset_less=True) # RB/immediate
+        self.so = Signal(reset_less=True)
+        self.carry_in = Signal(reset_less=True)
+
+    def __iter__(self):
+        # base-class signals first, then the ones added here
+        yield from super().__iter__()
+        yield from (self.ra, self.rs, self.rb, self.carry_in, self.so)
+
+    def eq(self, i):
+        """Return the assignments copying every field from ``i``."""
+        own_fields = ("rs", "ra", "rb", "carry_in", "so")
+        copies = [getattr(self, name).eq(getattr(i, name))
+                  for name in own_fields]
+        return super().eq(i) + copies
--- /dev/null
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.shift_rot.input_stage import ShiftRotInputStage
+from soc.shift_rot.main_stage import ShiftRotMainStage
+from soc.alu.output_stage import ALUOutputStage
+
+class ShiftRotStages(PipeModBaseChain):
+    """Stage chain of the shift/rotate pipe: input, main, then ALU output."""
+    def get_chain(self):
+        stage_classes = (ShiftRotInputStage, ShiftRotMainStage,
+                         ALUOutputStage)
+        return [stage(self.pspec) for stage in stage_classes]
+
+
+class ShiftRotBasePipe(ControlBase):
+    """ControlBase wrapper holding a single ShiftRotStages chain."""
+    def __init__(self, pspec):
+        super().__init__()
+        self.pipe1 = ShiftRotStages(pspec)
+        # connection statements are created here, added in elaborate()
+        self._eqs = self.connect([self.pipe1])
+
+    def elaborate(self, platform):
+        m = super().elaborate(platform)
+        m.submodules.pipe = self.pipe1
+        m.d.comb += self._eqs
+        return m
--- /dev/null
+# Manual translation and adaptation of rotator.vhdl from microwatt into nmigen
+#
+
+from nmigen import (Elaboratable, Signal, Module, Const, Cat,
+ unsigned, signed)
+from soc.shift_rot.rotl import ROTL
+
+# note BE bit numbering
+def right_mask(m, mask_begin):
+    """Return a 64-bit signal with the low (64 - mask_begin) bits set.
+
+    The assignment is added to ``m``'s comb domain and only applies while
+    mask_begin <= 64; otherwise the signal is left undriven.
+    """
+    mask = Signal(64, name="right_mask", reset_less=True)
+    ones_count = 64 - mask_begin
+    with m.If(mask_begin <= 64):
+        m.d.comb += mask.eq((1 << ones_count) - 1)
+    return mask
+
+def left_mask(m, mask_end):
+    """Return a 64-bit signal with the low (63 - mask_end) bits cleared.
+
+    The assignment is added to ``m``'s comb domain.
+    """
+    mask = Signal(64, name="left_mask", reset_less=True)
+    low_ones = (1 << (63 - mask_end)) - 1
+    m.d.comb += mask.eq(~low_ones)
+    return mask
+
+
+class Rotator(Elaboratable):
+    """Rotator: covers multiple POWER9 rotate functions
+
+    supported modes:
+
+    * sl[wd]
+    * rlw*, rldic, rldicr, rldimi
+    * rldicl, sr[wd]
+    * sra[wd][i]
+
+    use as follows:
+
+    * shift = RB[0:7]
+    * arith = 1 when is_signed
+    * right_shift = 1 when insn_type is OP_SHR
+    * clear_left = 1 when insn_type is OP_RLC or OP_RLCL
+    * clear_right = 1 when insn_type is OP_RLC or OP_RLCR
+
+    outputs:
+
+    * result_o = rotated input combined with the left/right masks (and,
+      in the insert modes, with ra)
+    * carry_out_o = only driven in the sra[wd][i] output mode, where it
+      is 1 if any rs bit selected by ~ml is set
+    """
+    def __init__(self):
+        # input
+        self.me = Signal(5, reset_less=True)        # ME field
+        self.mb = Signal(5, reset_less=True)        # MB field
+        self.mb_extra = Signal(1, reset_less=True)  # extra bit of mb in MD-form
+        self.ra = Signal(64, reset_less=True)       # RA
+        self.rs = Signal(64, reset_less=True)       # RS
+        self.shift = Signal(7, reset_less=True)     # RB[0:7]
+        self.is_32bit = Signal(reset_less=True)
+        self.right_shift = Signal(reset_less=True)
+        self.arith = Signal(reset_less=True)
+        self.clear_left = Signal(reset_less=True)
+        self.clear_right = Signal(reset_less=True)
+        # output
+        self.result_o = Signal(64, reset_less=True)
+        self.carry_out_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        ra, rs = self.ra, self.rs
+
+        # temporaries
+        rot_in = Signal(64, reset_less=True)
+        rot_count = Signal(6, reset_less=True)
+        rot = Signal(64, reset_less=True)
+        sh = Signal(7, reset_less=True)
+        mb = Signal(7, reset_less=True)
+        me = Signal(7, reset_less=True)
+        mr = Signal(64, reset_less=True)
+        ml = Signal(64, reset_less=True)
+        output_mode = Signal(2, reset_less=True)
+
+        # First replicate bottom 32 bits to both halves if 32-bit
+        comb += rot_in[0:32].eq(rs[0:32])
+        with m.If(self.is_32bit):
+            comb += rot_in[32:64].eq(rs[0:32])
+        with m.Else():
+            comb += rot_in[32:64].eq(rs[32:64])
+
+        shift_signed = Signal(signed(6))
+        comb += shift_signed.eq(self.shift[0:6])
+
+        # Negate shift count for right shifts
+        with m.If(self.right_shift):
+            comb += rot_count.eq(-shift_signed)
+        with m.Else():
+            comb += rot_count.eq(self.shift[0:6])
+
+        # ROTL submodule
+        m.submodules.rotl = rotl = ROTL(64)
+        comb += rotl.a.eq(rot_in)
+        comb += rotl.b.eq(rot_count)
+        comb += rot.eq(rotl.o)
+
+        # Trim shift count to 6 bits for 32-bit shifts
+        comb += sh.eq(Cat(self.shift[0:6], self.shift[6] & ~self.is_32bit))
+
+        # XXX errr... we should already have these, in Fields?  oh well
+        # Work out mask begin/end indexes (caution, big-endian bit numbering)
+
+        # mask-begin (mb).  NOTE: the mb[5:7] assignments deliberately come
+        # after the full-width mb assignment so that they override its
+        # top two bits.
+        with m.If(self.clear_left):
+            comb += mb.eq(self.mb)
+            with m.If(self.is_32bit):
+                comb += mb[5:7].eq(Const(0b01, 2))
+            with m.Else():
+                comb += mb[5:7].eq(Cat(self.mb_extra, Const(0b0, 1)))
+        with m.Elif(self.right_shift):
+            # this is basically mb = sh + (is_32bit? 32: 0);
+            comb += mb.eq(sh)
+            with m.If(self.is_32bit):
+                comb += mb[5:7].eq(Cat(~sh[5], sh[5]))
+        with m.Else():
+            comb += mb.eq(Cat(Const(0b0, 5), self.is_32bit, Const(0b0, 1)))
+
+        # mask-end (me)
+        with m.If(self.clear_right & self.is_32bit):
+            # TODO: track down where this is.  have to use fields.
+            comb += me.eq(Cat(self.me, Const(0b01, 2)))
+        with m.Elif(self.clear_right & ~self.clear_left):
+            # this is me, have to use fields
+            comb += me.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1)))
+        with m.Else():
+            # effectively, 63 - sh
+            comb += me.eq(Cat(~sh[0:6], sh[6]))
+
+        # Calculate left and right masks
+        comb += mr.eq(right_mask(m, mb))
+        comb += ml.eq(left_mask(m, me))
+
+        # Work out output mode
+        # 00 for sl[wd]
+        # 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me
+        # 10 for rldicl, sr[wd]
+        # 1z for sra[wd][i], z = 1 if rs is negative
+        with m.If((self.clear_left & ~self.clear_right) | self.right_shift):
+            comb += output_mode.eq(Cat(self.arith & rot_in[63], Const(1, 1)))
+        with m.Else():
+            mbgt = self.clear_right & (mb[0:6] > me[0:6])
+            comb += output_mode.eq(Cat(mbgt, Const(0, 1)))
+
+        # Generate output from rotated input and masks
+        with m.Switch(output_mode):
+            with m.Case(0b00):
+                comb += self.result_o.eq((rot & (mr & ml)) | (ra & ~(mr & ml)))
+            with m.Case(0b01):
+                comb += self.result_o.eq((rot & (mr | ml)) | (ra & ~(mr | ml)))
+            with m.Case(0b10):
+                comb += self.result_o.eq(rot & mr)
+            with m.Case(0b11):
+                comb += self.result_o.eq(rot | ~mr)
+                # Generate carry output for arithmetic shift right of -ve
+                # value.  carry_out_o is a single bit, so the 64-bit
+                # (rs & ~ml) has to be OR-reduced: a plain assignment
+                # would keep only bit 0 and drop every other set bit.
+                comb += self.carry_out_o.eq((rs & ~ml).bool())
+
+        return m
+
--- /dev/null
+from nmigen import (Elaboratable, Signal, Module)
+import math
+
+class ROTL(Elaboratable):
+    """Rotate-left: o is a rotated left by b bit positions.
+
+    a and o are ``width`` bits wide; b is ceil(log2(width)) bits wide.
+    """
+    def __init__(self, width):
+        self.width = width
+        self.shiftwidth = math.ceil(math.log2(width))
+        self.a = Signal(width, reset_less=True)
+        self.b = Signal(self.shiftwidth, reset_less=True)
+
+        self.o = Signal(width, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # the two halves of the rotate: bits pushed up by b, and the bits
+        # that fell off the top brought back down by (width - b)
+        shl = Signal.like(self.a)
+        shr = Signal.like(self.a)
+
+        m.d.comb += [
+            shl.eq(self.a << self.b),
+            shr.eq(self.a >> (self.width - self.b)),
+            self.o.eq(shl | shr),
+        ]
+        return m
--- /dev/null
+from nmigen import Signal, Module
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+from soc.alu.maskgen import MaskGen
+from soc.decoder.helpers import MASK
+import random
+import unittest
+
+class MaskGenTestCase(FHDLTestCase):
+    """Sweeps a 64-bit MaskGen over all mb/me pairs against MASK()."""
+    def test_maskgen(self):
+        m = Module()
+        m.submodules.dut = dut = MaskGen(64)
+
+        # testbench-side copies of the DUT ports
+        mb = Signal.like(dut.mb)
+        me = Signal.like(dut.me)
+        o = Signal.like(dut.o)
+        m.d.comb += [dut.mb.eq(mb), dut.me.eq(me), o.eq(dut.o)]
+
+        sim = Simulator(m)
+
+        def process():
+            for x in range(64):
+                for y in range(64):
+                    yield mb.eq(x)
+                    yield me.eq(y)
+                    # the design is purely combinatorial: wait, then sample
+                    yield Delay(1e-6)
+
+                    want = MASK(x, y)
+                    got = yield o
+                    self.assertEqual(want, got)
+
+        # plain (non-clocked) process; no clock is added to this simulation
+        sim.add_process(process)
+        with sim.write_vcd("maskgen.vcd", "maskgen.gtkw", traces=dut.ports()):
+            sim.run()
+
+    def test_ilang(self):
+        # convert a fresh 64-bit MaskGen to RTLIL and write it to maskgen.il
+        dut = MaskGen(64)
+        rtlil_text = rtlil.convert(dut, ports=dut.ports())
+        with open("maskgen.il", "w") as f:
+            f.write(rtlil_text)
+
+if __name__ == "__main__":
+    # run the MaskGen tests when executed as a script
+    unittest.main()
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.shift_rot.pipeline import ShiftRotBasePipe
+from soc.alu.alu_input_record import CompALUOpSubset
+from soc.alu.pipe_data import ALUPipeSpec
+import random
+
+class TestCase:
+    """One queued test: a program plus its initial register/SPR state."""
+    def __init__(self, program, regs, sprs, name):
+        self.program, self.name = program, name
+        self.regs, self.sprs = regs, sprs
+
+def get_rec_width(rec):
+    """Return the summed ``width`` of every signal in ``rec.ports()``."""
+    return sum(port.width for port in rec.ports())
+
+def set_alu_inputs(alu, dec2, sim):
+    """Drive the pipeline's ra/rb/rs inputs from the simulator's registers.
+
+    Simulation sub-generator (drive it with ``yield from``).  For each
+    read port of the decoded instruction whose ``ok`` flag is set, the
+    GPR it selects is fetched from ``sim``; otherwise 0 is used.  rb
+    falls back to the decoded immediate when read_reg2 is not valid but
+    the immediate is.
+
+    Mapping: read_reg1 -> data_i.ra, read_reg2 / immediate -> data_i.rb,
+    read_reg3 -> data_i.rs.
+    """
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    reg3_ok = yield dec2.e.read_reg3.ok
+    if reg3_ok:
+        reg3_sel = yield dec2.e.read_reg3.data
+        data3 = sim.gpr(reg3_sel).value
+    else:
+        data3 = 0
+
+    reg1_ok = yield dec2.e.read_reg1.ok
+    if reg1_ok:
+        reg1_sel = yield dec2.e.read_reg1.data
+        data1 = sim.gpr(reg1_sel).value
+    else:
+        data1 = 0
+
+    # second operand: register takes priority over the immediate
+    reg2_ok = yield dec2.e.read_reg2.ok
+    imm_ok = yield dec2.e.imm_data.ok
+    if reg2_ok:
+        reg2_sel = yield dec2.e.read_reg2.data
+        data2 = sim.gpr(reg2_sel).value
+    elif imm_ok:
+        data2 = yield dec2.e.imm_data.imm
+    else:
+        data2 = 0
+
+    yield alu.p.data_i.ra.eq(data1)
+    yield alu.p.data_i.rb.eq(data2)
+    yield alu.p.data_i.rs.eq(data3)
+
+
+def set_extra_alu_inputs(alu, dec2, sim):
+    """Drive carry_in and so from the CA and SO bits of the simulator's XER.
+
+    Simulation sub-generator (drive it with ``yield from``); ``dec2`` is
+    accepted but not used.
+    """
+    for xer_flag, port_name in (('CA', 'carry_in'), ('SO', 'so')):
+        bit = 1 if sim.spr['XER'][XER_bits[xer_flag]] else 0
+        yield getattr(alu.p.data_i, port_name).eq(bit)
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in ALUTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+test_data = []
+
+
+class ALUTestCase(FHDLTestCase):
+    """Queues shift/rotate programs into the module-level test_data list.
+
+    Apart from test_ilang, the test_* methods do not simulate anything
+    themselves: each one only appends TestCase entries, which TestRunner
+    later runs through a single shared DUT and simulator.
+    """
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        """Queue ``prog`` with its initial register / SPR state.
+
+        initial_regs defaults to 32 zeroed registers and initial_sprs to
+        an empty dict.  Fresh objects are created per call: the values
+        are stored in the queued TestCase, so shared mutable defaults
+        would be aliased between otherwise unrelated test cases.
+        """
+        if initial_regs is None:
+            initial_regs = [0] * 32
+        if initial_sprs is None:
+            initial_sprs = {}
+        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
+        test_data.append(tc)
+
+    def test_shift(self):
+        insns = ["slw", "sld", "srw", "srd", "sraw", "srad"]
+        for _ in range(20):
+            choice = random.choice(insns)
+            lst = [f"{choice} 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, 63)
+            print(initial_regs[1], initial_regs[2])
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_shift_arith(self):
+        lst = ["sraw 3, 1, 2"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[2] = random.randint(0, 63)
+        print(initial_regs[1], initial_regs[2])
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_shift_once(self):
+        # two fixed cases: shift amount 0 (r4), then shift amount 0x40 (r2)
+        lst = ["slw 3, 1, 4",
+               "slw 3, 1, 2"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0x80000000
+        initial_regs[2] = 0x40
+        initial_regs[4] = 0x00
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlwinm(self):
+        for _ in range(10):
+            mb = random.randint(0, 31)
+            me = random.randint(0, 31)
+            sh = random.randint(0, 31)
+            # assembler operand order is RA, RS, SH, MB, ME
+            lst = [f"rlwinm 3, 1, {sh}, {mb}, {me}"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlwimi(self):
+        lst = ["rlwimi 3, 1, 5, 20, 6"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0xdeadbeef
+        initial_regs[3] = 0x12345678
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlwnm(self):
+        lst = ["rlwnm 3, 1, 2, 20, 6"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[2] = random.randint(0, 63)
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rldicl(self):
+        lst = ["rldicl 3, 1, 5, 20"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rldicr(self):
+        lst = ["rldicr 3, 1, 5, 20"]
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_rlc(self):
+        insns = ["rldic", "rldicl", "rldicr"]
+        for _ in range(20):
+            choice = random.choice(insns)
+            sh = random.randint(0, 63)
+            m = random.randint(0, 63)
+            lst = [f"{choice} 3, 1, {sh}, {m}"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
+
+    def test_ilang(self):
+        # convert the whole pipeline to RTLIL and write it to pipeline.il
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        alu = ShiftRotBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    """Runs every queued TestCase through one shared DUT and simulator.
+
+    A single decoder + ShiftRotBasePipe design is built once; each
+    program is then stepped instruction by instruction, and the pipeline
+    output is compared against the ISA simulator's register state.
+    """
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        rec = CompALUOpSubset()
+
+        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
+        m.submodules.alu = alu = ShiftRotBasePipe(pspec)
+
+        # decoder feeds the pipeline's op record; the handshake is held
+        # permanently valid/ready
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                # subTest() returns a context manager and does nothing
+                # unless entered; with it, a failure is reported against
+                # this test's name and the remaining tests still run
+                with self.subTest(test.name):
+                    yield from self._run_program(test, alu, pdecode2,
+                                                 instruction)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+                           traces=[]):
+            sim.run()
+
+    def _run_program(self, test, alu, pdecode2, instruction):
+        """Sub-generator: step one queued program through the pipeline."""
+        program = test.program
+        simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+        gen = program.generate_instructions()
+        instructions = list(zip(gen, program.assembly.splitlines()))
+
+        # the ISA simulator's program counter selects the instruction
+        index = simulator.pc.CIA.value//4
+        while index < len(instructions):
+            ins, code = instructions[index]
+
+            print("0x{:X}".format(ins & 0xffffffff))
+            print(code)
+
+            # ask the decoder to decode this binary data (endian'd)
+            yield pdecode2.dec.bigendian.eq(0)  # little / big?
+            yield instruction.eq(ins)           # raw binary instr.
+            yield Settle()
+            fn_unit = yield pdecode2.e.fn_unit
+            self.assertEqual(fn_unit, Function.SHIFT_ROT.value)
+            yield from set_alu_inputs(alu, pdecode2, simulator)
+            yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+            yield
+            opname = code.split(' ')[0]
+            yield from simulator.call(opname)
+            index = simulator.pc.CIA.value//4
+
+            # wait for the pipeline to flag its output as valid
+            vld = yield alu.n.valid_o
+            while not vld:
+                yield
+                vld = yield alu.n.valid_o
+            yield
+            alu_out = yield alu.n.data_o.o
+            out_reg_valid = yield pdecode2.e.write_reg.ok
+            if out_reg_valid:
+                write_reg_idx = yield pdecode2.e.write_reg.data
+                expected = simulator.gpr(write_reg_idx).value
+                msg = f"expected {expected:x}, actual: {alu_out:x}"
+                self.assertEqual(expected, alu_out, msg)
+            yield from self.check_extra_alu_outputs(alu, pdecode2,
+                                                    simulator)
+
+    def check_extra_alu_outputs(self, alu, dec2, sim):
+        """Sub-generator: compare cr0 with the simulator when rc is set."""
+        rc = yield dec2.e.rc.data
+        if rc:
+            cr_expected = sim.crl[0].get_range().value
+            cr_actual = yield alu.n.data_o.cr0
+            self.assertEqual(cr_expected, cr_actual)
+
+
+if __name__ == "__main__":
+    # Pass 1: the ALUTestCase methods only queue entries in test_data.
+    # exit=False makes unittest.main() return so that pass 2 can run.
+    unittest.main(exit=False)
+    # Pass 2: push everything that was queued through one simulation.
+    batch = unittest.TestSuite([TestRunner(test_data)])
+    unittest.TextTestRunner().run(batch)
+++ /dev/null
-from nmigen.hdl.rec import Record, Layout
-
-from soc.decoder.power_enums import InternalOp, Function, CryIn
-
-
-class CompALUOpSubset(Record):
- """CompALUOpSubset
-
- a copy of the relevant subset information from Decode2Execute1Type
- needed for ALU operations. use with eq_from_execute1 (below) to
- grab subsets.
- """
- def __init__(self, name=None):
- layout = (('insn_type', InternalOp),
- ('fn_unit', Function),
- ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
- #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
- #'xerc = XerBits() # NO: this is from the XER SPR
- ('lk', 1),
- ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
- ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
- ('invert_a', 1),
- ('invert_out', 1),
- ('input_carry', CryIn),
- ('output_carry', 1),
- ('input_cr', 1),
- ('output_cr', 1),
- ('is_32bit', 1),
- ('is_signed', 1),
- ('data_len', 4), # TODO: should be in separate CompLDSTSubset
- ('insn', 32),
- ('byte_reverse', 1),
- ('sign_extend', 1))
-
- Record.__init__(self, Layout(layout), name=name)
-
- # grrr. Record does not have kwargs
- self.insn_type.reset_less = True
- self.fn_unit.reset_less = True
- #self.cr = Signal(32, reset_less = True
- #self.xerc = XerBits(
- self.lk.reset_less = True
- self.invert_a.reset_less = True
- self.invert_out.reset_less = True
- self.input_carry.reset_less = True
- self.output_carry.reset_less = True
- self.input_cr.reset_less = True
- self.output_cr.reset_less = True
- self.is_32bit.reset_less = True
- self.is_signed.reset_less = True
- self.data_len.reset_less = True
- self.byte_reverse.reset_less = True
- self.sign_extend.reset_less = True
-
- def eq_from_execute1(self, other):
- """ use this to copy in from Decode2Execute1Type
- """
- res = []
- for fname, sig in self.fields.items():
- eqfrom = other.fields[fname]
- res.append(sig.eq(eqfrom))
- return res
-
- def ports(self):
- return [self.insn_type,
- #self.cr,
- #self.xerc,
- self.lk,
- self.invert_a,
- self.invert_out,
- self.input_carry,
- self.output_carry,
- self.input_cr,
- self.output_cr,
- self.is_32bit,
- self.is_signed,
- self.data_len,
- self.byte_reverse,
- self.sign_extend,
- ]
+++ /dev/null
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = ALUInputStage(pspec)
-
- a = Signal(64)
- b = Signal(64)
- comb += [dut.i.a.eq(a),
- dut.i.b.eq(b),
- a.eq(AnyConst(64)),
- b.eq(AnyConst(64))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # Assert that op gets copied from the input to output
- for p in rec.ports():
- name = p.name
- rec_sig = p
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- with m.If(rec.invert_a):
- comb += Assert(dut.o.a == ~a)
- with m.Else():
- comb += Assert(dut.o.a == a)
-
- comb += Assert(dut.o.b == b)
-
- return m
-
-
-class GTCombinerTestCase(FHDLTestCase):
- def test_formal(self):
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=4)
- self.assertFormal(module, mode="cover", depth=4)
- def test_ilang(self):
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("input_stage.il", "w") as f:
- f.write(vl)
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
- signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.main_stage import ALUMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = ALUMainStage(pspec)
-
- # convenience variables
- a = dut.i.a
- b = dut.i.b
- carry_in = dut.i.carry_in
- so_in = dut.i.so
- carry_out = dut.o.carry_out
- o = dut.o.o
-
- # setup random inputs
- comb += [a.eq(AnyConst(64)),
- b.eq(AnyConst(64)),
- carry_in.eq(AnyConst(1)),
- so_in.eq(AnyConst(1))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # Assert that op gets copied from the input to output
- for rec_sig in rec.ports():
- name = rec_sig.name
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- # signed and signed/32 versions of input a
- a_signed = Signal(signed(64))
- a_signed_32 = Signal(signed(32))
- comb += a_signed.eq(a)
- comb += a_signed_32.eq(a[0:32])
-
- # main assertion of arithmetic operations
- with m.Switch(rec.insn_type):
- with m.Case(InternalOp.OP_ADD):
- comb += Assert(Cat(o, carry_out) == (a + b + carry_in))
-
- return m
-
-
-class ALUTestCase(FHDLTestCase):
- def test_formal(self):
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=2)
- self.assertFormal(module, mode="cover", depth=2)
- def test_ilang(self):
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("main_stage.il", "w") as f:
- f.write(vl)
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# Proof of correctness for partitioned equal signal combiner
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux, Cat, signed
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.output_stage import ALUOutputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- """Formal test harness for ALUOutputStage.
-
- Drives the operation record and every data input of the stage from
- AnyConst values and asserts properties of the stage outputs.
- """
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- """Build the harness module: dut, AnyConst inputs and assertions."""
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- # op_wid is the summed width of the fields returned by rec.ports()
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = ALUOutputStage(pspec)
-
- # local copies of the dut data inputs; each is wired to the dut
- # and tied to an AnyConst in the list below
- o = Signal(64)
- carry_out = Signal()
- carry_out32 = Signal()
- ov = Signal()
- ov32 = Signal()
- cr0 = Signal(4)
- so = Signal()
- comb += [dut.i.o.eq(o),
- dut.i.carry_out.eq(carry_out),
- dut.i.so.eq(so),
- dut.i.carry_out32.eq(carry_out32),
- dut.i.cr0.eq(cr0),
- dut.i.ov.eq(ov),
- dut.i.ov32.eq(ov32),
- o.eq(AnyConst(64)),
- carry_out.eq(AnyConst(1)),
- carry_out32.eq(AnyConst(1)),
- ov.eq(AnyConst(1)),
- ov32.eq(AnyConst(1)),
- cr0.eq(AnyConst(4)),
- so.eq(AnyConst(1))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # the output must be the bitwise inverse of the input when
- # invert_out is set, and the unmodified input otherwise
- with m.If(dut.i.ctx.op.invert_out):
- comb += Assert(dut.o.o == ~o)
- with m.Else():
- comb += Assert(dut.o.o == o)
-
- cr_out = Signal.like(cr0)
- comb += cr_out.eq(dut.o.cr0)
-
- # signed view of the stage output, used for the sign tests below
- o_signed = Signal(signed(64))
- comb += o_signed.eq(dut.o.o)
- # Assert only one of the comparison bits is set
- # NOTE(review): these cr0 assertions are not qualified by insn_type;
- # if the stage forwards its cr0 input unchanged for some operation
- # they would not hold for an arbitrary cr0 - confirm
- comb += Assert(cr_out[3] + cr_out[2] + cr_out[1] == 1)
- with m.If(o_signed == 0):
- comb += Assert(cr_out[1] == 1)
- with m.Elif(o_signed > 0):
- # sigh. see https://bugs.libre-soc.org/show_bug.cgi?id=305#c61
- # for OP_CMP we do b-a rather than a-b (just like ADD) and
- # then invert the *test condition*.
- with m.If(rec.insn_type == InternalOp.OP_CMP):
- comb += Assert(cr_out[3] == 1)
- with m.Else():
- comb += Assert(cr_out[2] == 1)
- with m.Elif(o_signed < 0):
- # ditto as above
- with m.If(rec.insn_type == InternalOp.OP_CMP):
- comb += Assert(cr_out[2] == 1)
- with m.Else():
- comb += Assert(cr_out[3] == 1)
-
-
- # Assert that op gets copied from the input to output
- for p in rec.ports():
- name = p.name
- rec_sig = p
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
-
- return m
-
-class GTCombinerTestCase(FHDLTestCase):
- """Formal-verification and RTLIL-export tests built on Driver.
-
- NOTE(review): the class name does not match what Driver wraps
- (ALUOutputStage); it looks carried over from another proof - confirm.
- """
- def test_formal(self):
- # run the Driver's assertions in "bmc" and "cover" modes, depth 4
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=4)
- self.assertFormal(module, mode="cover", depth=4)
- def test_ilang(self):
- # convert the Driver to RTLIL text and write it to output_stage.il
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("output_stage.il", "w") as f:
- f.write(vl)
-
-
-# run the unit tests when this file is executed as a script
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# This stage is intended to adjust the input data before sending it to
-# the actual ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
- unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.alu.pipe_data import ALUInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ALUInputStage(PipeModBase):
- """Input stage: optionally inverts operand a and selects the carry-in.
-
- Operand b, the sticky-overflow bit and the context are passed
- through unchanged.
- """
- def __init__(self, pspec):
- super().__init__(pspec, "input")
-
- def ispec(self):
- """Return a new ALUInputData describing this stage's input."""
- return ALUInputData(self.pspec)
-
- def ospec(self):
- """Return a new ALUInputData describing this stage's output."""
- return ALUInputData(self.pspec)
-
- def elaborate(self, platform):
- """Build and return the combinatorial logic of the stage."""
- m = Module()
- comb = m.d.comb
- ctx = self.i.ctx
-
- ##### operand A #####
-
- # operand a to be as-is or inverted
- a = Signal.like(self.i.a)
-
- with m.If(ctx.op.invert_a):
- comb += a.eq(~self.i.a)
- with m.Else():
- comb += a.eq(self.i.a)
-
- comb += self.o.a.eq(a)
- # operand b is forwarded without modification
- comb += self.o.b.eq(self.i.b)
-
- ##### carry-in #####
-
- # either copy incoming carry or set to 1/0 as defined by op
- # (only ZERO, ONE and CA are handled; there is no default case)
- with m.Switch(ctx.op.input_carry):
- with m.Case(CryIn.ZERO):
- comb += self.o.carry_in.eq(0)
- with m.Case(CryIn.ONE):
- comb += self.o.carry_in.eq(1)
- with m.Case(CryIn.CA):
- comb += self.o.carry_in.eq(self.i.carry_in)
-
- ##### sticky overflow and context (both pass-through) #####
-
- comb += self.o.so.eq(self.i.so)
- comb += self.o.ctx.eq(ctx)
-
- return m
+++ /dev/null
-# This stage is intended to do most of the work of executing the Arithmetic
-# instructions. This would be like the additions, compares, and sign-extension
-# as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
-from nmutil.pipemodbase import PipeModBase
-from soc.alu.pipe_data import ALUInputData, ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-
-
-class ALUMainStage(PipeModBase):
- """Main stage: computes the result for OP_ADD, OP_CMP, OP_EXTS and
- OP_CMPEQB, and passes sticky overflow and context through.
- """
- def __init__(self, pspec):
- super().__init__(pspec, "main")
-
- def ispec(self):
- """Return a new ALUInputData describing this stage's input."""
- return ALUInputData(self.pspec)
-
- def ospec(self):
- """Return a new ALUOutputData describing this stage's output."""
- return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
- def elaborate(self, platform):
- """Build and return the combinatorial logic of the stage."""
- m = Module()
- comb = m.d.comb
- carry_out, o = self.o.carry_out, self.o.o
-
- # check if op is 32-bit, and get sign bit from operand a
- # (neither signal is read again within this method)
- is_32bit = Signal(reset_less=True)
- sign_bit = Signal(reset_less=True)
- comb += is_32bit.eq(self.i.ctx.op.is_32bit)
- comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63]))
-
- # little trick: do the add using only one add (not 2)
- # the operands are widened by one bit below and one bit above:
- # the extra low bit injects the carry-in, the extra high bit
- # captures the carry-out
- add_a = Signal(self.i.a.width + 2, reset_less=True)
- add_b = Signal(self.i.a.width + 2, reset_less=True)
- add_output = Signal(self.i.a.width + 2, reset_less=True)
- with m.If((self.i.ctx.op.insn_type == InternalOp.OP_ADD) |
- (self.i.ctx.op.insn_type == InternalOp.OP_CMP)):
- # in bit 0, 1+carry_in creates carry into bit 1 and above
- comb += add_a.eq(Cat(self.i.carry_in, self.i.a, Const(0, 1)))
- comb += add_b.eq(Cat(Const(1, 1), self.i.b, Const(0, 1)))
- comb += add_output.eq(add_a + add_b)
-
- ##########################
- # main switch-statement for handling arithmetic operations
-
- with m.Switch(self.i.ctx.op.insn_type):
- #### CMP, CMPL ####
- with m.Case(InternalOp.OP_CMP):
- # this is supposed to be inverted (b-a, not a-b)
- # however we have a trick: instead of adding either 2x 64-bit
- # MUXes to invert a and b, or messing with a 64-bit output,
- # swap +ve and -ve test in the *output* stage using an XOR gate
- comb += o.eq(add_output[1:-1])
-
- #### add ####
- with m.Case(InternalOp.OP_ADD):
- # bit 0 is not part of the result, top bit is the carry-out
- comb += o.eq(add_output[1:-1])
- comb += carry_out.eq(add_output[-1])
-
- #### exts (sign-extend) ####
- with m.Case(InternalOp.OP_EXTS):
- # data_len selects the source width (1, 2 or 4); the top bit
- # of that field of a is replicated up to 64 bits
- with m.If(self.i.ctx.op.data_len == 1):
- comb += o.eq(Cat(self.i.a[0:8], Repl(self.i.a[7], 64-8)))
- with m.If(self.i.ctx.op.data_len == 2):
- comb += o.eq(Cat(self.i.a[0:16], Repl(self.i.a[15], 64-16)))
- with m.If(self.i.ctx.op.data_len == 4):
- comb += o.eq(Cat(self.i.a[0:32], Repl(self.i.a[31], 64-32)))
- with m.Case(InternalOp.OP_CMPEQB):
- # compare the low byte of a against each of the 8 bytes of b;
- # bit 2 of cr0 is set if any byte matches, other bits are 0
- eqs = Signal(8, reset_less=True)
- src1 = Signal(8, reset_less=True)
- comb += src1.eq(self.i.a[0:8])
- for i in range(8):
- comb += eqs[i].eq(src1 == self.i.b[8*i:8*(i+1)])
- comb += self.o.cr0.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
-
- ###### sticky overflow and context, both pass-through #####
-
- comb += self.o.so.eq(self.i.so)
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-# This stage is intended to handle the gating of carry and overflow
-# out, summary overflow generation, and updating the condition
-# register
-from nmigen import (Module, Signal, Cat, Repl)
-from nmutil.pipemodbase import PipeModBase
-from soc.alu.pipe_data import ALUInputData, ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-
-
-class ALUOutputStage(PipeModBase):
- """Output stage: optional inversion of the result, generation of cr0
- and update of the sticky-overflow bit.
- """
- def __init__(self, pspec):
- super().__init__(pspec, "output")
-
- def ispec(self):
- """Return a new ALUOutputData describing this stage's input."""
- return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
- def ospec(self):
- """Return a new ALUOutputData describing this stage's output."""
- return ALUOutputData(self.pspec)
-
- def elaborate(self, platform):
- """Build and return the combinatorial logic of the stage."""
- m = Module()
- comb = m.d.comb
-
- # op requests inversion of the output
- o = Signal.like(self.i.o)
- with m.If(self.i.ctx.op.invert_out):
- comb += o.eq(~self.i.o)
- with m.Else():
- comb += o.eq(self.i.o)
-
- # create condition register cr0 and sticky-overflow
- is_zero = Signal(reset_less=True)
- is_positive = Signal(reset_less=True)
- is_negative = Signal(reset_less=True)
- msb_test = Signal(reset_less=True) # set equal to MSB, invert if OP=CMP
- is_cmp = Signal(reset_less=True) # true if OP=CMP
- so = Signal(reset_less=True)
-
- # TODO: if o[63] is XORed with "operand == OP_CMP"
- # that can be used as a test
- # see https://bugs.libre-soc.org/show_bug.cgi?id=305#c60
-
- comb += is_cmp.eq(self.i.ctx.op.insn_type == InternalOp.OP_CMP)
- comb += msb_test.eq(o[-1] ^ is_cmp)
- comb += is_zero.eq(o == 0)
- comb += is_positive.eq(~is_zero & ~msb_test)
- comb += is_negative.eq(~is_zero & msb_test)
- # sticky overflow: incoming so ORed with the incoming ov
- comb += so.eq(self.i.so | self.i.ov)
-
- comb += self.o.o.eq(o)
- # cr0 is built here (bit 0 upwards: so, zero, positive, negative)
- # for every op except OP_CMPEQB, whose incoming cr0 is forwarded
- with m.If(self.i.ctx.op.insn_type != InternalOp.OP_CMPEQB):
- comb += self.o.cr0.eq(Cat(so, is_zero, is_positive, is_negative))
- with m.Else():
- comb += self.o.cr0.eq(self.i.cr0)
-
- comb += self.o.so.eq(so)
-
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-from nmigen import Signal, Const
-from nmutil.dynamicpipe import SimpleHandshakeRedir
-from soc.alu.alu_input_record import CompALUOpSubset
-from ieee754.fpcommon.getop import FPPipeContext
-
-
-class IntegerData:
- """Base pipeline data container holding only the context (ctx)."""
-
- def __init__(self, pspec):
- self.ctx = FPPipeContext(pspec)
- # convenience alias for the context's muxid
- self.muxid = self.ctx.muxid
-
- def __iter__(self):
- # iterate over whatever the context yields
- yield from self.ctx
-
- def eq(self, i):
- """Return a list of assignments copying i.ctx into self.ctx."""
- return [self.ctx.eq(i.ctx)]
-
- def ports(self):
- """Return the ports of the context."""
- return self.ctx.ports()
-
-
-class ALUInputData(IntegerData):
- """Context plus the ALU inputs: operands a and b, so and carry_in."""
- def __init__(self, pspec):
- super().__init__(pspec)
- self.a = Signal(64, reset_less=True) # RA
- self.b = Signal(64, reset_less=True) # RB/immediate
- self.so = Signal(reset_less=True)
- self.carry_in = Signal(reset_less=True)
-
- def __iter__(self):
- # context entries first, then the data signals
- yield from super().__iter__()
- yield self.a
- yield self.b
- yield self.carry_in
- yield self.so
-
- def eq(self, i):
- """Return a list of assignments copying every field from i."""
- lst = super().eq(i)
- return lst + [self.a.eq(i.a), self.b.eq(i.b),
- self.carry_in.eq(i.carry_in),
- self.so.eq(i.so)]
-
-# TODO: ALUIntermediateData which does not have
-# cr0, ov, ov32 in it (because they are generated as outputs by
-# the final output stage, not by the intermediate stage)
-# https://bugs.libre-soc.org/show_bug.cgi?id=305#c19
-
-class ALUOutputData(IntegerData):
- """Context plus the ALU outputs: o, carry_out, carry_out32, cr0, ov,
- ov32 and so.
- """
- def __init__(self, pspec):
- super().__init__(pspec)
- # the result signal is explicitly named "stage_o"
- self.o = Signal(64, reset_less=True, name="stage_o")
- self.carry_out = Signal(reset_less=True)
- self.carry_out32 = Signal(reset_less=True)
- self.cr0 = Signal(4, reset_less=True)
- self.ov = Signal(reset_less=True)
- self.ov32 = Signal(reset_less=True)
- self.so = Signal(reset_less=True)
-
- def __iter__(self):
- # context entries first, then the data signals
- yield from super().__iter__()
- yield self.o
- yield self.carry_out
- yield self.carry_out32
- yield self.cr0
- yield self.ov
- yield self.ov32
- yield self.so
-
- def eq(self, i):
- """Return a list of assignments copying every field from i."""
- lst = super().eq(i)
- return lst + [self.o.eq(i.o),
- self.carry_out.eq(i.carry_out),
- self.carry_out32.eq(i.carry_out32),
- self.cr0.eq(i.cr0), self.ov.eq(i.ov),
- self.ov32.eq(i.ov32), self.so.eq(i.so)]
-
-
-class IntPipeSpec:
- """Pipeline specification: id width, op width and op record factory."""
- def __init__(self, id_wid=2, op_wid=1):
- self.id_wid = id_wid
- self.op_wid = op_wid
- # factory ignores its argument and returns a CompALUOpSubset named "op"
- self.opkls = lambda _: CompALUOpSubset(name="op")
- self.stage = None
-
-
-class ALUPipeSpec(IntPipeSpec):
- """IntPipeSpec that additionally sets pipekls to SimpleHandshakeRedir."""
- def __init__(self, id_wid, op_wid):
- super().__init__(id_wid, op_wid)
- self.pipekls = SimpleHandshakeRedir
+++ /dev/null
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.main_stage import ALUMainStage
-from soc.alu.output_stage import ALUOutputStage
-
-class ALUStages(PipeModBaseChain):
- """Chain of the three ALU stages: input, main, output."""
- def get_chain(self):
- """Return the stages in order, all created from the same pspec."""
- inp = ALUInputStage(self.pspec)
- main = ALUMainStage(self.pspec)
- out = ALUOutputStage(self.pspec)
- return [inp, main, out]
-
-
-class ALUBasePipe(ControlBase):
- """ControlBase wrapping a single ALUStages chain as self.pipe1."""
- def __init__(self, pspec):
- ControlBase.__init__(self)
- self.pipe1 = ALUStages(pspec)
- # keep the result of connect() so elaborate() can add it to comb
- self._eqs = self.connect([self.pipe1])
-
- def elaborate(self, platform):
- """Extend the ControlBase module with the pipe and its connections."""
- m = ControlBase.elaborate(self, platform)
- m.submodules.pipe = self.pipe1
- m.d.comb += self._eqs
- return m
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function, InternalOp)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.alu.pipeline import ALUBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-class TestCase:
- """Plain container for one queued test: program, regs, sprs and name."""
- def __init__(self, program, regs, sprs, name):
- self.program = program
- self.regs = regs
- self.sprs = sprs
- self.name = name
-
-def get_rec_width(rec):
- """Return the sum of the widths of the signals in rec.ports()."""
- recwidth = 0
- # accumulate the width of each port of the record
- for p in rec.ports():
- width = p.width
- recwidth += width
- return recwidth
-
-def set_alu_inputs(alu, dec2, sim):
- """Simulation generator: load the ALU's a and b operands.
-
- Reads the decoder's register-read and immediate fields (dec2.e),
- fetches register values from sim.gpr and assigns them to
- alu.p.data_i.a and alu.p.data_i.b.
- """
- # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
- # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
- # and place it into data_i.b
-
- reg3_ok = yield dec2.e.read_reg3.ok
- reg1_ok = yield dec2.e.read_reg1.ok
- # exactly one of read_reg3 / read_reg1 is expected to be valid, so
- # the final else branch below is not reached while this assert holds
- assert reg3_ok != reg1_ok
- if reg3_ok:
- data1 = yield dec2.e.read_reg3.data
- data1 = sim.gpr(data1).value
- elif reg1_ok:
- data1 = yield dec2.e.read_reg1.data
- data1 = sim.gpr(data1).value
- else:
- data1 = 0
-
- yield alu.p.data_i.a.eq(data1)
-
- # If there's an immediate, set the B operand to that
- # (the immediate takes priority over read_reg2; 0 if neither is valid)
- reg2_ok = yield dec2.e.read_reg2.ok
- imm_ok = yield dec2.e.imm_data.imm_ok
- if imm_ok:
- data2 = yield dec2.e.imm_data.imm
- elif reg2_ok:
- data2 = yield dec2.e.read_reg2.data
- data2 = sim.gpr(data2).value
- else:
- data2 = 0
- yield alu.p.data_i.b.eq(data2)
-
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
- """Simulation generator: load carry_in and so from the XER in sim.spr.
-
- dec2 is accepted but not used here.
- """
- carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
- yield alu.p.data_i.carry_in.eq(carry)
- so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
- yield alu.p.data_i.so.eq(so)
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class ALUTestCase(FHDLTestCase):
- def __init__(self, name):
- super().__init__(name)
- self.test_name = name
- def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
- tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
- test_data.append(tc)
-
- def test_rand(self):
- insns = ["add", "add.", "subf"]
- for i in range(40):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1, 2"]
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- initial_regs[2] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_rand_imm(self):
- insns = ["addi", "addis", "subfic"]
- for i in range(10):
- choice = random.choice(insns)
- imm = random.randint(-(1<<15), (1<<15)-1)
- lst = [f"{choice} 3, 1, {imm}"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_adde(self):
- lst = ["adde. 5, 6, 7"]
- initial_regs = [0] * 32
- initial_regs[6] = random.randint(0, (1<<64)-1)
- initial_regs[7] = random.randint(0, (1<<64)-1)
- initial_sprs = {}
- xer = SelectableInt(0, 64)
- xer[XER_bits['CA']] = 1
- initial_sprs[special_sprs['XER']] = xer
- self.run_tst_program(Program(lst), initial_regs, initial_sprs)
-
- def test_cmp(self):
- lst = ["subf. 1, 6, 7",
- "cmp cr2, 1, 6, 7"]
- initial_regs = [0] * 32
- initial_regs[6] = 0x10
- initial_regs[7] = 0x05
- self.run_tst_program(Program(lst), initial_regs, {})
-
- def test_extsb(self):
- insns = ["extsb", "extsh", "extsw"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_cmpeqb(self):
- lst = ["cmpeqb cr0, 1, 2"]
- for i in range(20):
- initial_regs = [0] * 32
- initial_regs[1] = i
- initial_regs[2] = 0x01030507090b0d0f11
- self.run_tst_program(Program(lst), initial_regs, {})
-
- def test_ilang(self):
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- alu = ALUBasePipe(pspec)
- vl = rtlil.convert(alu, ports=alu.ports())
- with open("pipeline.il", "w") as f:
- f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
- """Runs every queued TestCase through one decoder + ALU simulation and
- compares the ALU outputs against the ISA simulator.
- """
- def __init__(self, test_data):
- super().__init__("run_all")
- self.test_data = test_data
-
- def run_all(self):
- """Build the decoder/ALU module once and simulate all test data."""
- m = Module()
- comb = m.d.comb
- instruction = Signal(32)
-
- pdecode = create_pdecode()
-
- m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- m.submodules.alu = alu = ALUBasePipe(pspec)
-
- # the ALU op record is fed from the decoder output; the pipeline
- # handshake inputs (valid_i, ready_i) are tied permanently high
- comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
- comb += alu.p.valid_i.eq(1)
- comb += alu.n.ready_i.eq(1)
- comb += pdecode2.dec.raw_opcode_in.eq(instruction)
- sim = Simulator(m)
-
- sim.add_clock(1e-6)
- def process():
- for test in self.test_data:
- print(test.name)
- program = test.program
- # NOTE(review): subTest() is documented as a context manager;
- # it is called here without "with", so it appears to have
- # no effect as written - confirm
- self.subTest(test.name)
- simulator = ISA(pdecode2, test.regs, test.sprs, 0)
- gen = program.generate_instructions()
- instructions = list(zip(gen, program.assembly.splitlines()))
-
- # instruction index is derived from the simulator's CIA // 4
- index = simulator.pc.CIA.value//4
- while index < len(instructions):
- ins, code = instructions[index]
-
- print("0x{:X}".format(ins & 0xffffffff))
- print(code)
-
- # ask the decoder to decode this binary data (endian'd)
- yield pdecode2.dec.bigendian.eq(0) # little / big?
- yield instruction.eq(ins) # raw binary instr.
- yield Settle()
- # every instruction under test must decode to the ALU unit
- fn_unit = yield pdecode2.e.fn_unit
- self.assertEqual(fn_unit, Function.ALU.value)
- yield from set_alu_inputs(alu, pdecode2, simulator)
- yield from set_extra_alu_inputs(alu, pdecode2, simulator)
- yield
- # run the same instruction on the ISA simulator
- opname = code.split(' ')[0]
- yield from simulator.call(opname)
- index = simulator.pc.CIA.value//4
-
- # wait until the pipeline reports valid output
- vld = yield alu.n.valid_o
- while not vld:
- yield
- vld = yield alu.n.valid_o
- yield
- alu_out = yield alu.n.data_o.o
- # only compare the result when the decoder reports a valid
- # destination register
- out_reg_valid = yield pdecode2.e.write_reg.ok
- if out_reg_valid:
- write_reg_idx = yield pdecode2.e.write_reg.data
- expected = simulator.gpr(write_reg_idx).value
- print(f"expected {expected:x}, actual: {alu_out:x}")
- self.assertEqual(expected, alu_out)
- yield from self.check_extra_alu_outputs(alu, pdecode2,
- simulator, code)
-
- sim.add_sync_process(process)
- with sim.write_vcd("simulator.vcd", "simulator.gtkw",
- traces=[]):
- sim.run()
-
- def check_extra_alu_outputs(self, alu, dec2, sim, code):
- """Simulation generator: compare the ALU's cr0 with the simulator.
-
- When dec2.e.rc.data is set, cr0 is compared with sim.crl[0]; for
- OP_CMP and OP_CMPEQB it is compared with sim.crl[BF].
- """
- rc = yield dec2.e.rc.data
- if rc:
- cr_expected = sim.crl[0].get_range().value
- cr_actual = yield alu.n.data_o.cr0
- self.assertEqual(cr_expected, cr_actual, code)
-
- op = yield dec2.e.insn_type
- if op == InternalOp.OP_CMP.value or \
- op == InternalOp.OP_CMPEQB.value:
- bf = yield dec2.dec.BF
- cr_actual = yield alu.n.data_o.cr0
- cr_expected = sim.crl[bf].get_range().value
- self.assertEqual(cr_expected, cr_actual, code)
-
-
-
-# Two passes: unittest.main(exit=False) runs the discovered tests (which
-# fill test_data) and returns instead of exiting; a TestRunner is then
-# run separately on the collected test_data.
-if __name__ == "__main__":
- unittest.main(exit=False)
- suite = unittest.TestSuite()
- suite.addTest(TestRunner(test_data))
-
- runner = unittest.TextTestRunner()
- runner.run(suite)
+++ /dev/null
-from nmigen.hdl.rec import Record, Layout
-
-from soc.decoder.power_enums import InternalOp, Function, CryIn
-
-
-class CompBROpSubset(Record):
- """CompBROpSubset
-
- TODO: remove anything not needed by the Branch pipeline (determine this
- after all branch operations have been written. see
- https://bugs.libre-soc.org/show_bug.cgi?id=313#c3)
-
- a copy of the relevant subset information from Decode2Execute1Type
- needed for Branch operations. use with eq_from_execute1 (below) to
- grab subsets.
- """
- def __init__(self, name=None):
- # field name / shape pairs making up the record
- layout = (('insn_type', InternalOp),
- ('fn_unit', Function),
- ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
- #'cr = Signal(32) # NO: this is from the CR SPR
- #'xerc = XerBits() # NO: this is from the XER SPR
- ('lk', 1),
- ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
- ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
- ('invert_a', 1),
- ('invert_out', 1),
- ('input_carry', CryIn),
- ('output_carry', 1),
- ('input_cr', 1),
- ('output_cr', 1),
- ('is_32bit', 1),
- ('is_signed', 1),
- ('insn', 32),
- ('byte_reverse', 1),
- ('sign_extend', 1))
-
- Record.__init__(self, Layout(layout), name=name)
-
- # grrr. Record does not have kwargs
- # so reset_less is set on the individual fields after construction
- # (imm_data, rc, oe and insn are not touched here)
- self.insn_type.reset_less = True
- self.fn_unit.reset_less = True
- #self.cr = Signal(32, reset_less = True
- #self.xerc = XerBits(
- self.lk.reset_less = True
- self.invert_a.reset_less = True
- self.invert_out.reset_less = True
- self.input_carry.reset_less = True
- self.output_carry.reset_less = True
- self.input_cr.reset_less = True
- self.output_cr.reset_less = True
- self.is_32bit.reset_less = True
- self.is_signed.reset_less = True
- self.byte_reverse.reset_less = True
- self.sign_extend.reset_less = True
-
- def eq_from_execute1(self, other):
- """ use this to copy in from Decode2Execute1Type
-
- returns a list of assignments, one per field of this record,
- each taken from the same-named entry of other.fields
- """
- res = []
- for fname, sig in self.fields.items():
- eqfrom = other.fields[fname]
- res.append(sig.eq(eqfrom))
- return res
-
- def ports(self):
- """Return a list of the record's fields.
-
- fn_unit, imm_data, rc, oe and insn are not included.
- """
- return [self.insn_type,
- #self.cr,
- #self.xerc,
- self.lk,
- self.invert_a,
- self.invert_out,
- self.input_carry,
- self.output_carry,
- self.input_cr,
- self.output_cr,
- self.is_32bit,
- self.is_signed,
- self.byte_reverse,
- self.sign_extend,
- ]
+++ /dev/null
-# Proof of correctness for the ALU input stage (ALUInputStage)
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.branch.br_input_record import CompBROpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- """Formal test harness for ALUInputStage, driven by a CompBROpSubset
- operation record and AnyConst operands.
- """
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- """Build the harness module: dut, AnyConst inputs and assertions."""
- m = Module()
- comb = m.d.comb
-
- rec = CompBROpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = ALUInputStage(pspec)
-
- # operands a and b are unconstrained 64-bit constants
- a = Signal(64)
- b = Signal(64)
- comb += [dut.i.a.eq(a),
- dut.i.b.eq(b),
- a.eq(AnyConst(64)),
- b.eq(AnyConst(64))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # Assert that op gets copied from the input to output
- for p in rec.ports():
- name = p.name
- rec_sig = p
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- # operand a must come out inverted exactly when invert_a is set
- with m.If(rec.invert_a):
- comb += Assert(dut.o.a == ~a)
- with m.Else():
- comb += Assert(dut.o.a == a)
-
- # NOTE(review): only the signals returned by rec.ports() are given
- # AnyConst values above; presumably imm_data is not among them, so
- # the immediate branch of this check may never be exercised - confirm
- with m.If(rec.imm_data.imm_ok &
- ~(rec.insn_type == InternalOp.OP_RLC)):
- comb += Assert(dut.o.b == rec.imm_data.imm)
- with m.Else():
- comb += Assert(dut.o.b == b)
-
- return m
-
-class GTCombinerTestCase(FHDLTestCase):
- """Formal-verification and RTLIL-export tests built on Driver.
-
- NOTE(review): the class name does not match what Driver wraps
- (ALUInputStage); it looks carried over from another proof - confirm.
- """
- def test_formal(self):
- # run the Driver's assertions in "bmc" and "cover" modes, depth 4
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=4)
- self.assertFormal(module, mode="cover", depth=4)
- def test_ilang(self):
- # convert the Driver to RTLIL text and write it to input_stage.il
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("input_stage.il", "w") as f:
- f.write(vl)
-
-
-# run the unit tests when this file is executed as a script
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# Proof of correctness for the Logical main stage (LogicalMainStage)
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
- signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- """Formal test harness for LogicalMainStage: drives the op record and
- data inputs from AnyConst values and asserts the AND/OR/XOR results.
- """
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- """Build the harness module: dut, AnyConst inputs and assertions."""
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = LogicalMainStage(pspec)
-
- # convenience variables
- # (carry_out and o are not referenced again in this method)
- a = dut.i.a
- b = dut.i.b
- carry_in = dut.i.carry_in
- so_in = dut.i.so
- carry_out = dut.o.carry_out
- o = dut.o.o
-
- # setup random inputs
- comb += [a.eq(AnyConst(64)),
- b.eq(AnyConst(64)),
- carry_in.eq(AnyConst(1)),
- so_in.eq(AnyConst(1))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # Assert that op gets copied from the input to output
- for rec_sig in rec.ports():
- name = rec_sig.name
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- # signed and signed/32 versions of input a
- # (not used by the assertions below)
- a_signed = Signal(signed(64))
- a_signed_32 = Signal(signed(32))
- comb += a_signed.eq(a)
- comb += a_signed_32.eq(a[0:32])
-
- # main assertion of arithmetic operations
- # only OP_AND, OP_OR and OP_XOR are checked; other ops are unasserted
- with m.Switch(rec.insn_type):
- with m.Case(InternalOp.OP_AND):
- comb += Assert(dut.o.o == a & b)
- with m.Case(InternalOp.OP_OR):
- comb += Assert(dut.o.o == a | b)
- with m.Case(InternalOp.OP_XOR):
- comb += Assert(dut.o.o == a ^ b)
-
- return m
-
-
-class LogicalTestCase(FHDLTestCase):
- """Formal-verification and RTLIL-export tests built on Driver."""
- def test_formal(self):
- # run the Driver's assertions in "bmc" and "cover" modes, depth 2
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=2)
- self.assertFormal(module, mode="cover", depth=2)
- def test_ilang(self):
- # convert the Driver to RTLIL text and write it to main_stage.il
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("main_stage.il", "w") as f:
- f.write(vl)
-
-
-# run the unit tests when this file is executed as a script
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# This stage is intended to adjust the input data before sending it to
-# the actual ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
- unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.alu.pipe_data import ALUInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ALUInputStage(PipeModBase):
- """Input stage: optionally inverts operand a and selects the carry-in.
-
- Operand b, the sticky-overflow bit and the context are passed
- through unchanged.
- """
- def __init__(self, pspec):
- super().__init__(pspec, "input")
-
- def ispec(self):
- """Return a new ALUInputData describing this stage's input."""
- return ALUInputData(self.pspec)
-
- def ospec(self):
- """Return a new ALUInputData describing this stage's output."""
- return ALUInputData(self.pspec)
-
- def elaborate(self, platform):
- """Build and return the combinatorial logic of the stage."""
- m = Module()
- comb = m.d.comb
-
- ##### operand A #####
-
- # operand a to be as-is or inverted
- a = Signal.like(self.i.a)
-
- with m.If(self.i.ctx.op.invert_a):
- comb += a.eq(~self.i.a)
- with m.Else():
- comb += a.eq(self.i.a)
-
- comb += self.o.a.eq(a)
-
- ##### operand B #####
-
- # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
- # remove this, just do self.o.b.eq(self.i.b) and move the
- # immediate-detection into set_alu_inputs in the unit test
- # If there's an immediate, set the B operand to that
- # NOTE(review): the line below already is the plain pass-through the
- # TODO asks for, so the TODO may be stale - confirm
- comb += self.o.b.eq(self.i.b)
-
- ##### carry-in #####
-
- # either copy incoming carry or set to 1/0 as defined by op
- # (only ZERO, ONE and CA are handled; there is no default case)
- with m.Switch(self.i.ctx.op.input_carry):
- with m.Case(CryIn.ZERO):
- comb += self.o.carry_in.eq(0)
- with m.Case(CryIn.ONE):
- comb += self.o.carry_in.eq(1)
- with m.Case(CryIn.CA):
- comb += self.o.carry_in.eq(self.i.carry_in)
-
- ##### sticky overflow and context (both pass-through) #####
-
- comb += self.o.so.eq(self.i.so)
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-# This stage is intended to do most of the work of executing Branch
-# instructions: unconditional branch (OP_B), branch conditional (OP_BC)
-# and branch conditional to register (OP_BCREG).  It produces the next
-# instruction address, the decremented CTR for the counter-testing
-# forms of the BO field, and the link register value for operations
-# marked as "lk".
-
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
-from nmutil.pipemodbase import PipeModBase
-from soc.branch.pipe_data import BranchInputData, BranchOutputData
-from soc.decoder.power_enums import InternalOp
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-def br_ext(bd):
- """Return bd with two zero bits appended below it and its top bit
- replicated above it, for a total width of 64 bits.
- """
- return Cat(Const(0, 2), bd, Repl(bd[-1], 64-(bd.shape().width + 2)))
-
-"""
-Notes on BO Field:
-
-BO Description
-0000z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=0
-0001z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=0
-001at Branch if CR[BI]=0
-0100z Decrement the CTR, then branch if decremented CTR[M:63]!=0 and CR[BI]=1
-0101z Decrement the CTR, then branch if decremented CTR[M:63]=0 and CR[BI]=1
-011at Branch if CR[BI]=1
-1a00t Decrement the CTR, then branch if decremented CTR[M:63]!=0
-1a01t Decrement the CTR, then branch if decremented CTR[M:63]=0
-1z1zz Branch always
-"""
-
-class BranchMainStage(PipeModBase):
- def __init__(self, pspec):
- super().__init__(pspec, "main")
- self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
- self.fields.create_specs()
-
- def ispec(self):
- return BranchInputData(self.pspec)
-
- def ospec(self):
- return BranchOutputData(self.pspec) # TODO: ALUIntermediateData
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- op = self.i.ctx.op
- lk = op.lk # see PowerDecode2 as to why this is done
- nia_o, lr_o = self.o.nia, self.o.lr
-
- # obtain relevant instruction fields
- i_fields = self.fields.FormI
- aa = Signal(i_fields.AA[0:-1].shape())
- comb += aa.eq(i_fields.AA[0:-1])
-
- br_imm_addr = Signal(64, reset_less=True)
- br_addr = Signal(64, reset_less=True)
- br_taken = Signal(reset_less=True)
-
- # Handle absolute or relative branches
- with m.If(aa):
- comb += br_addr.eq(br_imm_addr)
- with m.Else():
- comb += br_addr.eq(br_imm_addr + self.i.cia)
-
- # fields for conditional branches (BO and BI are same for BC and BCREG)
- # NOTE: here, BO and BI we would like be treated as CR regfile
- # selectors (similar to RA, RB, RS, RT). see comment here:
- # https://bugs.libre-soc.org/show_bug.cgi?id=313#c2
- b_fields = self.fields.FormB
- BO = b_fields.BO[0:-1]
- BI = b_fields.BI[0:-1]
-
- # The bit of CR selected by BI
- cr_bit = Signal(reset_less=True)
- comb += cr_bit.eq((self.i.cr & (1<<(31-BI))) != 0)
-
- # Whether the conditional branch should be taken
- bc_taken = Signal(reset_less=True)
- with m.If(BO[2]):
- comb += bc_taken.eq((cr_bit == BO[3]) | BO[4])
- with m.Else():
- # decrement the counter and place into output
- ctr = Signal(64, reset_less=True)
- comb += ctr.eq(self.i.ctr - 1)
- comb += self.o.ctr.data.eq(ctr)
- comb += self.o.ctr.ok.eq(1)
- # take either all 64 bits or only 32 of post-incremented counter
- ctr_m = Signal(64, reset_less=True)
- with m.If((op.is_32bit):
- comb += ctr_m.eq(ctr[:32])
- with m.Else():
- comb += ctr_m.eq(ctr)
- # check CTR zero/non-zero against BO[1]
- ctr_zero_bo1 = Signal(reset_less=True) # BO[1] == (ctr==0)
- comb += ctr_zero_bo1.eq(BO[1] ^ ctr_m.any())
- with m.If(BO[3:5] == 0b00):
- comb += bc_taken.eq(ctr_zero_bo1 & ~cr_bit)
- with m.Elif(BO[3:5] == 0b01):
- comb += bc_taken.eq(ctr_zero_bo1 & cr_bit)
- with m.Elif(BO[4] == 1):
- comb += bc_taken.eq(ctr_zero_bo1)
-
- ### Main Switch Statement ###
- with m.Switch(op.insn_type):
- #### branch ####
- with m.Case(InternalOp.OP_B):
- LI = i_fields.LI[0:-1]
- comb += br_imm_addr.eq(br_ext(LI))
- comb += br_taken.eq(1)
- #### branch conditional ####
- with m.Case(InternalOp.OP_BC):
- BD = b_fields.BD[0:-1]
- comb += br_imm_addr.eq(br_ext(BD))
- comb += br_taken.eq(bc_taken)
- #### branch conditional reg ####
- with m.Case(InternalOp.OP_BCREG):
- comb += br_imm_addr.eq(self.i.spr1) # SPR1 is set by decode unit
- comb += br_taken.eq(bc_taken)
-
- ###### output next instruction address #####
-
- comb += nia_o.data.eq(br_addr)
- comb += nia_o.ok.eq(br_taken)
-
- ###### link register - only activate on operations marked as "lk" #####
-
- with m.If(lk):
- # ctx.op.lk is the AND of the insn LK field *and* whether the
- # op is to "listen" to the link field
- comb += lr_o.data.eq(self.i.cia + 4)
- comb += lr_o.ok.eq(1)
-
- ###### and context #####
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-"""
- Optional Register allocation listed below. mandatory input
- (CompBROpSubset, CIA) not included.
-
- * CR is Condition Register (not an SPR)
- * SPR1, SPR2 and SPR3 are all from the SPR regfile. 3 ports are needed
-
- insn CR SPR1 SPR2 SPR3
- ---- -- ---- ---- ----
- op_b xx xx xx xx
- op_ba xx xx xx xx
- op_bl xx xx xx xx
- op_bla xx xx xx xx
- op_bc CR, xx, CTR xx
- op_bca CR, xx, CTR xx
- op_bcl CR, xx, CTR xx
- op_bcla CR, xx, CTR xx
- op_bclr CR, LR, CTR xx
- op_bclrl CR, LR, CTR xx
- op_bcctr CR, xx, CTR xx
- op_bcctrl CR, xx, CTR xx
- op_bctar CR, TAR, CTR, xx
- op_bctarl CR, TAR, CTR, xx
-
- op_sc xx xx xx MSR
- op_scv xx LR, SRR1, MSR
- op_rfscv xx LR, CTR, MSR
- op_rfid xx SRR0, SRR1, MSR
- op_hrfid xx HSRR0, HSRR1, MSR
-"""
-
-from nmigen import Signal, Const
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.decoder.power_decoder2 import Data
-from soc.alu.pipe_data import IntegerData
-
-
-class BranchInputData(IntegerData):
- def __init__(self, pspec):
- super().__init__(pspec)
- # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR
- # this involves the *decode* unit selecting the register, based
- # on detecting the operand being bcctr, bclr or bctar
-
- self.spr1 = Signal(64, reset_less=True) # see table above, SPR1
- self.spr2 = Signal(64, reset_less=True) # see table above, SPR2
- self.spr3 = Signal(64, reset_less=True) # see table above, SPR3
- self.cr = Signal(32, reset_less=True) # Condition Register(s) CR0-7
- self.cia = Signal(64, reset_less=True) # Current Instruction Address
-
- # convenience variables. not all of these are used at once
- self.ctr = self.srr0 = self.hsrr0 = self.spr2
- self.lr = self.tar = self.srr1 = self.hsrr1 = self.spr1
- self.msr = self.spr3
-
- def __iter__(self):
- yield from super().__iter__()
- yield self.spr1
- yield self.spr2
- yield self.spr3
- yield self.cr
- yield self.cia
-
- def eq(self, i):
- lst = super().eq(i)
- return lst + [self.spr1.eq(i.spr1), self.spr2.eq(i.spr2),
- self.spr3.eq(i.spr3),
- self.cr.eq(i.cr), self.cia.eq(i.cia)]
-
-
-class BranchOutputData(IntegerData):
- def __init__(self, pspec):
- super().__init__(pspec)
- self.lr = Data(64, name="lr")
- self.spr = Data(64, name="spr")
- self.nia = Data(64, name="nia")
-
- # convenience variables.
- self.ctr = self.spr
-
- def __iter__(self):
- yield from super().__iter__()
- yield from self.lr
- yield from self.spr
- yield from self.nia
-
- def eq(self, i):
- lst = super().eq(i)
- return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr),
- self.nia.eq(i.nia)]
+++ /dev/null
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.branch.main_stage import BranchMainStage
-
-class BranchStages(PipeModBaseChain):
- def get_chain(self):
- main = BranchMainStage(self.pspec)
- return [main]
-
-
-class BranchBasePipe(ControlBase):
- def __init__(self, pspec):
- ControlBase.__init__(self)
- self.pipe1 = BranchStages(pspec)
- self._eqs = self.connect([self.pipe1])
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
- m.submodules.pipe = self.pipe1
- m.d.comb += self._eqs
- return m
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.branch.pipeline import BranchBasePipe
-from soc.branch.br_input_record import CompBROpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-
-class TestCase:
- def __init__(self, program, regs, sprs, cr, name):
- self.program = program
- self.regs = regs
- self.sprs = sprs
- self.name = name
- self.cr = cr
-
-def get_rec_width(rec):
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- return recwidth
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class BranchTestCase(FHDLTestCase):
- def __init__(self, name):
- super().__init__(name)
- self.test_name = name
- def run_tst_program(self, prog, initial_regs=[0] * 32,
- initial_sprs={}, initial_cr=0):
- tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
- self.test_name)
- test_data.append(tc)
-
- def test_unconditional(self):
- choices = ["b", "ba", "bl", "bla"]
- for i in range(20):
- choice = random.choice(choices)
- imm = random.randrange(-1<<23, (1<<23)-1) * 4
- lst = [f"{choice} {imm}"]
- initial_regs = [0] * 32
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_bc_cr(self):
- for i in range(20):
- bc = random.randrange(-1<<13, (1<<13)-1) * 4
- bo = random.choice([0b01100, 0b00100, 0b10100])
- bi = random.randrange(0, 31)
- cr = random.randrange(0, (1<<32)-1)
- lst = [f"bc {bo}, {bi}, {bc}"]
- initial_regs = [0] * 32
- self.run_tst_program(Program(lst), initial_cr=cr)
-
- def test_bc_ctr(self):
- for i in range(20):
- bc = random.randrange(-1<<13, (1<<13)-1) * 4
- bo = random.choice([0, 2, 8, 10, 16, 18])
- bi = random.randrange(0, 31)
- cr = random.randrange(0, (1<<32)-1)
- ctr = random.randint(0, (1<<32)-1)
- lst = [f"bc {bo}, {bi}, {bc}"]
- initial_sprs={9: SelectableInt(ctr, 64)}
- self.run_tst_program(Program(lst),
- initial_sprs=initial_sprs,
- initial_cr=cr)
-
- def test_ilang(self):
- rec = CompBROpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- alu = BranchBasePipe(pspec)
- vl = rtlil.convert(alu, ports=alu.ports())
- with open("logical_pipeline.il", "w") as f:
- f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
- def __init__(self, test_data):
- super().__init__("run_all")
- self.test_data = test_data
-
- def run_all(self):
- m = Module()
- comb = m.d.comb
- instruction = Signal(32)
-
- pdecode = create_pdecode()
-
- m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
- rec = CompBROpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- m.submodules.branch = branch = BranchBasePipe(pspec)
-
- comb += branch.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
- comb += branch.p.valid_i.eq(1)
- comb += branch.n.ready_i.eq(1)
- comb += pdecode2.dec.raw_opcode_in.eq(instruction)
- sim = Simulator(m)
-
- sim.add_clock(1e-6)
- def process():
- for test in self.test_data:
- print(test.name)
- program = test.program
- self.subTest(test.name)
- simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
- initial_cia = 0x2000
- simulator.set_pc(initial_cia)
- gen = program.generate_instructions()
- instructions = list(zip(gen, program.assembly.splitlines()))
-
- index = (simulator.pc.CIA.value - initial_cia)//4
- while index < len(instructions) and index >= 0:
- print(index)
- ins, code = instructions[index]
-
- print("0x{:X}".format(ins & 0xffffffff))
- print(code)
-
- # ask the decoder to decode this binary data (endian'd)
- yield pdecode2.dec.bigendian.eq(0) # little / big?
- yield instruction.eq(ins) # raw binary instr.
- yield branch.p.data_i.cia.eq(simulator.pc.CIA.value)
- yield branch.p.data_i.cr.eq(simulator.cr.get_range().value)
- # note, here, the op will need further decoding in order
- # to set the correct SPRs on SPR1/2/3. op_bc* require
- # spr2 to be set to CTR, op_bctar require spr1 to be
- # set to TAR, op_bclr* require spr1 to be set to LR.
- # if op_sc*, op_rf* and op_hrfid are to be added here
- # then additional op-decoding is required, accordingly
- yield branch.p.data_i.spr2.eq(simulator.spr['CTR'].value)
- print(f"cr0: {simulator.crl[0].get_range()}")
- yield Settle()
- fn_unit = yield pdecode2.e.fn_unit
- self.assertEqual(fn_unit, Function.BRANCH.value, code)
- yield
- yield
- opname = code.split(' ')[0]
- prev_nia = simulator.pc.NIA.value
- yield from simulator.call(opname)
- index = (simulator.pc.CIA.value - initial_cia)//4
-
- yield from self.assert_outputs(branch, pdecode2,
- simulator, prev_nia, code)
-
-
- sim.add_sync_process(process)
- with sim.write_vcd("simulator.vcd", "simulator.gtkw",
- traces=[]):
- sim.run()
-
- def assert_outputs(self, branch, dec2, sim, prev_nia, code):
- branch_taken = yield branch.n.data_o.nia.ok
- sim_branch_taken = prev_nia != sim.pc.CIA
- self.assertEqual(branch_taken, sim_branch_taken, code)
- if branch_taken:
- branch_addr = yield branch.n.data_o.nia.data
- self.assertEqual(branch_addr, sim.pc.CIA.value, code)
-
- lk = yield dec2.e.lk
- branch_lk = yield branch.n.data_o.lr.ok
- self.assertEqual(lk, branch_lk, code)
- if lk:
- branch_lr = yield branch.n.data_o.lr.data
- self.assertEqual(sim.spr['LR'], branch_lr, code)
-
-
-if __name__ == "__main__":
- unittest.main(exit=False)
- suite = unittest.TestSuite()
- suite.addTest(TestRunner(test_data))
-
- runner = unittest.TextTestRunner()
- runner.run(suite)
+++ /dev/null
-# https://github.com/antonblanchard/microwatt/blob/master/countzero.vhdl
-from nmigen import Memory, Module, Signal, Cat, Elaboratable
-from nmigen.hdl.rec import Record, Layout
-from nmigen.cli import main
-
-
-def or4(a, b, c, d):
- return Cat(a.any(), b.any(), c.any(), d.any())
-
-
-class IntermediateResult(Record):
- def __init__(self, name=None):
- layout = (('v16', 15),
- ('sel_hi', 2),
- ('is_32bit', 1),
- ('count_right', 1))
- Record.__init__(self, Layout(layout), name=name)
-
-
-class ZeroCounter(Elaboratable):
- def __init__(self):
- self.rs_i = Signal(64, reset_less=True)
- self.count_right_i = Signal(1, reset_less=True)
- self.is_32bit_i = Signal(1, reset_less=True)
- self.result_o = Signal(64, reset_less=True)
-
- def ports(self):
- return [self.rs_i, self.count_right_i, self.is_32bit_i, self.result_o]
-
- def elaborate(self, platform):
- m = Module()
-
- # TODO: replace this with m.submodule.pe1 = PriorityEncoder(4)
- # m.submodule.pe2 = PriorityEncoder(4)
- # m.submodule.pe3 = PriorityEncoder(4)
- # etc.
- # and where right will assign input to v and !right will assign v[::-1]
- # so as to reverse the order of the input bits.
-
- def encoder(v, right):
- """
- Return the index of the leftmost or rightmost 1 in a set of 4 bits.
- Assumes v is not "0000"; if it is, return (right ? "11" : "00").
- """
- ret = Signal(2, reset_less=True)
- with m.If(right):
- with m.If(v[0]):
- m.d.comb += ret.eq(0)
- with m.Elif(v[1]):
- m.d.comb += ret.eq(1)
- with m.Elif(v[2]):
- m.d.comb += ret.eq(2)
- with m.Else():
- m.d.comb += ret.eq(3)
- with m.Else():
- with m.If(v[3]):
- m.d.comb += ret.eq(3)
- with m.Elif(v[2]):
- m.d.comb += ret.eq(2)
- with m.Elif(v[1]):
- m.d.comb += ret.eq(1)
- with m.Else():
- m.d.comb += ret.eq(0)
- return ret
-
- r = IntermediateResult()
- r_in = IntermediateResult()
-
- m.d.comb += r.eq(r_in) # make the module entirely combinatorial for now
-
- v = IntermediateResult()
- y = Signal(4, reset_less=True)
- z = Signal(4, reset_less=True)
- sel = Signal(6, reset_less=True)
- v4 = Signal(4, reset_less=True)
-
- # Test 4 groups of 16 bits each.
- # The top 2 groups are considered to be zero in 32-bit mode.
- m.d.comb += z.eq(or4(self.rs_i[0:16], self.rs_i[16:32],
- self.rs_i[32:48], self.rs_i[48:64]))
- with m.If(self.is_32bit_i):
- m.d.comb += v.sel_hi[1].eq(0)
- with m.If(self.count_right_i):
- m.d.comb += v.sel_hi[0].eq(~z[0])
- with m.Else():
- m.d.comb += v.sel_hi[0].eq(z[1])
- with m.Else():
- m.d.comb += v.sel_hi.eq(encoder(z, self.count_right_i))
-
- # Select the leftmost/rightmost non-zero group of 16 bits
- with m.Switch(v.sel_hi):
- with m.Case(0):
- m.d.comb += v.v16.eq(self.rs_i[0:16])
- with m.Case(1):
- m.d.comb += v.v16.eq(self.rs_i[16:32])
- with m.Case(2):
- m.d.comb += v.v16.eq(self.rs_i[32:48])
- with m.Case(3):
- m.d.comb += v.v16.eq(self.rs_i[48:64])
-
- # Latch this and do the rest in the next cycle, for the sake of timing
- m.d.comb += v.is_32bit.eq(self.is_32bit_i)
- m.d.comb += v.count_right.eq(self.count_right_i)
- m.d.comb += r_in.eq(v)
- m.d.comb += sel[4:6].eq(r.sel_hi)
-
- # Test 4 groups of 4 bits
- m.d.comb += y.eq(or4(r.v16[0:4], r.v16[4:8],
- r.v16[8:12], r.v16[12:16]))
- m.d.comb += sel[2:4].eq(encoder(y, r.count_right))
-
- # Select the leftmost/rightmost non-zero group of 4 bits
- with m.Switch(sel[2:4]):
- with m.Case(0):
- m.d.comb += v4.eq(r.v16[0:4])
- with m.Case(1):
- m.d.comb += v4.eq(r.v16[4:8])
- with m.Case(2):
- m.d.comb += v4.eq(r.v16[8:12])
- with m.Case(3):
- m.d.comb += v4.eq(r.v16[12:16])
-
- m.d.comb += sel[0:2].eq(encoder(v4, r.count_right))
-
- # sel is now the index of the leftmost/rightmost 1 bit in rs
- o = self.result_o
- with m.If(v4 == 0):
- # operand is zero, return 32 for 32-bit, else 64
- m.d.comb += o[5:7].eq(Cat(r.is_32bit, ~r.is_32bit))
- with m.Elif(r.count_right):
- # return (63 - sel), trimmed to 5 bits in 32-bit mode
- m.d.comb += o.eq(Cat(~sel[0:5], ~(sel[5] | r.is_32bit)))
- with m.Else():
- m.d.comb += o.eq(sel)
-
- return m
+++ /dev/null
-# https://github.com/antonblanchard/microwatt/blob/master/countzero_tb.vhdl
-from nmigen import Module, Signal
-from nmigen.cli import rtlil
-from nmigen.back.pysim import Simulator, Delay
-from nmigen.test.utils import FHDLTestCase
-import unittest
-from soc.countzero.countzero import ZeroCounter
-
-
-class ZeroCounterTestCase(FHDLTestCase):
- def test_zerocounter(self):
- m = Module()
- comb = m.d.comb
- m.submodules.dut = dut = ZeroCounter()
-
- sim = Simulator(m)
- # sim.add_clock(1e-6)
-
- def process():
- print("test zero input")
- yield dut.rs_i.eq(0)
- yield dut.is_32bit_i.eq(0)
- yield dut.count_right_i.eq(0)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 0x40
- # report "bad cntlzd 0 = " & to_hstring(result);
- assert(result == 0x40)
- yield dut.count_right_i.eq(1)
- yield Delay(1e-6)
- result = yield dut.result_o
- # report "bad cntlzd 0 = " & to_hstring(result);
- assert(result == 0x40)
- yield dut.is_32bit_i.eq(1)
- yield dut.count_right_i.eq(0)
- yield Delay(1e-6)
- result = yield dut.result_o
- # report "bad cntlzw 0 = " & to_hstring(result);
- assert(result == 0x20)
- yield dut.count_right_i.eq(1)
- yield Delay(1e-6)
- result = yield dut.result_o
- # report "bad cntlzw 0 = " & to_hstring(result);
- assert(result == 0x20)
- # TODO next tests
-
- yield dut.rs_i.eq(0b00010000)
- yield dut.is_32bit_i.eq(0)
- yield dut.count_right_i.eq(0)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 4, "result %d" % result
-
- yield dut.count_right_i.eq(1)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 59, "result %d" % result
-
- yield dut.is_32bit_i.eq(1)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 27, "result %d" % result
-
- yield dut.rs_i.eq(0b1100000100000000)
- yield dut.is_32bit_i.eq(0)
- yield dut.count_right_i.eq(0)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 14, "result %d" % result
-
- yield dut.count_right_i.eq(1)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 55, "result %d" % result
-
- yield dut.is_32bit_i.eq(1)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 23, "result %d" % result
-
- yield dut.count_right_i.eq(0)
- yield Delay(1e-6)
- result = yield dut.result_o
- assert result == 14, "result %d" % result
-
-
- sim.add_process(process) # or sim.add_sync_process(process), see below
-
- # run test and write vcd
- fn = "genullnau"
- with sim.write_vcd(fn+".vcd", fn+".gtkw", traces=dut.ports()):
- sim.run()
-
- # cntlzd_w
- # cnttzd_w
-
-
-if __name__ == "__main__":
-
- dut = ZeroCounter()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("countzero.il", "w") as f:
- f.write(vl)
-
- unittest.main()
+++ /dev/null
-# This stage is intended to do Condition Register instructions
-# and output, as well as carry and overflow generation.
-# NOTE: with the exception of mtcrf and mfcr, we really should be doing
-# the field decoding which
-# selects which bits of CR are to be read / written, back in the
-# decoder / insn-isue, have both self.i.cr and self.o.cr
-# be broken down into 4-bit-wide "registers", with their
-# own "Register File" (indexed by bt, ba and bb),
-# exactly how INT regs are done (by RA, RB, RS and RT)
-# however we are pushed for time so do it as *one* register.
-
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
-from nmutil.pipemodbase import PipeModBase
-from soc.cr.pipe_data import CRInputData, CROutputData
-from soc.decoder.power_enums import InternalOp
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-
-class CRMainStage(PipeModBase):
- def __init__(self, pspec):
- super().__init__(pspec, "main")
- self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
- self.fields.create_specs()
-
- def ispec(self):
- return CRInputData(self.pspec)
-
- def ospec(self):
- return CROutputData(self.pspec)
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- op = self.i.ctx.op
- xl_fields = self.fields.FormXL
- xfx_fields = self.fields.FormXFX
- # default: cr_o remains same as cr input unless modified, below
- cr_o = Signal.like(self.i.cr)
- comb += cr_o.eq(self.i.cr)
-
- ##### prepare inputs / temp #####
-
- # Generate array for cr input so bits can be selected
- cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)])
- for i in range(32):
- comb += cr_arr[i].eq(self.i.cr[31-i])
-
- # Generate array for cr output so the bit to write to can be
- # selected by a signal
- cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)])
- for i in range(32):
- comb += cr_o[31-i].eq(cr_out_arr[i])
- comb += cr_out_arr[i].eq(cr_arr[i])
-
- # Generate the mask for mtcrf, mtocrf, and mfocrf
- # replicate every fxm field in the insn to 4-bit, as a mask
- FXM = xfx_fields.FXM[0:-1]
- mask = Signal(32, reset_less=True)
- comb += mask.eq(Cat(*[Repl(FXM[i], 4) for i in range(8)]))
-
- #################################
- ##### main switch statement #####
-
- with m.Switch(op.insn_type):
- ##### mcrf #####
- with m.Case(InternalOp.OP_MCRF):
- # MCRF copies the 4 bits of crA to crB (for instance
- # copying cr2 to cr1)
- BF = xl_fields.BF[0:-1] # destination CR
- BFA = xl_fields.BFA[0:-1] # source CR
-
- for i in range(4):
- comb += cr_out_arr[BF*4 + i].eq(cr_arr[BFA*4 + i])
-
- ##### crand, cror, crnor etc. #####
- with m.Case(InternalOp.OP_CROP):
- # crand/cror and friends get decoded to the same opcode, but
- # one of the fields inside the instruction is a 4 bit lookup
- # table. This lookup table gets indexed by bits a and b from
- # the CR to determine what the resulting bit should be.
-
- # Grab the lookup table for cr_op type instructions
- lut = Array([Signal(name=f"lut{i}") for i in range(4)])
- # There's no field, just have to grab it directly from the insn
- for i in range(4):
- comb += lut[i].eq(self.i.ctx.op.insn[6+i])
-
- # Get the bit selector fields from the instruction
- BT = xl_fields.BT[0:-1]
- BA = xl_fields.BA[0:-1]
- BB = xl_fields.BB[0:-1]
-
- # Use the two input bits to look up the result in the LUT
- comb += cr_out_arr[BT].eq(lut[Cat(cr_arr[BB], cr_arr[BA])])
-
- ##### mtcrf #####
- with m.Case(InternalOp.OP_MTCRF):
- # mtocrf and mtcrf are essentially identical
- # put input (RA) - mask-selected - into output CR, leave
- # rest of CR alone.
- comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask))
-
- ##### mfcr #####
- with m.Case(InternalOp.OP_MFCR):
- # Ugh. mtocrf and mtcrf have one random bit differentiating
- # them. This bit is not in any particular field, so this
- # extracts that bit from the instruction
- move_one = Signal(reset_less=True)
- comb += move_one.eq(self.i.ctx.op.insn[20])
-
- # mfocrf
- with m.If(move_one):
- comb += self.o.o.eq(self.i.cr & mask)
- # mfcrf
- with m.Else():
- comb += self.o.o.eq(self.i.cr)
-
- # output and context
- comb += self.o.cr.eq(cr_o)
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-from nmigen import Signal, Const
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.alu.pipe_data import IntegerData
-
-
-class CRInputData(IntegerData):
- def __init__(self, pspec):
- super().__init__(pspec)
- self.a = Signal(64, reset_less=True) # RA
- self.cr = Signal(64, reset_less=True) # CR in
-
- def __iter__(self):
- yield from super().__iter__()
- yield self.a
- yield self.cr
-
- def eq(self, i):
- lst = super().eq(i)
- return lst + [self.a.eq(i.a),
- self.cr.eq(i.cr)]
-
-class CROutputData(IntegerData):
- def __init__(self, pspec):
- super().__init__(pspec)
- self.o = Signal(64, reset_less=True) # RA
- self.cr = Signal(64, reset_less=True) # CR in
-
- def __iter__(self):
- yield from super().__iter__()
- yield self.o
- yield self.cr
-
- def eq(self, i):
- lst = super().eq(i)
- return lst + [self.o.eq(i.o),
- self.cr.eq(i.cr)]
+++ /dev/null
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.cr.main_stage import CRMainStage
-
-class CRStages(PipeModBaseChain):
- def get_chain(self):
- main = CRMainStage(self.pspec)
- return [main]
-
-
-class CRBasePipe(ControlBase):
- def __init__(self, pspec):
- ControlBase.__init__(self)
- self.pipe1 = CRStages(pspec)
- self._eqs = self.connect([self.pipe1])
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
- m.submodules.pipe = self.pipe1
- m.d.comb += self._eqs
- return m
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.cr.pipeline import CRBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-
-class TestCase:
- def __init__(self, program, regs, sprs, cr, name):
- self.program = program
- self.regs = regs
- self.sprs = sprs
- self.name = name
- self.cr = cr
-
-def get_rec_width(rec):
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- return recwidth
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class CRTestCase(FHDLTestCase):
- def __init__(self, name):
- super().__init__(name)
- self.test_name = name
- def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={},
- initial_cr=0):
- tc = TestCase(prog, initial_regs, initial_sprs, initial_cr,
- self.test_name)
- test_data.append(tc)
-
- def test_crop(self):
- insns = ["crand", "cror", "crnand", "crnor", "crxor", "creqv",
- "crandc", "crorc"]
- for i in range(40):
- choice = random.choice(insns)
- ba = random.randint(0, 31)
- bb = random.randint(0, 31)
- bt = random.randint(0, 31)
- lst = [f"{choice} {ba}, {bb}, {bt}"]
- cr = random.randint(0, 7)
- self.run_tst_program(Program(lst), initial_cr=cr)
-
- def test_mcrf(self):
- lst = ["mcrf 0, 5"]
- cr = 0xffff0000
- self.run_tst_program(Program(lst), initial_cr=cr)
-
- def test_mtcrf(self):
- for i in range(20):
- mask = random.randint(0, 255)
- lst = [f"mtcrf {mask}, 2"]
- cr = random.randint(0, (1<<32)-1)
- initial_regs = [0] * 32
- initial_regs[2] = random.randint(0, (1<<32)-1)
- self.run_tst_program(Program(lst), initial_regs=initial_regs,
- initial_cr=cr)
- def test_mtocrf(self):
- for i in range(20):
- mask = 1<<random.randint(0, 7)
- lst = [f"mtocrf {mask}, 2"]
- cr = random.randint(0, (1<<32)-1)
- initial_regs = [0] * 32
- initial_regs[2] = random.randint(0, (1<<32)-1)
- self.run_tst_program(Program(lst), initial_regs=initial_regs,
- initial_cr=cr)
-
- def test_mfcr(self):
- for i in range(5):
- lst = ["mfcr 2"]
- cr = random.randint(0, (1<<32)-1)
- self.run_tst_program(Program(lst), initial_cr=cr)
-
- def test_mfocrf(self):
- for i in range(20):
- mask = 1<<random.randint(0, 7)
- lst = [f"mfocrf 2, {mask}"]
- cr = random.randint(0, (1<<32)-1)
- self.run_tst_program(Program(lst), initial_cr=cr)
-
-
- def test_ilang(self):
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- alu = CRBasePipe(pspec)
- ports = alu.ports()
- vl = rtlil.convert(alu, ports=alu.ports())
- with open("logical_pipeline.il", "w") as f:
- f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
- def __init__(self, test_data):
- super().__init__("run_all")
- self.test_data = test_data
-
- def set_inputs(self, alu, dec2, simulator):
- yield alu.p.data_i.cr.eq(simulator.cr.get_range().value)
-
- reg3_ok = yield dec2.e.read_reg3.ok
- if reg3_ok:
- reg3_sel = yield dec2.e.read_reg3.data
- reg3 = simulator.gpr(reg3_sel).value
- yield alu.p.data_i.a.eq(reg3)
-
- def run_all(self):
- m = Module()
- comb = m.d.comb
- instruction = Signal(32)
-
- pdecode = create_pdecode()
-
- m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- m.submodules.alu = alu = CRBasePipe(pspec)
-
- comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
- comb += alu.p.valid_i.eq(1)
- comb += alu.n.ready_i.eq(1)
- comb += pdecode2.dec.raw_opcode_in.eq(instruction)
- sim = Simulator(m)
-
- sim.add_clock(1e-6)
- def process():
- for test in self.test_data:
- print(test.name)
- program = test.program
- self.subTest(test.name)
- simulator = ISA(pdecode2, test.regs, test.sprs, test.cr)
- gen = program.generate_instructions()
- instructions = list(zip(gen, program.assembly.splitlines()))
-
- index = simulator.pc.CIA.value//4
- while index < len(instructions):
- ins, code = instructions[index]
-
- print("0x{:X}".format(ins & 0xffffffff))
- print(code)
-
- # ask the decoder to decode this binary data (endian'd)
- yield pdecode2.dec.bigendian.eq(0) # little / big?
- yield instruction.eq(ins) # raw binary instr.
- yield Settle()
- yield from self.set_inputs(alu, pdecode2, simulator)
- fn_unit = yield pdecode2.e.fn_unit
- self.assertEqual(fn_unit, Function.CR.value, code)
- yield
- opname = code.split(' ')[0]
- yield from simulator.call(opname)
- index = simulator.pc.CIA.value//4
-
- vld = yield alu.n.valid_o
- while not vld:
- yield
- vld = yield alu.n.valid_o
- yield
- cr_out = yield pdecode2.e.output_cr
- if cr_out:
- cr_expected = simulator.cr.get_range().value
- cr_real = yield alu.n.data_o.cr
- msg = f"real: {cr_expected:x}, actual: {cr_real:x}"
- msg += " code: %s" % code
- self.assertEqual(cr_expected, cr_real, msg)
-
- reg_out = yield pdecode2.e.write_reg.ok
- if reg_out:
- reg_sel = yield pdecode2.e.write_reg.data
- reg_data = simulator.gpr(reg_sel).value
- output = yield alu.n.data_o.o
- msg = f"real: {reg_data:x}, actual: {output:x}"
- self.assertEqual(reg_data, output)
-
- sim.add_sync_process(process)
- with sim.write_vcd("simulator.vcd", "simulator.gtkw",
- traces=[]):
- sim.run()
- def check_extra_alu_outputs(self, alu, dec2, sim):
- rc = yield dec2.e.rc.data
- if rc:
- cr_expected = sim.crl[0].get_range().value
- cr_actual = yield alu.n.data_o.cr0
- self.assertEqual(cr_expected, cr_actual)
-
-
-if __name__ == "__main__":
- unittest.main(exit=False)
- suite = unittest.TestSuite()
- suite.addTest(TestRunner(test_data))
-
- runner = unittest.TextTestRunner()
- runner.run(suite)
+++ /dev/null
-from nmigen import Elaboratable, Signal, Module, Repl, Cat, Const, Array
-from nmigen.cli import main
-
-
-class Bpermd(Elaboratable):
- """This class does a Bit Permute on a Doubleword
-
- X-form bpermd RA,RS,RB]
-
- Eight permuted bits are produced. For each permuted bit i where i ranges
- from 0 to 7 and for each byte i of RS, do the following. If byte i of RS
- is less than 64, permuted bit i is setto the bit of RB specified by byte
- i of RS; otherwise permuted bit i is set to 0. The permuted bits are
- placed in the least-significantbyte of RA, and the remaining bits are
- filled with 0s.
- Special Registers Altered: None
-
- Programming note:
- The fact that the permuted bit is 0 if the corresponding index value
- exceeds 63 permits the permuted bits to be selected from a 128-bit
- quantity, using a single index register. For example, assume that the
- 128-bit quantity Q, from which the permuted bits are to be selected, is
- in registers r2 (high-order 64 bits of Q) and r3 (low-order 64 bits of Q),
- that the index values are in register r1, with each byte of r1 containing
- a value in the range 0:127, and that each byte of register r4 contains
- the value 64. The following code sequence selects eight permuted bits
- from Q and places them into the low-order byte of r6:
-
- bpermd r6,r1,r2 # select from high-order half of Q
- xor r0,r1,r4 # adjust index values
- bpermd r5,r0,r3 # select from low-order half of Q
- or r6,r6,r5 # merge the two selections
- """
-
- def __init__(self, width):
- self.width = width
- self.rs = Signal(width, reset_less=True)
- self.ra = Signal(width, reset_less=True)
- self.rb = Signal(width, reset_less=True)
-
- def elaborate(self, platform):
- m = Module()
- perm = Signal(self.width, reset_less=True)
- rb64 = [Signal(1, reset_less=True, name=f"rb64_{i}") for i in range(64)]
- for i in range(64):
- m.d.comb += rb64[i].eq(self.rb[i])
- rb64 = Array(rb64)
- for i in range(8):
- index = self.rs[8*i:8*i+8]
- idx = Signal(8, name=f"idx_{i}", reset_less=True)
- m.d.comb += idx.eq(index)
- with m.If(idx < 64):
- m.d.comb += perm[i].eq(rb64[idx])
- m.d.comb += self.ra[0:8].eq(perm)
- return m
-
-
-if __name__ == "__main__":
- bperm = Bpermd(width=64)
- main(bperm, ports=[bperm.rs, bperm.ra, bperm.rb])
+++ /dev/null
-# Proof of correctness for bit permute module
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
- signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.logical.bperm import Bpermd
-
-import unittest
-
-
-# So formal verification is a little different than writing a test
-# case, as you're actually generating logic around your module to
-# check that it behaves a certain way. So here, I'm going to create a
-# module to put my formal assertions in
-class Driver(Elaboratable):
- def __init__(self):
- # We don't need any inputs and outputs here, so I won't
- # declare any
- pass
-
- def elaborate(self, platform):
- # standard stuff
- m = Module()
- comb = m.d.comb
-
- # instantiate the device under test as a submodule
- m.submodules.bperm = bperm = Bpermd(64)
-
- # Grab the inputs and outputs of the DUT to make them more
- # convenient to access
- rs = bperm.rs
- rb = bperm.rb
- ra = bperm.ra
-
- # Before we prove any properties about the DUT, we need to set
- # up its inputs. There's a couple ways to do this, you could
- # define some inputs and outputs for the driver module and
- # wire them up to the DUT, but that's kind of a pain. The
- # other option is to use AnyConst/AnySeq, which tells yosys
- # that those inputs can take on any value.
-
- # AnyConst should be used when the input should take on a
- # random value, but that value should be constant throughout
- # the test.
- # AnySeq should be used when the input can change on every
- # cycle
-
- # Since this is a combinatorial circuit, it really doesn't
- # matter which one you choose, so I chose AnyConst. If this
- # was a sequential circuit, (especially a state machine) you'd
- # want to use AnySeq
- comb += [rs.eq(AnyConst(64)),
- rb.eq(AnyConst(64))]
-
-
- # The pseudocode in the Power ISA manual (v3.1) is as follows:
- # do i = 0 to 7
- # index <- RS[8*i:8*i+8]
- # if index < 64:
- # perm[i] <- RB[index]
- # else:
- # perm[i] <- 0
- # RA <- 56'b0 || perm[0:8] # big endian though
-
- # Looking at this, I can identify 3 properties that the bperm
- # module should keep:
- # 1. RA[8:64] should always equal 0
- # 2. If RS[i*8:i*8+8] >= 64 then RA[i] should equal 0
- # 3. If RS[i*8:i*8+8] < 64 then RA[i] should equal RB[index]
-
- # Now we need to Assert that the properties above hold:
-
- # Property 1: RA[8:64] should always equal 0
- comb += Assert(ra[8:] == 0)
- # Notice how we're adding Assert to comb like it's a circuit?
- # That's because it kind of is. If you run this proof and have
- # yosys graph the ilang, you'll be able to see an equals
- # comparison cell feeding into an assert cell
-
- # Now we need to prove property #2. I'm going to leave this to
- # you Cole. I'd start by writing a for loop and extracting the
- # 8 indices into signals. Then I'd write an if statement
- # checking if the index is >= 64 (it's hardware, so use an
- # m.If()). Finally, I'd add an assert that checks whether
- # ra[i] is equal to 0
-
-
-
- return m
-
-
-class TestCase(FHDLTestCase):
- # This bit here is actually in charge of running the formal
- # proof. It has nmigen spit out the ilang, and feeds it to
- # SymbiYosys to run the proof. If the proof fails, yosys will
- # generate a .vcd file showing how it was able to violate your
- # assertions in proof_bperm_formal/engine_0/trace.vcd. From that
- # you should be able to figure out what went wrong, and either
- # correct the assertion or fix the DUT
- def test_formal(self):
- module = Driver()
- # This runs a Bounded Model Check on the driver module
- # above. What that does is it starts at some initial state,
- # and steps it through `depth` cycles, checking that the
- # assertions hold at every cycle. Since this is a
- # combinatorial module, it only needs 1 cycle to prove
- # everything.
- self.assertFormal(module, mode="bmc", depth=2)
- self.assertFormal(module, mode="cover", depth=2)
-
- # As mentioned above, you can look at the graph in yosys and see
- # all the assertion cells
- def test_ilang(self):
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("bperm.il", "w") as f:
- f.write(vl)
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# Proof of correctness for the ALU input stage
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import Module, Signal, Elaboratable, Mux
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.alu.input_stage import ALUInputStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = ALUInputStage(pspec)
-
- a = Signal(64)
- b = Signal(64)
- comb += [dut.i.a.eq(a),
- dut.i.b.eq(b),
- a.eq(AnyConst(64)),
- b.eq(AnyConst(64))]
-
-
- comb += dut.i.ctx.op.eq(rec)
-
-
- # Assert that op gets copied from the input to output
- for p in rec.ports():
- name = p.name
- rec_sig = p
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- with m.If(rec.invert_a):
- comb += Assert(dut.o.a == ~a)
- with m.Else():
- comb += Assert(dut.o.a == a)
-
- with m.If(rec.imm_data.imm_ok &
- ~(rec.insn_type == InternalOp.OP_RLC)):
- comb += Assert(dut.o.b == rec.imm_data.imm)
- with m.Else():
- comb += Assert(dut.o.b == b)
-
-
-
-
- return m
-
-class GTCombinerTestCase(FHDLTestCase):
- def test_formal(self):
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=4)
- self.assertFormal(module, mode="cover", depth=4)
- def test_ilang(self):
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("input_stage.il", "w") as f:
- f.write(vl)
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# Proof of correctness for the logical pipeline main stage
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
- signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = LogicalMainStage(pspec)
-
- # convenience variables
- a = dut.i.a
- b = dut.i.b
- carry_in = dut.i.carry_in
- so_in = dut.i.so
- carry_out = dut.o.carry_out
- o = dut.o.o
-
- # setup random inputs
- comb += [a.eq(AnyConst(64)),
- b.eq(AnyConst(64)),
- carry_in.eq(AnyConst(1)),
- so_in.eq(AnyConst(1))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # Assert that op gets copied from the input to output
- for rec_sig in rec.ports():
- name = rec_sig.name
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- # signed and signed/32 versions of input a
- a_signed = Signal(signed(64))
- a_signed_32 = Signal(signed(32))
- comb += a_signed.eq(a)
- comb += a_signed_32.eq(a[0:32])
-
- # main assertion of logical operations
- with m.Switch(rec.insn_type):
- with m.Case(InternalOp.OP_AND):
- comb += Assert(dut.o.o == a & b)
- with m.Case(InternalOp.OP_OR):
- comb += Assert(dut.o.o == a | b)
- with m.Case(InternalOp.OP_XOR):
- comb += Assert(dut.o.o == a ^ b)
-
- return m
-
-
-class LogicalTestCase(FHDLTestCase):
- def test_formal(self):
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=2)
- self.assertFormal(module, mode="cover", depth=2)
- def test_ilang(self):
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("main_stage.il", "w") as f:
- f.write(vl)
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# This stage is intended to adjust the input data before sending it to
-# the actual ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
- unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.alu.pipe_data import ALUInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ALUInputStage(PipeModBase):
- def __init__(self, pspec):
- super().__init__(pspec, "input")
-
- def ispec(self):
- return ALUInputData(self.pspec)
-
- def ospec(self):
- return ALUInputData(self.pspec)
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- ##### operand A #####
-
- # operand a to be as-is or inverted
- a = Signal.like(self.i.a)
-
- with m.If(self.i.ctx.op.invert_a):
- comb += a.eq(~self.i.a)
- with m.Else():
- comb += a.eq(self.i.a)
-
- comb += self.o.a.eq(a)
-
- ##### operand B #####
-
- # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
- # remove this, just do self.o.b.eq(self.i.b) and move the
- # immediate-detection into set_alu_inputs in the unit test
- # operand B is passed through unmodified: no immediate is selected here
- comb += self.o.b.eq(self.i.b)
-
- ##### carry-in #####
-
- # either copy incoming carry or set to 1/0 as defined by op
- with m.Switch(self.i.ctx.op.input_carry):
- with m.Case(CryIn.ZERO):
- comb += self.o.carry_in.eq(0)
- with m.Case(CryIn.ONE):
- comb += self.o.carry_in.eq(1)
- with m.Case(CryIn.CA):
- comb += self.o.carry_in.eq(self.i.carry_in)
-
- ##### sticky overflow and context (both pass-through) #####
-
- comb += self.o.so.eq(self.i.so)
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-# This stage is intended to do most of the work of executing Logical
-# instructions. This is OR, AND, XOR, POPCNT, PRTY, CMPB, BPERMD, CNTLZ
-# however input and output stages also perform bit-negation on input(s)
-# and output, as well as carry and overflow generation.
-# This module however should not gate the carry or overflow, that's up
-# to the output stage
-
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
-from nmutil.pipemodbase import PipeModBase
-from soc.logical.pipe_data import ALUInputData
-from soc.alu.pipe_data import ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.countzero.countzero import ZeroCounter
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-
-def array_of(count, bitwidth):
- res = []
- for i in range(count):
- res.append(Signal(bitwidth, reset_less=True))
- return res
-
-
-class LogicalMainStage(PipeModBase):
- def __init__(self, pspec):
- super().__init__(pspec, "main")
- self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
- self.fields.create_specs()
-
- def ispec(self):
- return ALUInputData(self.pspec)
-
- def ospec(self):
- return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o
-
- ##########################
- # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount
-
- with m.Switch(op.insn_type):
-
- ###### AND, OR, XOR #######
- with m.Case(InternalOp.OP_AND):
- comb += o.eq(a & b)
- with m.Case(InternalOp.OP_OR):
- comb += o.eq(a | b)
- with m.Case(InternalOp.OP_XOR):
- comb += o.eq(a ^ b)
-
- ###### cmpb #######
- with m.Case(InternalOp.OP_CMPB):
- l = []
- for i in range(8):
- slc = slice(i*8, (i+1)*8)
- l.append(Repl(a[slc] == b[slc], 8))
- comb += o.eq(Cat(*l))
-
- ###### popcount #######
- with m.Case(InternalOp.OP_POPCNT):
- # starting from a, perform successive addition-reductions
- # creating arrays big enough to store the sum, each time
- pc = [a]
- # QTY32 2-bit (to take 2x 1-bit sums) etc.
- work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
- for l, b in work:
- pc.append(array_of(l, b))
- pc8 = pc[3] # array of 8 8-bit counts (popcntb)
- pc32 = pc[5] # array of 2 32-bit counts (popcntw)
- popcnt = pc[-1] # array of 1 64-bit count (popcntd)
- # cascade-tree of adds
- for idx, (l, b) in enumerate(work):
- for i in range(l):
- stt, end = i*2, i*2+1
- src, dst = pc[idx], pc[idx+1]
- comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
- Cat(src[end], Const(0, 1)))
- # decode operation length
- with m.If(op.data_len[2:4] == 0b00):
- # popcntb - pack 8x 4-bit answers into output
- for i in range(8):
- comb += o[i*8:i*8+4].eq(pc8[i])
- with m.Elif(op.data_len[3] == 0):
- # popcntw - pack 2x 5-bit answers into output
- for i in range(2):
- comb += o[i*32:i*32+5].eq(pc32[i])
- with m.Else():
- # popcntd - put 1x 6-bit answer into output
- comb += o.eq(popcnt[0])
-
- ###### parity #######
- with m.Case(InternalOp.OP_PRTY):
- # strange instruction which XORs together the LSBs of each byte
- par0 = Signal(reset_less=True)
- par1 = Signal(reset_less=True)
- comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
- comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
- with m.If(op.data_len[3] == 1):
- comb += o.eq(par0 ^ par1)
- with m.Else():
- comb += o[0].eq(par0)
- comb += o[32].eq(par1)
-
- ###### cntlz #######
- with m.Case(InternalOp.OP_CNTZ):
- XO = self.fields.FormX.XO[0:-1]
- m.submodules.countz = countz = ZeroCounter()
- comb += countz.rs_i.eq(a)
- comb += countz.is_32bit_i.eq(op.is_32bit)
- comb += countz.count_right_i.eq(XO[-1])
- comb += o.eq(countz.result_o)
-
- ###### bpermd #######
- # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
-
- ###### sticky overflow and context, both pass-through #####
-
- comb += self.o.so.eq(self.i.so)
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-from nmigen import Signal, Const
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.alu.pipe_data import IntegerData
-
-
-class ALUInputData(IntegerData):
- def __init__(self, pspec):
- super().__init__(pspec)
- self.a = Signal(64, reset_less=True) # RA
- self.b = Signal(64, reset_less=True) # RB/immediate
- self.so = Signal(reset_less=True)
- self.carry_in = Signal(reset_less=True)
-
- def __iter__(self):
- yield from super().__iter__()
- yield self.a
- yield self.b
- yield self.carry_in
- yield self.so
-
- def eq(self, i):
- lst = super().eq(i)
- return lst + [self.a.eq(i.a), self.b.eq(i.b),
- self.carry_in.eq(i.carry_in),
- self.so.eq(i.so)]
+++ /dev/null
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.alu.input_stage import ALUInputStage
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.output_stage import ALUOutputStage
-
-class LogicalStages(PipeModBaseChain):
- def get_chain(self):
- inp = ALUInputStage(self.pspec)
- main = LogicalMainStage(self.pspec)
- out = ALUOutputStage(self.pspec)
- return [inp, main, out]
-
-
-class LogicalBasePipe(ControlBase):
- def __init__(self, pspec):
- ControlBase.__init__(self)
- self.pipe1 = LogicalStages(pspec)
- self._eqs = self.connect([self.pipe1])
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
- m.submodules.pipe = self.pipe1
- m.d.comb += self._eqs
- return m
+++ /dev/null
-'''Empty until I write the unit test'''
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.logical.pipeline import LogicalBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-
-class TestCase:
- def __init__(self, program, regs, sprs, name):
- self.program = program
- self.regs = regs
- self.sprs = sprs
- self.name = name
-
-def get_rec_width(rec):
- recwidth = 0
- # add up the widths of all of the record's port signals
- for p in rec.ports():
- width = p.width
- recwidth += width
- return recwidth
-
-def set_alu_inputs(alu, dec2, sim):
- # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
- # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
- # and place it into data_i.b
-
- reg3_ok = yield dec2.e.read_reg3.ok
- reg1_ok = yield dec2.e.read_reg1.ok
- assert reg3_ok != reg1_ok
- if reg3_ok:
- data1 = yield dec2.e.read_reg3.data
- data1 = sim.gpr(data1).value
- elif reg1_ok:
- data1 = yield dec2.e.read_reg1.data
- data1 = sim.gpr(data1).value
- else:
- data1 = 0
-
- yield alu.p.data_i.a.eq(data1)
-
- # If there's an immediate, set the B operand to that
- reg2_ok = yield dec2.e.read_reg2.ok
- imm_ok = yield dec2.e.imm_data.imm_ok
- if imm_ok:
- data2 = yield dec2.e.imm_data.imm
- elif reg2_ok:
- data2 = yield dec2.e.read_reg2.data
- data2 = sim.gpr(data2).value
- else:
- data2 = 0
- yield alu.p.data_i.b.eq(data2)
-
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
- carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
- yield alu.p.data_i.carry_in.eq(carry)
- so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
- yield alu.p.data_i.so.eq(so)
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in LogicalTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class LogicalTestCase(FHDLTestCase):
- def __init__(self, name):
- super().__init__(name)
- self.test_name = name
- def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
- tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
- test_data.append(tc)
-
- def test_rand(self):
- insns = ["and", "or", "xor"]
- for i in range(40):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1, 2"]
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- initial_regs[2] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_rand_imm_logical(self):
- insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
- for i in range(10):
- choice = random.choice(insns)
- imm = random.randint(0, (1<<16)-1)
- lst = [f"{choice} 3, 1, {imm}"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- @unittest.skip("broken")
- def test_cntz(self):
- insns = ["cntlzd", "cnttzd"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_parity(self):
- insns = ["prtyw", "prtyd"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_popcnt(self):
- insns = ["popcntb", "popcntw", "popcntd"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_cmpb(self):
- lst = ["cmpb 3, 1, 2"]
- initial_regs = [0] * 32
- initial_regs[1] = 0xdeadbeefcafec0de
- initial_regs[2] = 0xd0adb0000afec1de
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_ilang(self):
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- alu = LogicalBasePipe(pspec)
- vl = rtlil.convert(alu, ports=alu.ports())
- with open("logical_pipeline.il", "w") as f:
- f.write(vl)
-
-
-class TestRunner(FHDLTestCase):
- def __init__(self, test_data):
- super().__init__("run_all")
- self.test_data = test_data
-
- def run_all(self):
- m = Module()
- comb = m.d.comb
- instruction = Signal(32)
-
- pdecode = create_pdecode()
-
- m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- m.submodules.alu = alu = LogicalBasePipe(pspec)
-
- comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
- comb += alu.p.valid_i.eq(1)
- comb += alu.n.ready_i.eq(1)
- comb += pdecode2.dec.raw_opcode_in.eq(instruction)
- sim = Simulator(m)
-
- sim.add_clock(1e-6)
- def process():
- for test in self.test_data:
- print(test.name)
- program = test.program
- self.subTest(test.name)
- simulator = ISA(pdecode2, test.regs, test.sprs, 0)
- gen = program.generate_instructions()
- instructions = list(zip(gen, program.assembly.splitlines()))
-
- index = simulator.pc.CIA.value//4
- while index < len(instructions):
- ins, code = instructions[index]
-
- print("0x{:X}".format(ins & 0xffffffff))
- print(code)
-
- # ask the decoder to decode this binary data (endian'd)
- yield pdecode2.dec.bigendian.eq(0) # little / big?
- yield instruction.eq(ins) # raw binary instr.
- yield Settle()
- fn_unit = yield pdecode2.e.fn_unit
- self.assertEqual(fn_unit, Function.LOGICAL.value, code)
- yield from set_alu_inputs(alu, pdecode2, simulator)
- yield from set_extra_alu_inputs(alu, pdecode2, simulator)
- yield
- opname = code.split(' ')[0]
- yield from simulator.call(opname)
- index = simulator.pc.CIA.value//4
-
- vld = yield alu.n.valid_o
- while not vld:
- yield
- vld = yield alu.n.valid_o
- yield
- alu_out = yield alu.n.data_o.o
- out_reg_valid = yield pdecode2.e.write_reg.ok
- if out_reg_valid:
- write_reg_idx = yield pdecode2.e.write_reg.data
- expected = simulator.gpr(write_reg_idx).value
- print(f"expected {expected:x}, actual: {alu_out:x}")
- self.assertEqual(expected, alu_out, code)
- yield from self.check_extra_alu_outputs(alu, pdecode2,
- simulator)
-
- sim.add_sync_process(process)
- with sim.write_vcd("simulator.vcd", "simulator.gtkw",
- traces=[]):
- sim.run()
- def check_extra_alu_outputs(self, alu, dec2, sim):
- rc = yield dec2.e.rc.data
- if rc:
- cr_expected = sim.crl[0].get_range().value
- cr_actual = yield alu.n.data_o.cr0
- self.assertEqual(cr_expected, cr_actual)
-
-
-if __name__ == "__main__":
- unittest.main(exit=False)
- suite = unittest.TestSuite()
- suite.addTest(TestRunner(test_data))
-
- runner = unittest.TextTestRunner()
- runner.run(suite)
+++ /dev/null
-# Proof of correctness for the shift/rotate main stage
-# Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
-
-from nmigen import (Module, Signal, Elaboratable, Mux, Cat, Repl,
- signed)
-from nmigen.asserts import Assert, AnyConst, Assume, Cover
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-
-from soc.shift_rot.main_stage import ShiftRotMainStage
-from soc.alu.pipe_data import ALUPipeSpec
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp
-import unittest
-
-
-# This defines a module to drive the device under test and assert
-# properties about its outputs
-class Driver(Elaboratable):
- def __init__(self):
- # inputs and outputs
- pass
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- rec = CompALUOpSubset()
- recwidth = 0
- # Setup random inputs for dut.op
- for p in rec.ports():
- width = p.width
- recwidth += width
- comb += p.eq(AnyConst(width))
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=recwidth)
- m.submodules.dut = dut = ShiftRotMainStage(pspec)
-
- # convenience variables
- a = dut.i.rs
- b = dut.i.rb
- ra = dut.i.ra
- carry_in = dut.i.carry_in
- so_in = dut.i.so
- carry_out = dut.o.carry_out
- o = dut.o.o
-
- # setup random inputs
- comb += [a.eq(AnyConst(64)),
- b.eq(AnyConst(64)),
- carry_in.eq(AnyConst(1)),
- so_in.eq(AnyConst(1))]
-
- comb += dut.i.ctx.op.eq(rec)
-
- # Assert that op gets copied from the input to output
- for rec_sig in rec.ports():
- name = rec_sig.name
- dut_sig = getattr(dut.o.ctx.op, name)
- comb += Assert(dut_sig == rec_sig)
-
- # signed and signed/32 versions of input a
- a_signed = Signal(signed(64))
- a_signed_32 = Signal(signed(32))
- comb += a_signed.eq(a)
- comb += a_signed_32.eq(a[0:32])
-
- # main assertion of shift operations
- with m.Switch(rec.insn_type):
- with m.Case(InternalOp.OP_SHL):
- comb += Assume(ra == 0)
- with m.If(rec.is_32bit):
- comb += Assert(o[0:32] == ((a << b[0:6]) & 0xffffffff))
- comb += Assert(o[32:64] == 0)
- with m.Else():
- comb += Assert(o == ((a << b[0:7]) & ((1 << 64)-1)))
- with m.Case(InternalOp.OP_SHR):
- comb += Assume(ra == 0)
- with m.If(~rec.is_signed):
- with m.If(rec.is_32bit):
- comb += Assert(o[0:32] == (a[0:32] >> b[0:6]))
- comb += Assert(o[32:64] == 0)
- with m.Else():
- comb += Assert(o == (a >> b[0:7]))
- with m.Else():
- with m.If(rec.is_32bit):
- comb += Assert(o[0:32] == (a_signed_32 >> b[0:6]))
- comb += Assert(o[32:64] == Repl(a[31], 32))
- with m.Else():
- comb += Assert(o == (a_signed >> b[0:7]))
-
- return m
-
-
-class ALUTestCase(FHDLTestCase):
- def test_formal(self):
- module = Driver()
- self.assertFormal(module, mode="bmc", depth=2)
- self.assertFormal(module, mode="cover", depth=2)
- def test_ilang(self):
- dut = Driver()
- vl = rtlil.convert(dut, ports=[])
- with open("main_stage.il", "w") as f:
- f.write(vl)
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-# This stage is intended to adjust the input data before sending it to
-# the actual ALU. Things like handling inverting the input, carry_in
-# generation for subtraction, and handling of immediates should happen
-# here
-from nmigen import (Module, Signal, Cat, Const, Mux, Repl, signed,
- unsigned)
-from nmutil.pipemodbase import PipeModBase
-from soc.decoder.power_enums import InternalOp
-from soc.shift_rot.pipe_data import ShiftRotInputData
-from soc.decoder.power_enums import CryIn
-
-
-class ShiftRotInputStage(PipeModBase):
- def __init__(self, pspec):
- super().__init__(pspec, "input")
-
- def ispec(self):
- return ShiftRotInputData(self.pspec)
-
- def ospec(self):
- return ShiftRotInputData(self.pspec)
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
-
- ##### operand A #####
-
- # operand a to be as-is or inverted
- a = Signal.like(self.i.ra)
-
- with m.If(self.i.ctx.op.invert_a):
- comb += a.eq(~self.i.ra)
- with m.Else():
- comb += a.eq(self.i.ra)
-
- comb += self.o.ra.eq(a)
- comb += self.o.rb.eq(self.i.rb)
- comb += self.o.rs.eq(self.i.rs)
-
-
- ##### carry-in #####
-
- # either copy incoming carry or set to 1/0 as defined by op
- with m.Switch(self.i.ctx.op.input_carry):
- with m.Case(CryIn.ZERO):
- comb += self.o.carry_in.eq(0)
- with m.Case(CryIn.ONE):
- comb += self.o.carry_in.eq(1)
- with m.Case(CryIn.CA):
- comb += self.o.carry_in.eq(self.i.carry_in)
-
- ##### sticky overflow and context (both pass-through) #####
-
- comb += self.o.so.eq(self.i.so)
- comb += self.o.ctx.eq(self.i.ctx)
-
- return m
+++ /dev/null
-# This stage is intended to do most of the work of executing shift
-# instructions, as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
-from nmutil.pipemodbase import PipeModBase
-from soc.alu.pipe_data import ALUOutputData
-from soc.shift_rot.pipe_data import ShiftRotInputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.shift_rot.rotator import Rotator
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-
-
-class ShiftRotMainStage(PipeModBase):
-    """Main stage of the shift/rotate pipeline.
-
-    Decodes the mask fields from the instruction, drives the Rotator
-    submodule with the operands and a mode derived from the internal
-    op, and copies the rotator result and carry to the output record.
-    Sticky overflow and the context are passed straight through.
-    """
-
-    def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        # field decoder working on the raw instruction in the op context
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
-
-    def ispec(self):
-        """Create the input record for this stage."""
-        return ShiftRotInputData(self.pspec)
-
-    def ospec(self):
-        """Create the output record for this stage."""
-        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        op = self.i.ctx.op
-
-        # MB/ME are taken from the M-form fields, the additional mb bit
-        # from the MD-form 'mb' field.
-        m_form = self.fields.instrs['M']
-        md_form = self.fields.instrs['MD']
-        mb_field = m_form['MB'][0:-1]
-        me_field = m_form['ME'][0:-1]
-        mb = Signal(mb_field.shape())
-        me = Signal(me_field.shape())
-        mb_extra = Signal(1, reset_less=True)
-        comb += mb.eq(mb_field)
-        comb += me.eq(me_field)
-        comb += mb_extra.eq(md_form['mb'][0:-1][0])
-
-        # microwatt rotator and its operand wiring
-        m.submodules.rotator = rotator = Rotator()
-        comb += [
-            rotator.me.eq(me),
-            rotator.mb.eq(mb),
-            rotator.mb_extra.eq(mb_extra),
-            rotator.rs.eq(self.i.rs),
-            rotator.ra.eq(self.i.ra),
-            rotator.shift.eq(self.i.rb),
-            rotator.is_32bit.eq(op.is_32bit),
-            rotator.arith.eq(op.is_signed),
-        ]
-
-        # per-op mode bits: (clear_right, clear_left, right_shift) from
-        # MSB to LSB, matching the Cat() below.  Ops not listed leave
-        # mode at zero.
-        mode_table = (
-            (InternalOp.OP_SHL, 0b000),
-            (InternalOp.OP_SHR, 0b001),   # R-shift
-            (InternalOp.OP_RLC, 0b110),   # clear LR
-            (InternalOp.OP_RLCL, 0b010),  # clear L
-            (InternalOp.OP_RLCR, 0b100),  # clear R
-        )
-        mode = Signal(3, reset_less=True)
-        with m.Switch(op.insn_type):
-            for insn_type, bits in mode_table:
-                with m.Case(insn_type):
-                    comb += mode.eq(bits)
-
-        mode_bits = Cat(rotator.right_shift,
-                        rotator.clear_left,
-                        rotator.clear_right)
-        comb += mode_bits.eq(mode)
-
-        # results coming back from the rotator
-        comb += [self.o.o.eq(rotator.result_o),
-                 self.o.carry_out.eq(rotator.carry_out_o)]
-
-        ###### sticky overflow and context, both pass-through #####
-        comb += self.o.so.eq(self.i.so)
-        comb += self.o.ctx.eq(self.i.ctx)
-
-        return m
+++ /dev/null
-from nmigen import (Elaboratable, Signal, Module)
-import math
-
-class MaskGen(Elaboratable):
-    """MaskGen - create a diff mask
-
-    Two masks of low-order ones are built, one from mb and one from me,
-    and combined so that the output has ones between the two positions
-    (wrapping around when the first mask is the shorter one).
-
-    example: x=5 --> a=0b11111
-             y=3 --> b=0b00111
-             o:        0b11000
-             x=2 --> a=0b00011
-             y=4 --> b=0b01111
-             o:        0b10011
-
-    where x = width - mb and y = width - 1 - me.
-
-    Attributes:
-      width      -- number of bits in the output mask
-      shiftwidth -- number of bits used for mb and me
-      mb, me     -- inputs: mask begin / mask end
-      o          -- output: the generated mask
-    """
-    def __init__(self, width):
-        self.width = width
-        # ceil(log2(width)) computed with integers, no float rounding
-        self.shiftwidth = (width - 1).bit_length()
-        self.mb = Signal(self.shiftwidth, reset_less=True)
-        self.me = Signal(self.shiftwidth, reset_less=True)
-
-        self.o = Signal(width, reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-
-        x = Signal.like(self.mb)
-        y = Signal.like(self.mb)
-
-        # number of low-order ones in each intermediate mask.  When
-        # width is a power of two and mb is 0, x wraps to 0: mask_a is
-        # then empty and the Else branch below still yields ~mask_b,
-        # the same value an all-ones mask_a would give.
-        comb += x.eq(self.width - self.mb)
-        comb += y.eq((self.width - 1) - self.me)
-
-        mask_a = Signal.like(self.o)
-        mask_b = Signal.like(self.o)
-
-        comb += mask_a.eq((1<<x) - 1)
-        comb += mask_b.eq((1<<y) - 1)
-
-        with m.If(x > y):
-            # contiguous run of ones between the two positions
-            comb += self.o.eq(mask_a ^ mask_b)
-        with m.Else():
-            # wrapped mask: ones at both ends, zeros in the middle
-            comb += self.o.eq(mask_a ^ ~mask_b)
-
-        return m
-
-    def ports(self):
-        """Signals to expose when converting or tracing the module."""
-        return [self.mb, self.me, self.o]
+++ /dev/null
-from nmigen import Signal, Const
-from nmutil.dynamicpipe import SimpleHandshakeRedir
-from soc.alu.alu_input_record import CompALUOpSubset
-from ieee754.fpcommon.getop import FPPipeContext
-from soc.alu.pipe_data import IntegerData
-
-
-class ShiftRotInputData(IntegerData):
-    """Input record of the shift/rotate pipeline.
-
-    Extends IntegerData with the three operands (ra, rs, rb), the
-    sticky-overflow bit and the incoming carry.
-    """
-
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        self.ra = Signal(64, reset_less=True) # RA
-        self.rs = Signal(64, reset_less=True) # RS
-        self.rb = Signal(64, reset_less=True) # RB/immediate
-        self.so = Signal(reset_less=True)
-        self.carry_in = Signal(reset_less=True)
-
-    def __iter__(self):
-        """Iterate over the base-class fields, then the local ones."""
-        yield from super().__iter__()
-        yield from (self.ra, self.rs, self.rb, self.carry_in, self.so)
-
-    def eq(self, i):
-        """Return the assignments copying every field from *i*."""
-        assignments = super().eq(i)
-        local = [
-            self.rs.eq(i.rs),
-            self.ra.eq(i.ra),
-            self.rb.eq(i.rb),
-            self.carry_in.eq(i.carry_in),
-            self.so.eq(i.so),
-        ]
-        return assignments + local
+++ /dev/null
-from nmutil.singlepipe import ControlBase
-from nmutil.pipemodbase import PipeModBaseChain
-from soc.shift_rot.input_stage import ShiftRotInputStage
-from soc.shift_rot.main_stage import ShiftRotMainStage
-from soc.alu.output_stage import ALUOutputStage
-
-class ShiftRotStages(PipeModBaseChain):
-    """Combinatorial chain: input stage, main stage, ALU output stage."""
-
-    def get_chain(self):
-        """Return the stages in the order data flows through them."""
-        return [
-            ShiftRotInputStage(self.pspec),
-            ShiftRotMainStage(self.pspec),
-            ALUOutputStage(self.pspec),
-        ]
-
-
-class ShiftRotBasePipe(ControlBase):
-    """Pipeline wrapper holding a single ShiftRotStages chain."""
-
-    def __init__(self, pspec):
-        super().__init__()
-        self.pipe1 = ShiftRotStages(pspec)
-        # connection statements are created once here and added to the
-        # combinatorial domain in elaborate()
-        self._eqs = self.connect([self.pipe1])
-
-    def elaborate(self, platform):
-        m = super().elaborate(platform)
-        m.submodules.pipe = self.pipe1
-        m.d.comb += self._eqs
-        return m
+++ /dev/null
-# Manual translation and adaptation of rotator.vhdl from microwatt into nmigen
-#
-
-from nmigen import (Elaboratable, Signal, Module, Const, Cat,
- unsigned, signed)
-from soc.shift_rot.rotl import ROTL
-
-# note BE bit numbering
-def right_mask(m, mask_begin):
-    """Add a 64-bit mask with the low (64 - mask_begin) bits set to *m*.
-
-    The mask is only driven when mask_begin <= 64; otherwise it keeps
-    its default value.  Returns the mask signal.
-    """
-    mask = Signal(64, name="right_mask", reset_less=True)
-    in_range = mask_begin <= 64
-    with m.If(in_range):
-        ones = (1 << (64 - mask_begin)) - 1
-        m.d.comb += mask.eq(ones)
-    return mask
-
-def left_mask(m, mask_end):
-    """Add a 64-bit mask with the low (63 - mask_end) bits cleared to *m*.
-
-    Returns the mask signal.
-    """
-    mask = Signal(64, name="left_mask", reset_less=True)
-    low_ones = (1 << (63 - mask_end)) - 1
-    m.d.comb += mask.eq(~low_ones)
-    return mask
-
-
-class Rotator(Elaboratable):
-    """Rotator: covers multiple POWER9 rotate functions
-
-    supported modes:
-
-    * sl[wd]
-    * rlw*, rldic, rldicr, rldimi
-    * rldicl, sr[wd]
-    * sra[wd][i]
-
-    use as follows:
-
-    * shift = RB[0:7]
-    * arith = 1 when is_signed
-    * right_shift = 1 when insn_type is OP_SHR
-    * clear_left = 1 when insn_type is OP_RLC or OP_RLCL
-    * clear_right = 1 when insn_type is OP_RLC or OP_RLCR
-
-    outputs:
-
-    * result_o = rotated RS combined with RA under the computed masks
-    * carry_out_o = 1 when, in the arithmetic-right-shift output mode,
-      any bit of RS outside the left mask is set
-    """
-    def __init__(self):
-        # input
-        self.me = Signal(5, reset_less=True) # ME field
-        self.mb = Signal(5, reset_less=True) # MB field
-        self.mb_extra = Signal(1, reset_less=True) # extra bit of mb in MD-form
-        self.ra = Signal(64, reset_less=True) # RA
-        self.rs = Signal(64, reset_less=True) # RS
-        self.shift = Signal(7, reset_less=True) # RB[0:7]
-        self.is_32bit = Signal(reset_less=True)
-        self.right_shift = Signal(reset_less=True)
-        self.arith = Signal(reset_less=True)
-        self.clear_left = Signal(reset_less=True)
-        self.clear_right = Signal(reset_less=True)
-        # output
-        self.result_o = Signal(64, reset_less=True)
-        self.carry_out_o = Signal(reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb = m.d.comb
-        ra, rs = self.ra, self.rs
-
-        # temporaries
-        rot_in = Signal(64, reset_less=True)
-        rot_count = Signal(6, reset_less=True)
-        rot = Signal(64, reset_less=True)
-        sh = Signal(7, reset_less=True)
-        mb = Signal(7, reset_less=True)
-        me = Signal(7, reset_less=True)
-        mr = Signal(64, reset_less=True)
-        ml = Signal(64, reset_less=True)
-        output_mode = Signal(2, reset_less=True)
-
-        # First replicate bottom 32 bits to both halves if 32-bit
-        comb += rot_in[0:32].eq(rs[0:32])
-        with m.If(self.is_32bit):
-            comb += rot_in[32:64].eq(rs[0:32])
-        with m.Else():
-            comb += rot_in[32:64].eq(rs[32:64])
-
-        shift_signed = Signal(signed(6))
-        comb += shift_signed.eq(self.shift[0:6])
-
-        # Negate shift count for right shifts
-        with m.If(self.right_shift):
-            comb += rot_count.eq(-shift_signed)
-        with m.Else():
-            comb += rot_count.eq(self.shift[0:6])
-
-        # ROTL submodule
-        m.submodules.rotl = rotl = ROTL(64)
-        comb += rotl.a.eq(rot_in)
-        comb += rotl.b.eq(rot_count)
-        comb += rot.eq(rotl.o)
-
-        # Trim shift count to 6 bits for 32-bit shifts
-        comb += sh.eq(Cat(self.shift[0:6], self.shift[6] & ~self.is_32bit))
-
-        # XXX errr... we should already have these, in Fields?  oh well
-        # Work out mask begin/end indexes (caution, big-endian bit numbering)
-
-        # mask-begin (mb)
-        with m.If(self.clear_left):
-            comb += mb.eq(self.mb)
-            with m.If(self.is_32bit):
-                comb += mb[5:7].eq(Const(0b01, 2))
-            with m.Else():
-                comb += mb[5:7].eq(Cat(self.mb_extra, Const(0b0, 1)))
-        with m.Elif(self.right_shift):
-            # this is basically mb = sh + (is_32bit? 32: 0);
-            comb += mb.eq(sh)
-            with m.If(self.is_32bit):
-                comb += mb[5:7].eq(Cat(~sh[5], sh[5]))
-        with m.Else():
-            comb += mb.eq(Cat(Const(0b0, 5), self.is_32bit, Const(0b0, 1)))
-
-        # mask-end (me)
-        with m.If(self.clear_right & self.is_32bit):
-            # TODO: track down where this is.  have to use fields.
-            comb += me.eq(Cat(self.me, Const(0b01, 2)))
-        with m.Elif(self.clear_right & ~self.clear_left):
-            # this is me, have to use fields
-            comb += me.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1)))
-        with m.Else():
-            # effectively, 63 - sh
-            comb += me.eq(Cat(~sh[0:6], sh[6]))
-
-        # Calculate left and right masks
-        comb += mr.eq(right_mask(m, mb))
-        comb += ml.eq(left_mask(m, me))
-
-        # Work out output mode
-        # 00 for sl[wd]
-        # 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me
-        # 10 for rldicl, sr[wd]
-        # 1z for sra[wd][i], z = 1 if rs is negative
-        with m.If((self.clear_left & ~self.clear_right) | self.right_shift):
-            comb += output_mode.eq(Cat(self.arith & rot_in[63], Const(1, 1)))
-        with m.Else():
-            mbgt = self.clear_right & (mb[0:6] > me[0:6])
-            comb += output_mode.eq(Cat(mbgt, Const(0, 1)))
-
-        # Generate output from rotated input and masks
-        with m.Switch(output_mode):
-            with m.Case(0b00):
-                comb += self.result_o.eq((rot & (mr & ml)) | (ra & ~(mr & ml)))
-            with m.Case(0b01):
-                comb += self.result_o.eq((rot & (mr | ml)) | (ra & ~(mr | ml)))
-            with m.Case(0b10):
-                comb += self.result_o.eq(rot & mr)
-            with m.Case(0b11):
-                comb += self.result_o.eq(rot | ~mr)
-                # Generate carry output for arithmetic shift right of -ve
-                # value.  carry_out_o is a single bit, so the 64-bit
-                # expression must be OR-reduced; a plain assignment would
-                # keep only bit 0.
-                comb += self.carry_out_o.eq((rs & ~ml).bool())
-
-        return m
-
+++ /dev/null
-from nmigen import (Elaboratable, Signal, Module)
-import math
-
-class ROTL(Elaboratable):
-    """Rotate-left of a *width*-bit value.
-
-    a is the value, b the rotate amount and o the result, formed by
-    OR-ing a left shift by b with a right shift by (width - b).
-    """
-
-    def __init__(self, width):
-        self.width = width
-        self.shiftwidth = math.ceil(math.log2(width))
-        self.a = Signal(width, reset_less=True)
-        self.b = Signal(self.shiftwidth, reset_less=True)
-
-        self.o = Signal(width, reset_less=True)
-
-    def elaborate(self, platform):
-        m = Module()
-
-        shl = Signal.like(self.a)
-        shr = Signal.like(self.a)
-
-        # bits that stay inside the word, and bits that wrap around
-        complement = self.width - self.b
-        m.d.comb += [
-            shl.eq(self.a << self.b),
-            shr.eq(self.a >> complement),
-            self.o.eq(shl | shr),
-        ]
-        return m
+++ /dev/null
-from nmigen import Signal, Module
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-from soc.alu.maskgen import MaskGen
-from soc.decoder.helpers import MASK
-import random
-import unittest
-
-class MaskGenTestCase(FHDLTestCase):
-    """Checks MaskGen(64) against the reference MASK() helper."""
-
-    def test_maskgen(self):
-        """Simulate every (mb, me) pair in 0..63 and compare with MASK."""
-        m = Module()
-        m.submodules.dut = dut = MaskGen(64)
-        mb = Signal.like(dut.mb)
-        me = Signal.like(dut.me)
-        o = Signal.like(dut.o)
-
-        m.d.comb += [dut.mb.eq(mb), dut.me.eq(me), o.eq(dut.o)]
-
-        sim = Simulator(m)
-
-        def process():
-            for begin in range(64):
-                for end in range(64):
-                    yield mb.eq(begin)
-                    yield me.eq(end)
-                    yield Delay(1e-6)
-
-                    result = yield o
-                    self.assertEqual(MASK(begin, end), result)
-
-        sim.add_process(process)
-        vcd = sim.write_vcd("maskgen.vcd", "maskgen.gtkw",
-                            traces=dut.ports())
-        with vcd:
-            sim.run()
-
-    def test_ilang(self):
-        """Convert MaskGen(64) to RTLIL and write it to maskgen.il."""
-        dut = MaskGen(64)
-        ilang = rtlil.convert(dut, ports=dut.ports())
-        with open("maskgen.il", "w") as f:
-            f.write(ilang)
-
-# Run the test cases above when the file is executed as a script.
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-
-
-from soc.shift_rot.pipeline import ShiftRotBasePipe
-from soc.alu.alu_input_record import CompALUOpSubset
-from soc.alu.pipe_data import ALUPipeSpec
-import random
-
-class TestCase:
-    """One queued test: a program plus its initial state and a name."""
-
-    def __init__(self, program, regs, sprs, name):
-        self.program, self.regs = program, regs
-        self.sprs, self.name = sprs, name
-
-def get_rec_width(rec):
-    """Return the sum of the widths of every port of *rec*."""
-    return sum(port.width for port in rec.ports())
-
-def _read_gpr(port, sim):
-    """Read one decoder register port; give the GPR value, or 0 if unused.
-
-    Simulator process helper: use with ``yield from``.
-    """
-    ok = yield port.ok
-    if not ok:
-        return 0
-    sel = yield port.data
-    return sim.gpr(sel).value
-
-
-def set_alu_inputs(alu, dec2, sim):
-    """Drive ra/rb/rs of the pipeline input from the decoded instruction.
-
-    Simulator process helper: use with ``yield from``.
-
-    * ra comes from read_reg1, rs from read_reg3
-    * rb comes from read_reg2 when it is valid, otherwise from the
-      immediate when that is valid
-    * any operand without a valid source is driven with 0
-
-    Register values are taken from the ISA simulator *sim*.
-    """
-    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
-    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
-    # and place it into data_i.b
-
-    data3 = yield from _read_gpr(dec2.e.read_reg3, sim)
-    data1 = yield from _read_gpr(dec2.e.read_reg1, sim)
-
-    reg2_ok = yield dec2.e.read_reg2.ok
-    imm_ok = yield dec2.e.imm_data.ok
-    if reg2_ok:
-        reg2_sel = yield dec2.e.read_reg2.data
-        data2 = sim.gpr(reg2_sel).value
-    elif imm_ok:
-        data2 = yield dec2.e.imm_data.imm
-    else:
-        data2 = 0
-
-    yield alu.p.data_i.ra.eq(data1)
-    yield alu.p.data_i.rb.eq(data2)
-    yield alu.p.data_i.rs.eq(data3)
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
-    """Drive carry_in and so of the pipeline input from the sim's XER.
-
-    Simulator process helper: use with ``yield from``.
-    """
-    xer = sim.spr['XER']
-    carry = 1 if xer[XER_bits['CA']] else 0
-    yield alu.p.data_i.carry_in.eq(carry)
-    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
-    yield alu.p.data_i.so.eq(so)
-
-
-# This test bench is a bit different than is usual. Initially when I
-# was writing it, I had all of the tests call a function to create a
-# device under test and simulator, initialize the dut, run the
-# simulation for ~2 cycles, and assert that the dut output what it
-# should have. However, this was really slow, since it needed to
-# create and tear down the dut and simulator for every test case.
-
-# Now, instead of doing that, every test case in ALUTestCase puts some
-# data into the test_data list below, describing the instructions to
-# be tested and the initial state. Once all the tests have been run,
-# test_data gets passed to TestRunner which then sets up the DUT and
-# simulator once, runs all the data through it, and asserts that the
-# results match the pseudocode sim at every cycle.
-
-# By doing this, I've reduced the time it takes to run the test suite
-# massively. Before, it took around 1 minute on my computer, now it
-# takes around 3 seconds
-
-test_data = []
-
-
-class ALUTestCase(FHDLTestCase):
-    """Queues shift/rotate programs for TestRunner.
-
-    The test methods do not simulate anything themselves: each one
-    appends TestCase entries to the module-level test_data list.
-    """
-
-    def __init__(self, name):
-        super().__init__(name)
-        self.test_name = name
-
-    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
-        """Queue *prog* with its initial register and SPR state.
-
-        initial_regs defaults to 32 zeroed registers and initial_sprs to
-        an empty dict.  Fresh objects are created on every call so that
-        queued test cases never share mutable state.
-        """
-        if initial_regs is None:
-            initial_regs = [0] * 32
-        if initial_sprs is None:
-            initial_sprs = {}
-        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
-        test_data.append(tc)
-
-    def test_shift(self):
-        insns = ["slw", "sld", "srw", "srd", "sraw", "srad"]
-        for i in range(20):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1, 2"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, 63)
-            print(initial_regs[1], initial_regs[2])
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_shift_arith(self):
-        lst = ["sraw 3, 1, 2"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        initial_regs[2] = random.randint(0, 63)
-        print(initial_regs[1], initial_regs[2])
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_shift_once(self):
-        lst = ["slw 3, 1, 4",
-               "slw 3, 1, 2"]
-        initial_regs = [0] * 32
-        initial_regs[1] = 0x80000000
-        initial_regs[2] = 0x40
-        initial_regs[4] = 0x00
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwinm(self):
-        for i in range(10):
-            # rlwinm operands are RA, RS, SH, MB, ME: name the random
-            # values after the position they are used in
-            sh = random.randint(0,31)
-            mb = random.randint(0,31)
-            me = random.randint(0,31)
-            lst = [f"rlwinm 3, 1, {sh}, {mb}, {me}"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwimi(self):
-        lst = ["rlwimi 3, 1, 5, 20, 6"]
-        initial_regs = [0] * 32
-        initial_regs[1] = 0xdeadbeef
-        initial_regs[3] = 0x12345678
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwnm(self):
-        lst = ["rlwnm 3, 1, 2, 20, 6"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        initial_regs[2] = random.randint(0, 63)
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rldicl(self):
-        lst = ["rldicl 3, 1, 5, 20"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rldicr(self):
-        lst = ["rldicr 3, 1, 5, 20"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlc(self):
-        insns = ["rldic", "rldicl", "rldicr"]
-        for i in range(20):
-            choice = random.choice(insns)
-            sh = random.randint(0, 63)
-            m = random.randint(0, 63)
-            lst = [f"{choice} 3, 1, {sh}, {m}"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_ilang(self):
-        """Convert the pipeline to RTLIL and write it to pipeline.il."""
-        rec = CompALUOpSubset()
-
-        pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
-        alu = ShiftRotBasePipe(pspec)
-        vl = rtlil.convert(alu, ports=alu.ports())
-        with open("pipeline.il", "w") as f:
-            f.write(vl)
-
-
-# Runs every queued TestCase through one shared decoder + shift/rotate
-# pipeline instance and compares the pipeline output with the ISA
-# simulator after each instruction.
-class TestRunner(FHDLTestCase):
- def __init__(self, test_data):
- # "run_all" is the only test method this case executes
- super().__init__("run_all")
- self.test_data = test_data
-
- def run_all(self):
- m = Module()
- comb = m.d.comb
- instruction = Signal(32)
-
- pdecode = create_pdecode()
-
- m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
-
- rec = CompALUOpSubset()
-
- pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- m.submodules.alu = alu = ShiftRotBasePipe(pspec)
-
- # the decoder output feeds the pipeline's operation record; the
- # pipeline input is always valid and its output always accepted
- comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
- comb += alu.p.valid_i.eq(1)
- comb += alu.n.ready_i.eq(1)
- comb += pdecode2.dec.raw_opcode_in.eq(instruction)
- sim = Simulator(m)
-
- sim.add_clock(1e-6)
- def process():
- for test in self.test_data:
- print(test.name)
- program = test.program
- # NOTE(review): the object returned by subTest() is not entered
- # with a "with" statement here, so it looks like this call has
- # no effect -- confirm
- self.subTest(test.name)
- simulator = ISA(pdecode2, test.regs, test.sprs, 0)
- gen = program.generate_instructions()
- instructions = list(zip(gen, program.assembly.splitlines()))
-
- # the instruction to run is selected by the simulator's current
- # instruction address, four bytes per instruction
- index = simulator.pc.CIA.value//4
- while index < len(instructions):
- ins, code = instructions[index]
-
- print("0x{:X}".format(ins & 0xffffffff))
- print(code)
-
- # ask the decoder to decode this binary data (endian'd)
- yield pdecode2.dec.bigendian.eq(0) # little / big?
- yield instruction.eq(ins) # raw binary instr.
- yield Settle()
- # every queued instruction must decode to the shift/rotate unit
- fn_unit = yield pdecode2.e.fn_unit
- self.assertEqual(fn_unit, Function.SHIFT_ROT.value)
- yield from set_alu_inputs(alu, pdecode2, simulator)
- yield from set_extra_alu_inputs(alu, pdecode2, simulator)
- yield
- # run the same instruction on the ISA simulator for reference
- opname = code.split(' ')[0]
- yield from simulator.call(opname)
- index = simulator.pc.CIA.value//4
-
- # wait for the pipeline to flag valid output, then advance one
- # more clock before sampling the result
- vld = yield alu.n.valid_o
- while not vld:
- yield
- vld = yield alu.n.valid_o
- yield
- alu_out = yield alu.n.data_o.o
- out_reg_valid = yield pdecode2.e.write_reg.ok
- if out_reg_valid:
- write_reg_idx = yield pdecode2.e.write_reg.data
- expected = simulator.gpr(write_reg_idx).value
- msg = f"expected {expected:x}, actual: {alu_out:x}"
- self.assertEqual(expected, alu_out, msg)
- yield from self.check_extra_alu_outputs(alu, pdecode2,
- simulator)
-
- sim.add_sync_process(process)
- with sim.write_vcd("simulator.vcd", "simulator.gtkw",
- traces=[]):
- sim.run()
- # Compares the pipeline's cr0 output with the simulator's CR field 0,
- # but only when the decoded instruction has rc.data set.
- def check_extra_alu_outputs(self, alu, dec2, sim):
- rc = yield dec2.e.rc.data
- if rc:
- cr_expected = sim.crl[0].get_range().value
- cr_actual = yield alu.n.data_o.cr0
- self.assertEqual(cr_expected, cr_actual)
-
-
-if __name__ == "__main__":
-    # Pass 1: the ordinary test methods only fill test_data.  exit=False
-    # keeps the interpreter alive for the second pass.
-    unittest.main(exit=False)
-
-    # Pass 2: replay everything that was queued through TestRunner.
-    suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
-    unittest.TextTestRunner().run(suite)