From 4de4739d2e1cca5a84e888657f41fd335cdab9ce Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Thu, 9 Apr 2020 13:57:29 +0100
Subject: [PATCH] experiment morphing ALU to take subset of Decode2ToExecute1

---
Review notes (ignored by "git am"):
 * The line structure of this part of the patch was rebuilt from a
   whitespace-collapsed copy; indentation and the placement of blank
   context lines are best-effort and should be checked against the tree.
 * ALU.elaborate: the delay-counter selection is a single
   If/Elif/Elif/Else chain again.  With a second "m.If" on OP_ADD the
   trailing "m.Else" also fired for MUL and SHR and, being the last active
   assignment, replaced their counter values (5 and 7) with 2.
 * The comments on that chain now describe the ops the code tests for.

 src/soc/experiment/alu_hier.py | 154 +++++++++++++++++++++++++++++----
 src/soc/experiment/compalu.py  |  11 +--
 2 files changed, 139 insertions(+), 26 deletions(-)

diff --git a/src/soc/experiment/alu_hier.py b/src/soc/experiment/alu_hier.py
index 27331dfd..9fd21c49 100644
--- a/src/soc/experiment/alu_hier.py
+++ b/src/soc/experiment/alu_hier.py
@@ -10,22 +10,92 @@ only one cycle (sync)
 """
 
 from nmigen import Elaboratable, Signal, Module, Const, Mux
+from nmigen.hdl.rec import Record, Layout
 from nmigen.cli import main
 from nmigen.cli import verilog, rtlil
-from soc.decoder.power_enums import InternalOp
+from nmigen.compat.sim import run_simulation
+
+from soc.decoder.power_enums import InternalOp, CryIn
 
 import operator
 
 
+class CompALUOpSubset(Record):
+    """CompALUOpSubset
+
+    a copy of the relevant subset information from Decode2Execute1Type
+    needed for ALU operations.
+    """
+    def __init__(self):
+        layout = (('insn_type', InternalOp),
+                  ('nia', 64),
+                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+                  #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
+                  #'xerc = XerBits() # NO: this is from the XER SPR
+                  ('lk', 1),
+                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
+                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
+                  ('invert_a', 1),
+                  ('invert_out', 1),
+                  ('input_carry', CryIn),
+                  ('output_carry', 1),
+                  ('input_cr', 1),
+                  ('output_cr', 1),
+                  ('is_32bit', 1),
+                  ('is_signed', 1),
+                  ('byte_reverse', 1),
+                  ('sign_extend', 1))
+
+        Record.__init__(self, Layout(layout))
+
+        # grrr. Record does not have kwargs
+        self.insn_type.reset_less = True
+        self.nia.reset_less = True
+        #self.cr = Signal(32, reset_less = True
+        #self.xerc = XerBits(
+        self.lk.reset_less = True
+        self.invert_a.reset_less = True
+        self.invert_out.reset_less = True
+        self.input_carry.reset_less = True
+        self.output_carry.reset_less = True
+        self.input_cr.reset_less = True
+        self.output_cr.reset_less = True
+        self.is_32bit.reset_less = True
+        self.is_signed.reset_less = True
+        self.byte_reverse.reset_less = True
+        self.sign_extend.reset_less = True
+
+    def ports(self):
+        return [self.insn_type,
+                self.nia,
+                #self.cr,
+                #self.xerc,
+                self.lk,
+                self.invert_a,
+                self.invert_out,
+                self.input_carry,
+                self.output_carry,
+                self.input_cr,
+                self.output_cr,
+                self.is_32bit,
+                self.is_signed,
+                self.byte_reverse,
+                self.sign_extend,
+        ]
+
 class Adder(Elaboratable):
     def __init__(self, width):
+        self.invert_a = Signal()
         self.a = Signal(width)
         self.b = Signal(width)
         self.o = Signal(width)
 
     def elaborate(self, platform):
         m = Module()
-        m.d.comb += self.o.eq(self.a + self.b)
+        with m.If(self.invert_a):
+            m.d.comb += self.o.eq((~self.a) + self.b)
+        with m.Else():
+            m.d.comb += self.o.eq(self.a + self.b)
         return m
 
 
@@ -75,7 +145,7 @@ class ALU(Elaboratable):
         self.n_ready_i = Signal()
         self.n_valid_o = Signal()
         self.counter = Signal(4)
-        self.op = Signal(InternalOp)
+        self.op = CompALUOpSubset()
         self.a = Signal(width)
         self.b = Signal(width)
         self.o = Signal(width)
@@ -84,21 +154,23 @@ class ALU(Elaboratable):
     def elaborate(self, platform):
         m = Module()
         add = Adder(self.width)
-        sub = Subtractor(self.width)
         mul = Multiplier(self.width)
         shf = Shifter(self.width)
 
         m.submodules.add = add
-        m.submodules.sub = sub
         m.submodules.mul = mul
         m.submodules.shf = shf
 
         # really should not activate absolutely all ALU inputs like this
-        for mod in [add, sub, mul, shf]:
+        for mod in [add, mul, shf]:
             m.d.comb += [
                 mod.a.eq(self.a),
                 mod.b.eq(self.b),
             ]
+
+        # pass invert (and carry later)
+        m.d.comb += add.invert_a.eq(self.op.invert_a)
+
         go_now = Signal(reset_less=True) # testing no-delay ALU
 
         with m.If(self.p_valid_i):
@@ -108,26 +180,28 @@ class ALU(Elaboratable):
                 m.d.sync += self.p_ready_o.eq(1)
 
                 # as this is a "fake" pipeline, just grab the output right now
-                with m.If(self.op == InternalOp.OP_ADD):
+                with m.If(self.op.insn_type == InternalOp.OP_ADD):
                     m.d.sync += self.o.eq(add.o)
-                with m.Elif(self.op == InternalOp.OP_MUL_L64):
+                with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
                     m.d.sync += self.o.eq(mul.o)
-                with m.Elif(self.op == InternalOp.OP_SHR):
+                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                     m.d.sync += self.o.eq(shf.o)
                 # TODO: SUB
-                with m.Switch(self.op):
-                    for i, mod in enumerate([add, sub, mul, shf]):
-                        with m.Case(i):
-                            m.d.sync += self.o.eq(mod.o)
 
-                with m.If(self.op == 2): # MUL, to take 5 instructions
+                # NOTE: all of these are fake, just something to test
+
+                # MUL, to take 5 instructions
+                with m.If(self.op.insn_type == InternalOp.OP_MUL_L64.value):
                     m.d.sync += self.counter.eq(5)
-                with m.Elif(self.op == 3): # SHIFT to take 7
+                # SHIFT to take 7
+                with m.Elif(self.op.insn_type == InternalOp.OP_SHR.value):
                     m.d.sync += self.counter.eq(7)
-                with m.Elif(self.op == 1): # SUB to take 1, straight away
+                # ADD to take 1, straight away
+                with m.Elif(self.op.insn_type == InternalOp.OP_ADD.value):
                     m.d.sync += self.counter.eq(1)
                     m.d.comb += go_now.eq(1)
-                with m.Else(): # ADD to take 2
+                # every other op to take 2
+                with m.Else():
                     m.d.sync += self.counter.eq(2)
         with m.Else():
             # input says no longer valid, so drop ready as well.
@@ -151,7 +225,7 @@ class ALU(Elaboratable):
         return m
 
     def __iter__(self):
-        yield self.op
+        yield from self.op.ports()
         yield self.a
         yield self.b
         yield self.o
@@ -247,13 +321,55 @@ class BranchALU(Elaboratable):
     def ports(self):
         return list(self)
 
+def run_op(dut, a, b, op, inv_a=0):
+    yield dut.a.eq(a)
+    yield dut.b.eq(b)
+    yield dut.op.insn_type.eq(op)
+    yield dut.op.invert_a.eq(inv_a)
+    yield dut.n_ready_i.eq(0)
+    yield dut.p_valid_i.eq(1)
+    yield
+    while True:
+        yield
+        n_valid_o = yield dut.n_valid_o
+        if n_valid_o:
+            break
+    yield
 
-if __name__ == "__main__":
+    result = yield dut.o
+    yield dut.p_valid_i.eq(0)
+    yield dut.n_ready_i.eq(0)
+    yield
+
+    return result
+
+
+def alu_sim(dut):
+    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
+    print ("alu_sim add", result)
+    assert (result == 8)
+
+    result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
+    print ("alu_sim mul", result)
+    assert (result == 6)
+
+    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
+    print ("alu_sim add-inv", result)
+    assert (result == 65533)
+
+
+def test_alu():
     alu = ALU(width=16)
+    run_simulation(alu, alu_sim(alu), vcd_name='test_alusim.vcd')
+
     vl = rtlil.convert(alu, ports=alu.ports())
     with open("test_alu.il", "w") as f:
         f.write(vl)
 
+
+if __name__ == "__main__":
+    test_alu()
+
     alu = BranchALU(width=16)
     vl = rtlil.convert(alu, ports=alu.ports())
     with open("test_branch_alu.il", "w") as f:
diff --git a/src/soc/experiment/compalu.py b/src/soc/experiment/compalu.py
index 55e2799f..9fcced73 100644
--- a/src/soc/experiment/compalu.py
+++ b/src/soc/experiment/compalu.py
@@ -6,6 +6,7 @@ from nmutil.latch import SRLatch, latchregister
 from soc.decoder.power_decoder2 import Data
 from soc.decoder.power_enums import InternalOp
 
+from soc.experiment.alu_hier import CompALUOpSubset
 
 """ Computation Unit (aka "ALU Manager").
 
@@ -39,13 +40,9 @@ from soc.decoder.power_enums import InternalOp
       register is placed combinatorially onto the output, and (2) the
       req_l latch is cleared, busy is dropped, and the Comp Unit is back
       through its revolving door to do another task.
-
-    Notes on oper_i:
-
-    * bits[0:2] are for the ALU, add=0, sub=1, shift=2, mul=3
-    * bit[2] are the immediate (bit[2]=1 == immediate mode)
 """
 
+
 class ComputationUnitNoDelay(Elaboratable):
     def __init__(self, rwid, e, alu):
         self.rwid = rwid
@@ -60,8 +57,8 @@ class ComputationUnitNoDelay(Elaboratable):
         self.go_die_i = Signal() # go die (reset)
 
         # operation / data input
-        self.oper_i = e.insn_type # operand
-        self.imm_i = e.imm_data # immediate in
+        self.oper_i = CompALUOpSubset() # operand
+        self.imm_i = self.oper_i.imm_data # immediate in
         self.src1_i = Signal(rwid, reset_less=True) # oper1 in
         self.src2_i = Signal(rwid, reset_less=True) # oper2 in
 
-- 
2.30.2