X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fexperiment%2Fscore6600_multi.py;h=22d8e2d1a76d93d8b33318db42b18c1e66bce0f2;hb=65f1492b3d3531687ba90c5c537453cde0e6e5fd;hp=2c9b2febfeeb49c01985cddac82279d6e6af8ccc;hpb=979e000673b7ae1186536b08570bbbb122be9cf7;p=soc.git diff --git a/src/soc/experiment/score6600_multi.py b/src/soc/experiment/score6600_multi.py index 2c9b2feb..22d8e2d1 100644 --- a/src/soc/experiment/score6600_multi.py +++ b/src/soc/experiment/score6600_multi.py @@ -4,7 +4,7 @@ from nmigen.hdl.ast import unsigned from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory from nmigen.back.pysim import Delay -from soc.regfile.regfile import RegFileArray, treereduce +from soc.regfile.regfile import RegFileArray, ortreereduce from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix from soc.scoremulti.fu_reg_matrix import FURegDepMatrix from soc.scoreboard.global_pending import GlobalPending @@ -15,15 +15,19 @@ from soc.scoreboard.instruction_q import Instruction, InstructionQ from soc.scoreboard.memfu import MemFunctionUnits from soc.experiment.compalu import ComputationUnitNoDelay -from soc.experiment.compalu_multi import ComputationUnitNoDelay as MultiCompUnit -from soc.experiment.compldst import LDSTCompUnit -from soc.experiment.testmem import TestMemory +from soc.experiment.compalu_multi import MultiCompUnit, go_record +from soc.experiment.compldst_multi import LDSTCompUnit +from soc.experiment.compldst_multi import CompLDSTOpSubset +from soc.experiment.l0_cache import TstL0CacheBuffer -from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset +from soc.experiment.alu_hier import ALU, BranchALU +from soc.fu.alu.alu_input_record import CompALUOpSubset -from soc.decoder.power_enums import InternalOp, Function +from soc.decoder.power_enums import MicrOp, Function from soc.decoder.power_decoder import (create_pdecode) from soc.decoder.power_decoder2 import (PowerDecode2) +from soc.decoder.power_decoder2 import Decode2ToExecute1Type + from soc.simulator.program import Program @@ -85,10 +89,11 @@ class CompUnitsBase(Elaboratable): # inputs self.issue_i = Signal(n_units, reset_less=True) - self.go_rd0_i = Signal(n_units, reset_less=True) - self.go_rd1_i = Signal(n_units, reset_less=True) - self.go_rd_i = [self.go_rd0_i, self.go_rd1_i] # XXX HACK! - self.go_wr_i = Signal(n_units, reset_less=True) + self.rd0 = go_record(n_units, "rd0") + self.rd1 = go_record(n_units, "rd1") + self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK! + self.wr0 = go_record(n_units, "wr0") + self.go_wr_i = [self.wr0.go] self.shadown_i = Signal(n_units, reset_less=True) self.go_die_i = Signal(n_units, reset_less=True) if ldstmode: @@ -97,10 +102,8 @@ class CompUnitsBase(Elaboratable): # outputs self.busy_o = Signal(n_units, reset_less=True) - self.rd_rel0_o = Signal(n_units, reset_less=True) - self.rd_rel1_o = Signal(n_units, reset_less=True) - self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o] # HACK! - self.req_rel_o = Signal(n_units, reset_less=True) + self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK! + self.req_rel_o = self.wr0.rel self.done_o = Signal(n_units, reset_less=True) if ldstmode: self.ld_o = Signal(n_units, reset_less=True) # op is LD @@ -140,36 +143,24 @@ class CompUnitsBase(Elaboratable): done_l.append(alu.done_o) shadow_l.append(alu.shadown_i) godie_l.append(alu.go_die_i) - print (alu, alu.rd_rel_o) - if isinstance(alu, LDSTCompUnit) or \ - isinstance(alu, CompUnitBR) or \ - isinstance(alu, ComputationUnitNoDelay): - dummy1 = Signal(64, reset_less=True) - dummy2 = Signal(64, reset_less=True) - dummy3 = Signal(64, reset_less=True) - go_wr_l.append(dummy1) - go_rd_l0.append(dummy2) - go_rd_l1.append(dummy3) - else: - rd_rel0_l.append(alu.rd_rel_o[0]) - rd_rel1_l.append(alu.rd_rel_o[1]) - go_wr_l.append(alu.go_wr_i[0]) - go_rd_l0.append(alu.go_rd_i[0]) - go_rd_l1.append(alu.go_rd_i[1]) - rd_rel0_l.append(Const(0, 64)) # FIXME - rd_rel1_l.append(Const(0, 64)) # FIXME + print(alu, "rel", alu.req_rel_o, alu.rd_rel_o) + rd_rel0_l.append(alu.rd_rel_o[0]) + rd_rel1_l.append(alu.rd_rel_o[1]) + go_wr_l.append(alu.go_wr_i) + go_rd_l0.append(alu.go_rd_i[0]) + go_rd_l1.append(alu.go_rd_i[1]) issue_l.append(alu.issue_i) busy_l.append(alu.busy_o) - comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l)) - comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l)) + comb += self.rd0.rel.eq(Cat(*rd_rel0_l)) + comb += self.rd1.rel.eq(Cat(*rd_rel1_l)) comb += self.req_rel_o.eq(Cat(*req_rel_l)) comb += self.done_o.eq(Cat(*done_l)) comb += self.busy_o.eq(Cat(*busy_l)) comb += Cat(*godie_l).eq(self.go_die_i) comb += Cat(*shadow_l).eq(self.shadown_i) - comb += Cat(*go_wr_l).eq(self.go_wr_i) - comb += Cat(*go_rd_l0).eq(self.go_rd0_i) - comb += Cat(*go_rd_l1).eq(self.go_rd1_i) + comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO + comb += Cat(*go_rd_l0).eq(self.rd0.go) + comb += Cat(*go_rd_l1).eq(self.rd1.go) comb += Cat(*issue_l).eq(self.issue_i) # connect data register input/output @@ -179,10 +170,10 @@ class CompUnitsBase(Elaboratable): # protected by a single go_wr. multi-issue requires a bus # to be inserted here. if self.units: - data_o = treereduce(self.units, "data_o") + data_o = ortreereduce(self.units, "data_o") comb += self.data_o.eq(data_o) if self.ldstmode: - addr_o = treereduce(self.units, "addr_o") + addr_o = ortreereduce(self.units, "addr_o") comb += self.addr_o.eq(addr_o) for i, alu in enumerate(self.units): @@ -223,7 +214,7 @@ class CompUnitsBase(Elaboratable): class CompUnitLDSTs(CompUnitsBase): - def __init__(self, rwid, opwid, n_ldsts, mem): + def __init__(self, rwid, opwid, n_ldsts, l0): """ Inputs: * :rwid: bit width of register file(s) - both FP and INT @@ -232,18 +223,13 @@ class CompUnitLDSTs(CompUnitsBase): self.opwid = opwid # inputs - self.oper_i = Signal(opwid, reset_less=True) - self.imm_i = Signal(rwid, reset_less=True) - - # Int ALUs - self.alus = [] - for i in range(n_ldsts): - self.alus.append(ALU(rwid)) + self.op = CompLDSTOpSubset("cul_i") + # LD/ST Units units = [] - for alu in self.alus: - aluopwid = 4 # see compldst.py for "internal" opcode - units.append(LDSTCompUnit(rwid, aluopwid, alu, mem)) + for i in range(n_ldsts): + pi = l0.l0.dports[i].pi + units.append(LDSTCompUnit(pi, rwid, awid=48)) CompUnitsBase.__init__(self, rwid, units, ldstmode=True) @@ -251,11 +237,9 @@ class CompUnitLDSTs(CompUnitsBase): m = CompUnitsBase.elaborate(self, platform) comb = m.d.comb - # hand the same operation to all units, 4 lower bits though - for alu in self.units: - comb += alu.oper_i[0:4].eq(self.oper_i) - comb += alu.imm_i.eq(self.imm_i) - comb += alu.isalu_i.eq(0) + # hand the same operation to all units + for ldst in self.units: + comb += ldst.oper_i.eq(self.op) return m @@ -272,8 +256,6 @@ class CompUnitALUs(CompUnitsBase): # inputs self.op = CompALUOpSubset("cua_i") - self.oper_i = Signal(opwid, reset_less=True) - self.imm_i = Signal(rwid, reset_less=True) # Int ALUs alus = [] @@ -283,7 +265,7 @@ class CompUnitALUs(CompUnitsBase): units = [] for alu in alus: aluopwid = 3 # extra bit for immediate mode - units.append(MultiCompUnit(rwid, alu)) + units.append(MultiCompUnit(rwid, alu, CompALUOpSubset)) CompUnitsBase.__init__(self, rwid, units) @@ -294,8 +276,6 @@ class CompUnitALUs(CompUnitsBase): # hand the subset of operation to ALUs for alu in self.units: comb += alu.oper_i.eq(self.op) - #comb += alu.oper_i[0:3].eq(self.oper_i) - #comb += alu.imm_i.eq(self.imm_i) return m @@ -314,13 +294,14 @@ class CompUnitBR(CompUnitsBase): self.opwid = opwid # inputs + self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset self.oper_i = Signal(opwid, reset_less=True) self.imm_i = Signal(rwid, reset_less=True) # Branch ALU and CU self.bgt = BranchALU(rwid) aluopwid = 3 # extra bit for immediate mode - self.br1 = ComputationUnitNoDelay(rwid, self.bgt) + self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset) CompUnitsBase.__init__(self, rwid, [self.br1]) def elaborate(self, platform): @@ -329,6 +310,7 @@ class CompUnitBR(CompUnitsBase): # hand the same operation to all units for alu in self.units: + # comb += alu.oper_i.eq(self.op) # TODO comb += alu.oper_i.eq(self.oper_i) #comb += alu.imm_i.eq(self.imm_i) @@ -353,30 +335,34 @@ class FunctionUnits(Elaboratable): rsel = [] rd = [] for i in range(n_src): - j = i + 1 # name numbering to match src1/src2 + j = i + 1 # name numbering to match src1/src2 src.append(Signal(n_reg, name="src%d" % j, reset_less=True)) - rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True)) + rsel.append(Signal(n_reg, name="src%d_rsel_o" % + j, reset_less=True)) rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True)) dst = [] dsel = [] wr = [] for i in range(n_dst): - j = i + 1 # name numbering to match src1/src2 + j = i + 1 # name numbering to match src1/src2 dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True)) - dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True)) + dsel.append(Signal(n_reg, name="dst%d_rsel_o" % + j, reset_less=True)) wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True)) wpnd = [] pend = [] for i in range(nf): - j = i + 1 # name numbering to match src1/src2 - pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True)) - wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True)) + j = i + 1 # name numbering to match src1/src2 + pend.append(Signal(nf, name="rd_src%d_pend_o" % + j, reset_less=True)) + wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % + j, reset_less=True)) self.dest_i = Array(dst) # Dest in (top) self.src_i = Array(src) # oper in (top) # for Register File Select Lines (horizontal), per-reg - self.dst_rsel_o = Array(dsel) # dest reg (bot) + self.dst_rsel_o = Array(dsel) # dest reg (bot) self.src_rsel_o = Array(rsel) # src reg (bot) self.go_rd_i = Array(rd) @@ -418,13 +404,13 @@ class FunctionUnits(Elaboratable): # Connect function issue / arrays, and dest/src1/src2 for i in range(self.n_src): - print (i, self.go_rd_i, intfudeps.go_rd_i) + print(i, self.go_rd_i, intfudeps.go_rd_i) comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i]) comb += intregdeps.src_i[i].eq(self.src_i[i]) comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i]) comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i]) for i in range(self.n_dst): - print (i, self.go_wr_i, intfudeps.go_wr_i) + print(i, self.go_wr_i, intfudeps.go_wr_i) comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i]) comb += intregdeps.dest_i[i].eq(self.dest_i[i]) comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i]) @@ -450,18 +436,17 @@ class Scoreboard(Elaboratable): self.fpregs = RegFileArray(rwid, n_regs) # Memory (test for now) - self.mem = TestMemory(self.rwid, 8) # not too big, takes too long + self.l0 = TstL0CacheBuffer() # issue q needs to get at these self.aluissue = IssueUnitGroup(2) self.lsissue = IssueUnitGroup(2) self.brissue = IssueUnitGroup(1) # and these - self.alu_op = CompALUOpSubset("alu") + self.instr = Decode2ToExecute1Type("sc_instr") self.br_oper_i = Signal(4, reset_less=True) self.br_imm_i = Signal(rwid, reset_less=True) self.ls_oper_i = Signal(4, reset_less=True) - self.ls_imm_i = Signal(rwid, reset_less=True) # inputs self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in @@ -488,7 +473,7 @@ class Scoreboard(Elaboratable): m.submodules.intregs = self.intregs m.submodules.fpregs = self.fpregs - m.submodules.mem = mem = self.mem + m.submodules.l0 = l0 = self.l0 # register ports int_dest = self.intregs.write_port("dest") @@ -506,7 +491,7 @@ class Scoreboard(Elaboratable): # LDST Comp Units n_ldsts = 2 - cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem) + cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0) # Comp Units m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub]) @@ -573,11 +558,10 @@ class Scoreboard(Elaboratable): ] # take these to outside (issue needs them) - comb += cua.op.eq(self.alu_op) + comb += cua.op.eq_from_execute1(self.instr) comb += cub.oper_i.eq(self.br_oper_i) comb += cub.imm_i.eq(self.br_imm_i) - comb += cul.oper_i.eq(self.ls_oper_i) - comb += cul.imm_i.eq(self.ls_imm_i) + comb += cul.op.eq_from_execute1(self.instr) # TODO: issueunit.f (FP) @@ -596,7 +580,9 @@ class Scoreboard(Elaboratable): # Memory Function Unit # --------- reset_b = Signal(cul.n_units, reset_less=True) - sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i) + # XXX was cul.go_wr_i not done.o + # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i) + sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i) comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel @@ -614,7 +600,7 @@ class Scoreboard(Elaboratable): # TODO: adr_rel_o needs to go into L1 Cache. for now, # just immediately activate go_adr - comb += cul.go_ad_i.eq(cul.adr_rel_o) + sync += cul.go_ad_i.eq(cul.adr_rel_o) # connect up address data comb += memfus.addrs_i[0].eq(cul.units[0].addr_o) @@ -671,13 +657,15 @@ class Scoreboard(Elaboratable): # --------- int_rd_o = intfus.readable_o rrel_o = cu.rd_rel_o + rqrl_o = cu.req_rel_o for i in range(fu_n_src): comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus]) - comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[i][0:n_intfus]) + comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus]) int_wr_o = intfus.writable_o for i in range(fu_n_dst): - comb += ipick1.req_rel_i[i][0:n_intfus].eq(cu.done_o[0:n_intfus]) - comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[i][0:n_intfus]) + # XXX FIXME: rqrl_o[i] here + comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus]) + comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus]) # --------- # Shadow Matrix @@ -872,28 +860,21 @@ class IssueToScoreboard(Elaboratable): comb += sc.int_src1_i.eq(src1) comb += sc.int_src2_i.eq(src2) comb += sc.reg_enable_i.eq(1) # enable the regfile + comb += sc.instr.eq(instr) # choose a Function-Unit-Group with m.If(fu == Function.ALU): # alu - comb += sc.alu_op.eq_from_execute1(instr) - comb += sc.aluissue.insn_i.eq(1) + comb += sc.aluissue.insn_i.eq(1) # enable alu issue comb += wait_issue_alu.eq(1) + with m.Elif(fu == Function.LDST): # ld/st + comb += sc.lsissue.insn_i.eq(1) # enable ldst issue + comb += wait_issue_ls.eq(1) + with m.Elif((op & (0x3 << 2)) != 0): # branch comb += sc.br_oper_i.eq(Cat(op[0:2], opi)) comb += sc.br_imm_i.eq(imm) comb += sc.brissue.insn_i.eq(1) comb += wait_issue_br.eq(1) - with m.Elif((op & (0x3 << 4)) != 0): # ld/st - # see compldst.py - # bit 0: ADD/SUB - # bit 1: immed - # bit 4: LD - # bit 5: ST - comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6])) - comb += sc.ls_imm_i.eq(imm) - comb += sc.lsissue.insn_i.eq(1) - comb += wait_issue_ls.eq(1) - # XXX TODO # these indicate that the instruction is to be made # shadow-dependent on @@ -935,7 +916,7 @@ def power_instr_q(dut, pdecode2, ins, code): def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest, branch_success, branch_fail): instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest, - 'imm_data': (imm, op_imm), + 'imm_data': (imm, op_imm), 'read_reg1': src1, 'read_reg2': src2}] sendlen = 1 @@ -949,11 +930,11 @@ def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest, yield dut.data_i[idx].insn_type.eq(insn_type) yield dut.data_i[idx].fn_unit.eq(fn_unit) yield dut.data_i[idx].read_reg1.data.eq(reg1) - yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO + yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO yield dut.data_i[idx].read_reg2.data.eq(reg2) - yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO + yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO yield dut.data_i[idx].write_reg.data.eq(dest) - yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO + yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO yield dut.data_i[idx].imm_data.data.eq(imm) yield dut.data_i[idx].imm_data.ok.eq(op_imm) di = yield dut.data_i[idx] @@ -1172,21 +1153,30 @@ def power_sim(m, dut, pdecode2, instruction, alusim): for i in range(1, dut.n_regs): #val = randint(0, (1<