from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
from nmigen.back.pysim import Delay
-from soc.regfile.regfile import RegFileArray, treereduce
+from soc.regfile.regfile import RegFileArray, ortreereduce
from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
from soc.scoreboard.global_pending import GlobalPending
from soc.scoreboard.memfu import MemFunctionUnits
from soc.experiment.compalu import ComputationUnitNoDelay
-from soc.experiment.compalu_multi import ComputationUnitNoDelay as MultiCompUnit
-from soc.experiment.compldst import LDSTCompUnit
-from soc.experiment.testmem import TestMemory
+from soc.experiment.compalu_multi import MultiCompUnit, go_record
+from soc.experiment.compldst_multi import LDSTCompUnit
+from soc.experiment.compldst_multi import CompLDSTOpSubset
+from soc.experiment.l0_cache import TstL0CacheBuffer
-from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
+from soc.experiment.alu_hier import ALU, BranchALU
+from soc.fu.alu.alu_input_record import CompALUOpSubset
-from soc.decoder.power_enums import InternalOp, Function
+from soc.decoder.power_enums import MicrOp, Function
from soc.decoder.power_decoder import (create_pdecode)
from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_decoder2 import Decode2ToExecute1Type
+
from soc.simulator.program import Program
# inputs
self.issue_i = Signal(n_units, reset_less=True)
- self.go_rd0_i = Signal(n_units, reset_less=True)
- self.go_rd1_i = Signal(n_units, reset_less=True)
- self.go_rd_i = [self.go_rd0_i, self.go_rd1_i] # XXX HACK!
- self.go_wr_i = Signal(n_units, reset_less=True)
+ self.rd0 = go_record(n_units, "rd0")
+ self.rd1 = go_record(n_units, "rd1")
+ self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK!
+ self.wr0 = go_record(n_units, "wr0")
+ self.go_wr_i = [self.wr0.go]
self.shadown_i = Signal(n_units, reset_less=True)
self.go_die_i = Signal(n_units, reset_less=True)
if ldstmode:
# outputs
self.busy_o = Signal(n_units, reset_less=True)
- self.rd_rel0_o = Signal(n_units, reset_less=True)
- self.rd_rel1_o = Signal(n_units, reset_less=True)
- self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o] # HACK!
- self.req_rel_o = Signal(n_units, reset_less=True)
+ self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK!
+ self.req_rel_o = self.wr0.rel
self.done_o = Signal(n_units, reset_less=True)
if ldstmode:
self.ld_o = Signal(n_units, reset_less=True) # op is LD
done_l.append(alu.done_o)
shadow_l.append(alu.shadown_i)
godie_l.append(alu.go_die_i)
- print (alu, alu.rd_rel_o)
- if isinstance(alu, LDSTCompUnit) or \
- isinstance(alu, CompUnitBR) or \
- isinstance(alu, ComputationUnitNoDelay):
- dummy1 = Signal(64, reset_less=True)
- dummy2 = Signal(64, reset_less=True)
- dummy3 = Signal(64, reset_less=True)
- go_wr_l.append(dummy1)
- go_rd_l0.append(dummy2)
- go_rd_l1.append(dummy3)
- else:
- rd_rel0_l.append(alu.rd_rel_o[0])
- rd_rel1_l.append(alu.rd_rel_o[1])
- go_wr_l.append(alu.go_wr_i[0])
- go_rd_l0.append(alu.go_rd_i[0])
- go_rd_l1.append(alu.go_rd_i[1])
- rd_rel0_l.append(Const(0, 64)) # FIXME
- rd_rel1_l.append(Const(0, 64)) # FIXME
+ print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)
+ rd_rel0_l.append(alu.rd_rel_o[0])
+ rd_rel1_l.append(alu.rd_rel_o[1])
+ go_wr_l.append(alu.go_wr_i)
+ go_rd_l0.append(alu.go_rd_i[0])
+ go_rd_l1.append(alu.go_rd_i[1])
issue_l.append(alu.issue_i)
busy_l.append(alu.busy_o)
- comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l))
- comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l))
+ comb += self.rd0.rel.eq(Cat(*rd_rel0_l))
+ comb += self.rd1.rel.eq(Cat(*rd_rel1_l))
comb += self.req_rel_o.eq(Cat(*req_rel_l))
comb += self.done_o.eq(Cat(*done_l))
comb += self.busy_o.eq(Cat(*busy_l))
comb += Cat(*godie_l).eq(self.go_die_i)
comb += Cat(*shadow_l).eq(self.shadown_i)
- comb += Cat(*go_wr_l).eq(self.go_wr_i)
- comb += Cat(*go_rd_l0).eq(self.go_rd0_i)
- comb += Cat(*go_rd_l1).eq(self.go_rd1_i)
+ comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO
+ comb += Cat(*go_rd_l0).eq(self.rd0.go)
+ comb += Cat(*go_rd_l1).eq(self.rd1.go)
comb += Cat(*issue_l).eq(self.issue_i)
# connect data register input/output
# protected by a single go_wr. multi-issue requires a bus
# to be inserted here.
if self.units:
- data_o = treereduce(self.units, "data_o")
+ data_o = ortreereduce(self.units, "data_o")
comb += self.data_o.eq(data_o)
if self.ldstmode:
- addr_o = treereduce(self.units, "addr_o")
+ addr_o = ortreereduce(self.units, "addr_o")
comb += self.addr_o.eq(addr_o)
for i, alu in enumerate(self.units):
class CompUnitLDSTs(CompUnitsBase):
- def __init__(self, rwid, opwid, n_ldsts, mem):
+ def __init__(self, rwid, opwid, n_ldsts, l0):
""" Inputs:
* :rwid: bit width of register file(s) - both FP and INT
self.opwid = opwid
# inputs
- self.oper_i = Signal(opwid, reset_less=True)
- self.imm_i = Signal(rwid, reset_less=True)
-
- # Int ALUs
- self.alus = []
- for i in range(n_ldsts):
- self.alus.append(ALU(rwid))
+ self.op = CompLDSTOpSubset("cul_i")
+ # LD/ST Units
units = []
- for alu in self.alus:
- aluopwid = 4 # see compldst.py for "internal" opcode
- units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
+ for i in range(n_ldsts):
+ pi = l0.l0.dports[i].pi
+ units.append(LDSTCompUnit(pi, rwid, awid=48))
CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
m = CompUnitsBase.elaborate(self, platform)
comb = m.d.comb
- # hand the same operation to all units, 4 lower bits though
- for alu in self.units:
- comb += alu.oper_i[0:4].eq(self.oper_i)
- comb += alu.imm_i.eq(self.imm_i)
- comb += alu.isalu_i.eq(0)
+ # hand the same operation to all units
+ for ldst in self.units:
+ comb += ldst.oper_i.eq(self.op)
return m
# inputs
self.op = CompALUOpSubset("cua_i")
- self.oper_i = Signal(opwid, reset_less=True)
- self.imm_i = Signal(rwid, reset_less=True)
# Int ALUs
alus = []
units = []
for alu in alus:
aluopwid = 3 # extra bit for immediate mode
- units.append(MultiCompUnit(rwid, alu))
+ units.append(MultiCompUnit(rwid, alu, CompALUOpSubset))
CompUnitsBase.__init__(self, rwid, units)
# hand the subset of operation to ALUs
for alu in self.units:
comb += alu.oper_i.eq(self.op)
- #comb += alu.oper_i[0:3].eq(self.oper_i)
- #comb += alu.imm_i.eq(self.imm_i)
return m
self.opwid = opwid
# inputs
+ self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
self.oper_i = Signal(opwid, reset_less=True)
self.imm_i = Signal(rwid, reset_less=True)
# Branch ALU and CU
self.bgt = BranchALU(rwid)
aluopwid = 3 # extra bit for immediate mode
- self.br1 = ComputationUnitNoDelay(rwid, self.bgt)
+ self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
CompUnitsBase.__init__(self, rwid, [self.br1])
def elaborate(self, platform):
# hand the same operation to all units
for alu in self.units:
+ # comb += alu.oper_i.eq(self.op) # TODO
comb += alu.oper_i.eq(self.oper_i)
#comb += alu.imm_i.eq(self.imm_i)
rsel = []
rd = []
for i in range(n_src):
- j = i + 1 # name numbering to match src1/src2
+ j = i + 1 # name numbering to match src1/src2
src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
- rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
+ rsel.append(Signal(n_reg, name="src%d_rsel_o" %
+ j, reset_less=True))
rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
dst = []
dsel = []
wr = []
for i in range(n_dst):
- j = i + 1 # name numbering to match src1/src2
+ j = i + 1 # name numbering to match src1/src2
dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
- dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
+ dsel.append(Signal(n_reg, name="dst%d_rsel_o" %
+ j, reset_less=True))
wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
wpnd = []
pend = []
for i in range(nf):
- j = i + 1 # name numbering to match src1/src2
- pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
- wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
+ j = i + 1 # name numbering to match src1/src2
+ pend.append(Signal(nf, name="rd_src%d_pend_o" %
+ j, reset_less=True))
+ wpnd.append(Signal(nf, name="wr_dst%d_pend_o" %
+ j, reset_less=True))
self.dest_i = Array(dst) # Dest in (top)
self.src_i = Array(src) # oper in (top)
# for Register File Select Lines (horizontal), per-reg
- self.dst_rsel_o = Array(dsel) # dest reg (bot)
+ self.dst_rsel_o = Array(dsel) # dest reg (bot)
self.src_rsel_o = Array(rsel) # src reg (bot)
self.go_rd_i = Array(rd)
# Connect function issue / arrays, and dest/src1/src2
for i in range(self.n_src):
- print (i, self.go_rd_i, intfudeps.go_rd_i)
+ print(i, self.go_rd_i, intfudeps.go_rd_i)
comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
comb += intregdeps.src_i[i].eq(self.src_i[i])
comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
for i in range(self.n_dst):
- print (i, self.go_wr_i, intfudeps.go_wr_i)
+ print(i, self.go_wr_i, intfudeps.go_wr_i)
comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
comb += intregdeps.dest_i[i].eq(self.dest_i[i])
comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
self.fpregs = RegFileArray(rwid, n_regs)
# Memory (test for now)
- self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
+ self.l0 = TstL0CacheBuffer()
# issue q needs to get at these
self.aluissue = IssueUnitGroup(2)
self.lsissue = IssueUnitGroup(2)
self.brissue = IssueUnitGroup(1)
# and these
- self.alu_op = CompALUOpSubset("alu")
+ self.instr = Decode2ToExecute1Type("sc_instr")
self.br_oper_i = Signal(4, reset_less=True)
self.br_imm_i = Signal(rwid, reset_less=True)
self.ls_oper_i = Signal(4, reset_less=True)
- self.ls_imm_i = Signal(rwid, reset_less=True)
# inputs
self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
m.submodules.intregs = self.intregs
m.submodules.fpregs = self.fpregs
- m.submodules.mem = mem = self.mem
+ m.submodules.l0 = l0 = self.l0
# register ports
int_dest = self.intregs.write_port("dest")
# LDST Comp Units
n_ldsts = 2
- cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
+ cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)
# Comp Units
m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
]
# take these to outside (issue needs them)
- comb += cua.op.eq(self.alu_op)
+ comb += cua.op.eq_from_execute1(self.instr)
comb += cub.oper_i.eq(self.br_oper_i)
comb += cub.imm_i.eq(self.br_imm_i)
- comb += cul.oper_i.eq(self.ls_oper_i)
- comb += cul.imm_i.eq(self.ls_imm_i)
+ comb += cul.op.eq_from_execute1(self.instr)
# TODO: issueunit.f (FP)
# Memory Function Unit
# ---------
reset_b = Signal(cul.n_units, reset_less=True)
- sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
+ # XXX this term was cul.go_wr_i; it is now cul.done_o
+ # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
+ sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
# TODO: adr_rel_o needs to go into L1 Cache. for now,
# just immediately activate go_adr
- comb += cul.go_ad_i.eq(cul.adr_rel_o)
+ sync += cul.go_ad_i.eq(cul.adr_rel_o)
# connect up address data
comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
int_wr_o = intfus.writable_o
for i in range(fu_n_dst):
- comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[i][0:n_intfus])
+ # XXX FIXME: should be rqrl_o[i] here; the [i] index is currently dropped
+ comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])
# ---------
comb += sc.int_src1_i.eq(src1)
comb += sc.int_src2_i.eq(src2)
comb += sc.reg_enable_i.eq(1) # enable the regfile
+ comb += sc.instr.eq(instr)
# choose a Function-Unit-Group
with m.If(fu == Function.ALU): # alu
- comb += sc.alu_op.eq_from_execute1(instr)
- comb += sc.aluissue.insn_i.eq(1)
+ comb += sc.aluissue.insn_i.eq(1) # enable alu issue
comb += wait_issue_alu.eq(1)
+ with m.Elif(fu == Function.LDST): # ld/st
+ comb += sc.lsissue.insn_i.eq(1) # enable ldst issue
+ comb += wait_issue_ls.eq(1)
+
with m.Elif((op & (0x3 << 2)) != 0): # branch
comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
comb += sc.br_imm_i.eq(imm)
comb += sc.brissue.insn_i.eq(1)
comb += wait_issue_br.eq(1)
- with m.Elif((op & (0x3 << 4)) != 0): # ld/st
- # see compldst.py
- # bit 0: ADD/SUB
- # bit 1: immed
- # bit 4: LD
- # bit 5: ST
- comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
- comb += sc.ls_imm_i.eq(imm)
- comb += sc.lsissue.insn_i.eq(1)
- comb += wait_issue_ls.eq(1)
-
# XXX TODO
# these indicate that the instruction is to be made
# shadow-dependent on
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
branch_success, branch_fail):
instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
- 'imm_data': (imm, op_imm),
+ 'imm_data': (imm, op_imm),
'read_reg1': src1, 'read_reg2': src2}]
sendlen = 1
yield dut.data_i[idx].insn_type.eq(insn_type)
yield dut.data_i[idx].fn_unit.eq(fn_unit)
yield dut.data_i[idx].read_reg1.data.eq(reg1)
- yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
+ yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
yield dut.data_i[idx].read_reg2.data.eq(reg2)
- yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
+ yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
yield dut.data_i[idx].write_reg.data.eq(dest)
- yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
+ yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
yield dut.data_i[idx].imm_data.data.eq(imm)
yield dut.data_i[idx].imm_data.ok.eq(op_imm)
di = yield dut.data_i[idx]
for i in range(1, dut.n_regs):
#val = randint(0, (1<<alusim.rwidth)-1)
#val = 31+i*3
- val = i # XXX actually, not random at all
+ val = i # XXX actually, not random at all
yield dut.intregs.regs[i].reg.eq(val)
alusim.setval(i, val)
# create some instructions
- lst = [#"addi 3, 0, 0x1234",
- #"addi 2, 0, 0x4321",
- "add 1, 3, 2"]
+ lst = []
+ if False:
+ lst += ["addi 2, 0, 0x4321",
+ "addi 3, 0, 0x1234",
+ "add 1, 3, 2",
+ "add 4, 3, 5"
+ ]
+ if True:
+ lst += ["lbzu 6, 7(2)",
+
+ ]
+
with Program(lst) as program:
gen = program.generate_instructions()
# issue instruction(s), wait for issue to be free before proceeding
for ins, code in zip(gen, program.assembly.splitlines()):
yield instruction.eq(ins) # raw binary instr.
- yield #Delay(1e-6)
+ yield # Delay(1e-6)
print("binary 0x{:X}".format(ins & 0xffffffff))
print("assembly", code)
instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))
if True:
- instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
+ instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
0, 0, (0, 0)))
- instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
+ instrs.append((5, 3, 3, MicrOp.OP_ADD, Function.ALU,
0, 0, (0, 0)))
if False:
- instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
+ instrs.append((3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU,
1, 7, (0, 0)))
if False:
- instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
+ instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
0, 0, (0, 0)))
if False:
# issue instruction(s), wait for issue to be free before proceeding
for i, instr in enumerate(instrs):
- print (i, instr)
+ print(i, instr)
src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr
print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
vcd_name='test_powerboard6600.vcd')
- #run_simulation(dut, scoreboard_sim(dut, alusim),
+ # run_simulation(dut, scoreboard_sim(dut, alusim),
# vcd_name='test_scoreboard6600.vcd')
# run_simulation(dut, scoreboard_branch_sim(dut, alusim),