from random import randint
+class CompUnits(Elaboratable):
+ # Groups the integer ALU computation units: vector-wide control/status ports plus one shared data path.
+ def __init__(self, rwid, n_units):
+ """ Inputs:
+
+ * :rwid: bit width of register file(s) - both FP and INT
+ * :n_units: number of ALUs
+ """
+ self.n_units = n_units
+ self.rwid = rwid
+ # n_units-wide control/status vectors; elaborate() maps them onto the per-unit signals via Cat()
+ self.issue_i = Signal(n_units, reset_less=True)
+ self.go_rd_i = Signal(n_units, reset_less=True)
+ self.go_wr_i = Signal(n_units, reset_less=True)
+ self.busy_o = Signal(n_units, reset_less=True)
+ self.req_rel_o = Signal(n_units, reset_less=True)
+ # shared data path: one rwid-wide result out, two rwid-wide operands in
+ self.dest_o = Signal(rwid, reset_less=True)
+ self.src1_data_i = Signal(rwid, reset_less=True)
+ self.src2_data_i = Signal(rwid, reset_less=True)
+ # Builds the computation units and wires them to the ports declared in __init__; returns the Module.
+ def elaborate(self, platform):
+ m = Module()
+
+ # Int ALUs: exactly two units are built here regardless of n_units (NOTE(review): confirm callers pass n_units == 2)
+ add = ALU(self.rwid)
+ sub = ALU(self.rwid)
+ m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
+ m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
+ int_alus = [comp1, comp2]
+
+ m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
+ m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub
+ # collect each unit's control/status signals, in int_alus order, so they can be Cat()ed against the port vectors
+ go_rd_l = []
+ go_wr_l = []
+ issue_l = []
+ busy_l = []
+ req_rel_l = []
+ for alu in int_alus:
+ req_rel_l.append(alu.req_rel_o)
+ go_wr_l.append(alu.go_wr_i)
+ go_rd_l.append(alu.go_rd_i)
+ issue_l.append(alu.issue_i)
+ busy_l.append(alu.busy_o)
+ m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l)) # outputs: units -> port vector
+ m.d.comb += self.busy_o.eq(Cat(*busy_l))
+ m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i) # inputs: port vector -> units (Cat() is the assignment target)
+ m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
+ m.d.comb += Cat(*issue_l).eq(self.issue_i)
+
+ # connect data register input/output
+
+ # merge (OR) all integer FU / ALU outputs to a single value
+ # bit of a hack: treereduce needs a list with an item named "dest_o"
+ dest_o = treereduce(int_alus)
+ m.d.comb += self.dest_o.eq(dest_o)
+ # every unit receives the same two source operands (NOTE(review): loop index i is unused)
+ for i, alu in enumerate(int_alus):
+ m.d.comb += alu.src1_i.eq(self.src1_data_i)
+ m.d.comb += alu.src2_i.eq(self.src2_data_i)
+
+ return m
+
+
+class FunctionUnits(Elaboratable):
+ # Wraps the integer FU-FU and FU-Reg dependency matrices behind a single set of ports.
+ def __init__(self, n_regs, n_int_alus):
+ self.n_regs = n_regs
+ self.n_int_alus = n_int_alus
+
+ self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
+ self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
+ self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
+
+ self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
+ self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
+ self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
+ # NOTE(review): req_rel_i (and req_rel_o below) are declared but never referenced in elaborate()
+ self.req_rel_i = Signal(n_int_alus, reset_less = True)
+ self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
+ self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
+ self.readable_o = Signal(n_int_alus, reset_less=True)
+ self.writable_o = Signal(n_int_alus, reset_less=True)
+
+ self.go_rd_i = Signal(n_int_alus, reset_less=True)
+ self.go_wr_i = Signal(n_int_alus, reset_less=True)
+ self.req_rel_o = Signal(n_int_alus, reset_less=True)
+ self.fn_issue_i = Signal(n_int_alus, reset_less=True)
+ # Instantiates both dependency matrices and connects them to the ports declared in __init__; returns the Module.
+ def elaborate(self, platform):
+ m = Module()
+
+ n_int_fus = self.n_int_alus
+
+ # Integer FU-FU Dep Matrix
+ intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
+ m.submodules.intfudeps = intfudeps
+ # Integer FU-Reg Dep Matrix
+ intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
+ m.submodules.intregdeps = intregdeps
+ # export the FU-Reg matrix's read/write pending vectors ...
+ m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_pend_o)
+ m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_pend_o)
+ # ... and feed the same vectors into the FU-FU matrix
+ m.d.comb += intfudeps.rd_pend_i.eq(self.g_int_rd_pend_o)
+ m.d.comb += intfudeps.wr_pend_i.eq(self.g_int_wr_pend_o)
+ # NOTE(review): FU-FU control inputs are driven from sync, while the FU-Reg matrix below gets the same signals from comb - confirm the skew is intended
+ m.d.sync += intfudeps.issue_i.eq(self.fn_issue_i)
+ m.d.sync += intfudeps.go_rd_i.eq(self.go_rd_i)
+ m.d.sync += intfudeps.go_wr_i.eq(self.go_wr_i)
+ m.d.comb += self.readable_o.eq(intfudeps.readable_o)
+ m.d.comb += self.writable_o.eq(intfudeps.writable_o)
+
+ # Connect function issue / arrays, and dest/src1/src2
+ m.d.comb += intregdeps.dest_i.eq(self.dest_i)
+ m.d.comb += intregdeps.src1_i.eq(self.src1_i)
+ m.d.comb += intregdeps.src2_i.eq(self.src2_i)
+
+ m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
+ m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
+ m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)
+ # register-select outputs are passed straight through from the FU-Reg matrix
+ m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
+ m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
+ m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
+
+ return m
+
class Scoreboard(Elaboratable):
def __init__(self, rwid, n_regs):
fp_src1 = self.fpregs.read_port("src1")
fp_src2 = self.fpregs.read_port("src2")
- # Int ALUs
- add = ALU(self.rwid)
- sub = ALU(self.rwid)
- m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
- m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
- int_alus = [comp1, comp2]
+ # Int ALUs and Comp Units
+ n_int_alus = 2
+ m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
- m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
- m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub
+ # Int FUs
+ m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
# Count of number of FUs
- n_int_fus = len(int_alus)
+ n_int_fus = n_int_alus
n_fp_fus = 0 # for now
- n_fus = n_int_fus + n_fp_fus # plus FP FUs
-
- # Integer FU-FU Dep Matrix
- intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
- m.submodules.intfudeps = intfudeps
- # Integer FU-Reg Dep Matrix
- intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
- m.submodules.intregdeps = intregdeps
-
# Integer Priority Picker 1: Adder + Subtractor
intpick1 = GroupPicker(2) # picks between add and sub
m.submodules.intpick1 = intpick1
- # Global Pending Vectors (INT and TODO FP)
- g_int_src1_pend_v = intregdeps.rd_src2_pend_o
- g_int_src2_pend_v = intregdeps.rd_src1_pend_o
- g_int_rd_pend_v = intregdeps.rd_pend_o
- g_int_wr_pend_v = intregdeps.wr_pend_o
-
# INT/FP Issue Unit
regdecode = RegDecode(self.n_regs)
m.submodules.regdecode = regdecode
self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode
# connect global rd/wr pending vectors
- m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v)
+ m.d.comb += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
# TODO: issueunit.f (FP)
# and int function issue / busy arrays, and dest/src1/src2
- fn_busy_l = []
- fn_issue_l = []
- for i, alu in enumerate(int_alus):
- fn_busy_l.append(alu.busy_o)
- fn_issue_l.append(issueunit.i.fn_issue_o[i])
-
- m.d.comb += alu.issue_i.eq(fn_issue_l[i])
- # XXX sync, so as to stop a simulation infinite loop
- m.d.comb += issueunit.i.busy_i[i].eq(alu.busy_o)
- #m.d.comb += alu.dest_i.eq(issueunit.i.dest_i)
- #m.d.comb += alu.src1_i.eq(issueunit.i.src1_i)
- #m.d.comb += alu.src2_i.eq(issueunit.i.src2_i)
- # NOTE: req_rel_o connected to picker, below.
-
- fn_issue_o = Signal(len(fn_issue_l), reset_less=True)
- m.d.comb += fn_issue_o.eq(Cat(*fn_issue_l))
- #---------
- # connect fu-fu matrix
- #---------
-
- m.d.comb += intfudeps.rd_pend_i.eq(g_int_rd_pend_v)
- m.d.comb += intfudeps.wr_pend_i.eq(g_int_wr_pend_v)
+ m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
+ m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
+ m.d.comb += intfus.src2_i.eq(regdecode.src2_o)
- # Group Picker... done manually for now. TODO: cat array of pick sigs
- go_rd_i = intfudeps.go_rd_i
- go_wr_i = intfudeps.go_wr_i
- m.d.comb += go_rd_i[0].eq(intpick1.go_rd_o[0]) # add rd
- m.d.comb += go_wr_i[0].eq(intpick1.go_wr_o[0]) # add wr
-
- m.d.comb += go_rd_i[1].eq(intpick1.go_rd_o[1]) # sub rd
- m.d.comb += go_wr_i[1].eq(intpick1.go_wr_o[1]) # sub wr
+ fn_issue_o = issueunit.i.fn_issue_o
- m.d.comb += intfudeps.issue_i.eq(fn_issue_o)
+ m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
+ # XXX sync, so as to stop a simulation infinite loop
+ m.d.sync += issueunit.i.busy_i.eq(cu.busy_o)
#---------
- # connect fu-dep matrix
+ # connect fu-fu matrix
#---------
- r_go_rd_i = intregdeps.go_rd_i
- r_go_wr_i = intregdeps.go_wr_i
- m.d.comb += r_go_rd_i.eq(go_rd_i)
- m.d.comb += r_go_wr_i.eq(go_wr_i)
- m.d.comb += intregdeps.dest_i.eq(regdecode.dest_o)
- m.d.comb += intregdeps.src1_i.eq(regdecode.src1_o)
- m.d.comb += intregdeps.src2_i.eq(regdecode.src2_o)
- m.d.comb += intregdeps.issue_i.eq(fn_issue_o)
+ # Group Picker... done manually for now. TODO: cat array of pick sigs
+ go_rd_o = intpick1.go_rd_o
+ go_wr_o = intpick1.go_wr_o
+ go_rd_i = intfus.go_rd_i
+ go_wr_i = intfus.go_wr_i
+ m.d.comb += go_rd_i[0:2].eq(go_rd_o[0:2]) # add rd
+ m.d.comb += go_wr_i[0:2].eq(go_wr_o[0:2]) # add wr
# Connect Picker
#---------
- m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
- m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
- int_readable_o = intfudeps.readable_o
- int_writable_o = intfudeps.writable_o
- m.d.comb += intpick1.readable_i[0].eq(int_readable_o[0]) # add rd
- m.d.comb += intpick1.writable_i[0].eq(int_writable_o[0]) # add wr
- m.d.comb += intpick1.readable_i[1].eq(int_readable_o[1]) # sub rd
- m.d.comb += intpick1.writable_i[1].eq(int_writable_o[1]) # sub wr
+ m.d.sync += intpick1.go_rd_i[0:2].eq(~go_rd_i[0:2])
+ m.d.comb += intpick1.req_rel_i[0:2].eq(cu.req_rel_o[0:2])
+ int_readable_o = intfus.readable_o
+ int_writable_o = intfus.writable_o
+ m.d.comb += intpick1.readable_i[0:2].eq(int_readable_o[0:2])
+ m.d.comb += intpick1.writable_i[0:2].eq(int_writable_o[0:2])
#---------
# Connect Register File(s)
#---------
- m.d.sync += int_dest.wen.eq(intregdeps.dest_rsel_o)
- m.d.comb += int_src1.ren.eq(intregdeps.src1_rsel_o)
- m.d.comb += int_src2.ren.eq(intregdeps.src2_rsel_o)
+ print ("intregdeps wen len", len(intfus.dest_rsel_o))
+ m.d.sync += int_dest.wen.eq(intfus.dest_rsel_o)
+ m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
+ m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)
- # merge (OR) all integer FU / ALU outputs to a single value
- # bit of a hack: treereduce needs a list with an item named "dest_o"
- dest_o = treereduce(int_alus)
- m.d.comb += int_dest.data_i.eq(dest_o)
+ # connect ALUs to regfile
+ m.d.comb += int_dest.data_i.eq(cu.dest_o)
+ m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
+ m.d.comb += cu.src2_data_i.eq(int_src2.data_o)
- # connect ALUs
- for i, alu in enumerate(int_alus):
- m.d.comb += alu.go_rd_i.eq(intpick1.go_rd_o[i])
- m.d.comb += alu.go_wr_i.eq(intpick1.go_wr_o[i])
- #m.d.comb += alu.issue_i.eq(fn_issue_l[i])
- #m.d.comb += fn_busy_l[i].eq(alu.busy_o) # XXX ignore, use fnissue
- m.d.comb += alu.src1_i.eq(int_src1.data_o)
- m.d.comb += alu.src2_i.eq(int_src2.data_o)
+ # connect ALU Computation Units
+ m.d.sync += cu.go_rd_i[0:2].eq(go_rd_o[0:2])
+ m.d.sync += cu.go_wr_i[0:2].eq(go_wr_o[0:2])
+ m.d.sync += cu.issue_i[0:2].eq(fn_issue_o[0:2])
return m
yield dut.int_store_i.eq(0)
for i in range(1, dut.n_regs):
- yield dut.intregs.regs[i].reg.eq(i)
- alusim.setval(i, i)
+ yield dut.intregs.regs[i].reg.eq(i*2)
+ alusim.setval(i, i*2)
+
+ yield
+ instrs = []
if False:
- yield from int_instr(dut, alusim, IADD, 4, 3, 5)
- yield from print_reg(dut, [3,4,5])
- yield
- yield from int_instr(dut, alusim, IADD, 5, 2, 5)
- yield from print_reg(dut, [3,4,5])
- yield
- yield from int_instr(dut, alusim, ISUB, 5, 1, 3)
- yield from print_reg(dut, [3,4,5])
- yield
- for i in range(len(dut.int_insn_i)):
- yield dut.int_insn_i[i].eq(0)
- yield from print_reg(dut, [3,4,5])
- yield
- yield from print_reg(dut, [3,4,5])
- yield
- yield from print_reg(dut, [3,4,5])
- yield
+ for i in range(2):
+ src1 = randint(1, dut.n_regs-1)
+ src2 = randint(1, dut.n_regs-1)
+ while True:
+ dest = randint(1, dut.n_regs-1)
+ break
+ if dest not in [src1, src2]:
+ break
+ #src1 = 2
+ #src2 = 3
+ #dest = 2
- yield from alusim.check(dut)
+ op = randint(0, 1)
+ op = i % 2
+ instrs.append((src1, src2, dest, op))
- for i in range(100):
- src1 = randint(1, dut.n_regs-1)
- src2 = randint(1, dut.n_regs-1)
- while True:
- dest = randint(1, dut.n_regs-1)
- break
- if dest not in [src1, src2]:
- break
- #src1 = 7
- #src2 = 7
- dest = src2
+ if False:
+ instrs.append((2, 3, 3, 0))
+ instrs.append((5, 3, 3, 1))
- op = randint(0, 1)
- print ("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
+ if True:
+ instrs.append((7, 2, 6, 1))
+ instrs.append((3, 7, 1, 1))
+ instrs.append((2, 2, 3, 1))
+
+ for i, (src1, src2, dest, op) in enumerate(instrs):
+
+ print ("instr %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
yield from int_instr(dut, alusim, op, src1, src2, dest)
yield from print_reg(dut, [3,4,5])
- yield
- yield from print_reg(dut, [3,4,5])
- for i in range(len(dut.int_insn_i)):
- yield dut.int_insn_i[i].eq(0)
- yield
- yield
-
+ while True:
+ yield
+ issue_o = yield dut.issue_o
+ if issue_o:
+ yield from print_reg(dut, [3,4,5])
+ for i in range(len(dut.int_insn_i)):
+ yield dut.int_insn_i[i].eq(0)
+ break
+ print ("busy",)
+ yield from print_reg(dut, [3,4,5])
+ yield
yield
yield from print_reg(dut, [3,4,5])
yield
yield
yield from alusim.check(dut)
+ yield from alusim.dump(dut)
def explore_groups(dut):