from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
from regfile.regfile import RegFileArray, treereduce
-from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
from scoreboard.fu_fu_matrix import FUFUDepMatrix
from scoreboard.fu_reg_matrix import FURegDepMatrix
from scoreboard.global_pending import GlobalPending
from scoreboard.group_picker import GroupPicker
from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
-from scoreboard.shadow import ShadowMatrix, WaWGrid
+from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
from compalu import ComputationUnitNoDelay
* :rwid: bit width of register file(s) - both FP and INT
* :n_units: number of ALUs
+
+ Note: bgt unit is returned so that a shadow unit can be created
+ for it
+
"""
self.n_units = n_units
self.rwid = rwid
+ # inputs
self.issue_i = Signal(n_units, reset_less=True)
self.go_rd_i = Signal(n_units, reset_less=True)
self.go_wr_i = Signal(n_units, reset_less=True)
self.shadown_i = Signal(n_units, reset_less=True)
self.go_die_i = Signal(n_units, reset_less=True)
+
+ # outputs
self.busy_o = Signal(n_units, reset_less=True)
self.rd_rel_o = Signal(n_units, reset_less=True)
self.req_rel_o = Signal(n_units, reset_less=True)
+ # in/out register data (note: not register#, actual data)
self.dest_o = Signal(rwid, reset_less=True)
self.src1_data_i = Signal(rwid, reset_less=True)
self.src2_data_i = Signal(rwid, reset_less=True)
+ # Branch ALU and CU
+ self.bgt = BranchALU(self.rwid)
+ self.br1 = ComputationUnitNoDelay(self.rwid, 2, self.bgt)
+
def elaborate(self, platform):
m = Module()
sub = ALU(self.rwid)
mul = ALU(self.rwid)
shf = ALU(self.rwid)
- # Branch ALU
- bgt = BranchALU(self.rwid)
+ bgt = self.bgt
m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
- m.submodules.br1 = br1 = ComputationUnitNoDelay(self.rwid, 2, bgt)
+ m.submodules.br1 = br1 = self.br1
int_alus = [comp1, comp2, comp3, comp4, br1]
m.d.comb += comp1.oper_i.eq(Const(0, 2)) # op=add
n_int_alus = 5
m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
m.d.comb += cu.go_die_i.eq(0)
+ bgt = cu.bgt # get at the branch computation unit
# Int FUs
m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
m.submodules.issueunit = issueunit
# Shadow Matrix. currently n_int_fus shadows, to be used for
- # write-after-write hazards
- m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus)
+ # write-after-write hazards. NOTE: there is one extra for branches,
+ # so the shadow width is increased by 1
+ m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus+1)
+
# combined go_rd/wr + go_die (go_die used to reset latches)
go_rd_rst = Signal(n_int_fus, reset_less=True)
go_wr_rst = Signal(n_int_fus, reset_less=True)
fn_issue_prev = Signal(n_int_fus)
prev_shadow = Signal(n_int_fus)
+ # Branch Speculation recorder. tracks the success/fail state as
+ # each instruction is issued, so that when the branch occurs the
+ # allow/cancel can be issued as appropriate.
+ m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)
+
#---------
# ok start wiring things together...
# "now hear de word of de looord... dem bones dem bones dem dryy bones"
m.d.comb += go_rd_rst.eq(go_rd_o | shadows.go_die_o)
m.d.comb += go_wr_rst.eq(go_wr_o | shadows.go_die_o)
+ #---------
+ # NOTE: this setup is for instruction order preservation...
+
# connect shadows / go_dies to Computation Units
m.d.comb += cu.shadown_i[0:n_int_fus].eq(shadows.shadown_o[0:n_int_fus])
m.d.comb += cu.go_die_i[0:n_int_fus].eq(shadows.go_die_o[0:n_int_fus])
# if the previous is completed (!busy) don't cast the shadow!
m.d.comb += prev_shadow.eq(~fn_issue_o & fn_issue_prev & cu.busy_o)
for i in range(n_int_fus):
- m.d.comb += shadows.shadow_i[i].eq(prev_shadow)
+ m.d.comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)
+
+ #---------
+ # ... and this is for branch speculation. It uses the extra bit
+ # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1) and
+ # only needs to set shadow_i, s_fail_i and s_good_i.
+
+ m.d.comb += shadows.s_good_i[n_int_fus].eq(bspec.good_o[i])
+ m.d.comb += shadows.s_fail_i[n_int_fus].eq(bspec.fail_o[i])
+
+ with m.If(self.branch_succ_i | self.branch_fail_i):
+ for i in range(n_int_fus):
+ m.d.comb += shadows.shadow_i[i][n_int_fus].eq(1)
+
+ # finally, we need an indicator to the test infrastructure as to
+ # whether the branch succeeded or failed, plus, link up to the
+ # "recorder" of whether the instruction was under shadow or not
+
+ m.d.comb += bspec.issue_i.eq(fn_issue_o)
+ m.d.comb += bspec.good_i.eq(self.branch_succ_i)
+ m.d.comb += bspec.fail_i.eq(self.branch_fail_i)
+ # branch is active (TODO: a better signal: this is over-using the
+ # go_write signal - actually the branch should not be "writing")
+ with m.If(cu.br1.go_wr_i):
+ m.d.sync += self.branch_direction_o.eq(cu.br1.data_o+Const(1, 2))
+ m.d.comb += bspec.branch_i.eq(1)
#---------
# Connect Register File(s)
print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
-def create_random_ops(n_ops, shadowing=False):
+def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):  # returns a list of n_ops random instruction tuples; dut supplies n_regs
insts = []
for i in range(n_ops):
src1 = randint(1, dut.n_regs-1)
src2 = randint(1, dut.n_regs-1)
dest = randint(1, dut.n_regs-1)
- op = randint(0, 3)
+ op = randint(0, max_opnums)  # upper opcode bound is now a parameter; the default of 3 matches the previous hard-coded range
if shadowing:
- instrs.append((src1, src2, dest, op, (False, False)))
+ insts.append((src1, src2, dest, op, (False, False)))  # shadowing form: tuple carries an extra (False, False) element
else:
- instrs.append((src1, src2, dest, op))
+ insts.append((src1, src2, dest, op))  # plain form: (src1, src2, dest, op)
return insts
alusim.setval(i, val)
# create some instructions: branches create a tree
- insts = create_random_ops(5)
+ insts = create_random_ops(dut, 5)
src1 = randint(1, dut.n_regs-1)
src2 = randint(1, dut.n_regs-1)
op = randint(4, 7)
- branch_ok = create_random_ops(5)
- branch_fail = create_random_ops(5)
+ branch_ok = create_random_ops(dut, 5)
+ branch_fail = create_random_ops(dut, 5)
insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
# create some instructions (some random, some regression tests)
instrs = []
if True:
- for i in range(10):
- src1 = randint(1, dut.n_regs-1)
- src2 = randint(1, dut.n_regs-1)
- while True:
- dest = randint(1, dut.n_regs-1)
- break
- if dest not in [src1, src2]:
- break
- #src1 = 2
- #src2 = 3
- #dest = 2
-
- op = randint(0, 4)
- #op = i % 2
- #op = 0
-
- instrs.append((src1, src2, dest, op))
+ instrs = create_random_ops(dut, 10, False, 4)
if False:
instrs.append((2, 3, 3, 0))