from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
from regfile.regfile import RegFileArray, treereduce
-from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
from scoreboard.fu_fu_matrix import FUFUDepMatrix
from scoreboard.fu_reg_matrix import FURegDepMatrix
from scoreboard.global_pending import GlobalPending
from scoreboard.group_picker import GroupPicker
from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
-from scoreboard.shadow import ShadowMatrix, WaWGrid
+from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
from compalu import ComputationUnitNoDelay
-from alu_hier import ALU
+from alu_hier import ALU, BranchALU
from nmutil.latch import SRLatch
from random import randint
+
class CompUnits(Elaboratable):
def __init__(self, rwid, n_units):
* :rwid: bit width of register file(s) - both FP and INT
* :n_units: number of ALUs
+
+ Note: bgt unit is returned so that a shadow unit can be created
+ for it
+
"""
self.n_units = n_units
self.rwid = rwid
+ # inputs
self.issue_i = Signal(n_units, reset_less=True)
self.go_rd_i = Signal(n_units, reset_less=True)
self.go_wr_i = Signal(n_units, reset_less=True)
self.shadown_i = Signal(n_units, reset_less=True)
self.go_die_i = Signal(n_units, reset_less=True)
+
+ # outputs
self.busy_o = Signal(n_units, reset_less=True)
self.rd_rel_o = Signal(n_units, reset_less=True)
self.req_rel_o = Signal(n_units, reset_less=True)
+ # in/out register data (note: not register#, actual data)
self.dest_o = Signal(rwid, reset_less=True)
self.src1_data_i = Signal(rwid, reset_less=True)
self.src2_data_i = Signal(rwid, reset_less=True)
+ # Branch ALU and CU
+ self.bgt = BranchALU(self.rwid)
+ self.br1 = ComputationUnitNoDelay(self.rwid, 2, self.bgt)
+
def elaborate(self, platform):
m = Module()
sub = ALU(self.rwid)
mul = ALU(self.rwid)
shf = ALU(self.rwid)
+ bgt = self.bgt
+
m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
- int_alus = [comp1, comp2, comp3, comp4]
+ m.submodules.br1 = br1 = self.br1
+ int_alus = [comp1, comp2, comp3, comp4, br1]
m.d.comb += comp1.oper_i.eq(Const(0, 2)) # op=add
m.d.comb += comp2.oper_i.eq(Const(1, 2)) # op=sub
m.d.comb += comp3.oper_i.eq(Const(2, 2)) # op=mul
m.d.comb += comp4.oper_i.eq(Const(3, 2)) # op=shf
+ m.d.comb += br1.oper_i.eq(Const(0, 2)) # op=bgt
go_rd_l = []
go_wr_l = []
self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
self.reg_enable_i = Signal(reset_less=True) # enable reg decode
+ # outputs
self.issue_o = Signal(reset_less=True) # instruction was accepted
self.busy_o = Signal(reset_less=True) # at least one CU is busy
+ # for branch speculation experiment. branch_direction = 0 if
+ # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
+ # branch_succ and branch_fail are requests to have the current
+ # instruction be dependent on the branch unit "shadow" capability.
+ self.branch_succ_i = Signal(reset_less=True)
+ self.branch_fail_i = Signal(reset_less=True)
+ self.branch_direction_o = Signal(2, reset_less=True)
+
def elaborate(self, platform):
m = Module()
m.submodules.intregs = self.intregs
m.submodules.fpregs = self.fpregs
+ # dummy values
+ m.d.sync += self.branch_succ_i.eq(Const(0))
+ m.d.sync += self.branch_fail_i.eq(Const(0))
+ m.d.sync += self.branch_direction_o.eq(Const(0))
+
# register ports
int_dest = self.intregs.write_port("dest")
int_src1 = self.intregs.read_port("src1")
fp_src2 = self.fpregs.read_port("src2")
# Int ALUs and Comp Units
- n_int_alus = 4
+ n_int_alus = 5
m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
m.d.comb += cu.go_die_i.eq(0)
+ bgt = cu.bgt # get at the branch computation unit
# Int FUs
m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
m.submodules.issueunit = issueunit
# Shadow Matrix. currently n_int_fus shadows, to be used for
- # write-after-write hazards
- m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus)
+ # write-after-write hazards. NOTE: there is one extra for branches,
+ # so the shadow width is increased by 1
+ m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus+1)
+
# combined go_rd/wr + go_die (go_die used to reset latches)
go_rd_rst = Signal(n_int_fus, reset_less=True)
go_wr_rst = Signal(n_int_fus, reset_less=True)
fn_issue_prev = Signal(n_int_fus)
prev_shadow = Signal(n_int_fus)
+ # Branch Speculation recorder. tracks the success/fail state as
+ # each instruction is issued, so that when the branch occurs the
+ # allow/cancel can be issued as appropriate.
+ m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)
+
#---------
# ok start wiring things together...
# "now hear de word of de looord... dem bones dem bones dem dryy bones"
m.d.comb += go_rd_rst.eq(go_rd_o | shadows.go_die_o)
m.d.comb += go_wr_rst.eq(go_wr_o | shadows.go_die_o)
+ #---------
+ # NOTE; this setup is for the instruction order preservation...
+
# connect shadows / go_dies to Computation Units
m.d.comb += cu.shadown_i[0:n_int_fus].eq(shadows.shadown_o[0:n_int_fus])
m.d.comb += cu.go_die_i[0:n_int_fus].eq(shadows.go_die_o[0:n_int_fus])
# if the previous is completed (!busy) don't cast the shadow!
m.d.comb += prev_shadow.eq(~fn_issue_o & fn_issue_prev & cu.busy_o)
for i in range(n_int_fus):
- m.d.comb += shadows.shadow_i[i].eq(prev_shadow)
+ m.d.comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)
+
+ #---------
+ # ... and this is for branch speculation. it uses the extra bit
+ # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
+ # only needs to set shadow_i, s_fail_i and s_good_i
+
+ m.d.comb += shadows.s_good_i[n_int_fus].eq(bspec.good_o[i])
+ m.d.comb += shadows.s_fail_i[n_int_fus].eq(bspec.fail_o[i])
+
+ with m.If(self.branch_succ_i | self.branch_fail_i):
+ for i in range(n_int_fus):
+ m.d.comb += shadows.shadow_i[i][n_int_fus].eq(1)
+
+ # finally, we need an indicator to the test infrastructure as to
+ # whether the branch succeeded or failed, plus, link up to the
+ # "recorder" of whether the instruction was under shadow or not
+
+ m.d.comb += bspec.issue_i.eq(fn_issue_o)
+ m.d.comb += bspec.good_i.eq(self.branch_succ_i)
+ m.d.comb += bspec.fail_i.eq(self.branch_fail_i)
+ # branch is active (TODO: a better signal: this is over-using the
+ # go_write signal - actually the branch should not be "writing")
+ with m.If(cu.br1.go_wr_i):
+ m.d.sync += self.branch_direction_o.eq(cu.br1.data_o+Const(1, 2))
+ m.d.comb += bspec.branch_i.eq(1)
#---------
# Connect Register File(s)
yield self.int_src1_i
yield self.int_src2_i
yield self.issue_o
- #yield from self.int_src1
- #yield from self.int_dest
- #yield from self.int_src1
- #yield from self.int_src2
- #yield from self.fp_dest
- #yield from self.fp_src1
- #yield from self.fp_src2
+ yield self.branch_succ_i
+ yield self.branch_fail_i
+ yield self.branch_direction_o
def ports(self):
    """Return the items produced by iterating over this object, as a list."""
    return [*self]
ISUB = 1
IMUL = 2
ISHF = 3
+IBGT = 4
+IBLT = 5
+IBEQ = 6
+IBNE = 7
class RegSim:
def __init__(self, rwidth, nregs):
def op(self, op, src1, src2, dest):
maxbits = (1 << self.rwidth) - 1
- src1 = self.regs[src1]
- src2 = self.regs[src2]
+ src1 = self.regs[src1] & maxbits
+ src2 = self.regs[src2] & maxbits
if op == IADD:
val = src1 + src2
elif op == ISUB:
val = src1 * src2
elif op == ISHF:
val = src1 >> (src2 & maxbits)
+ elif op == IBGT:
+ val = int(src1 > src2)
+ elif op == IBLT:
+ val = int(src1 < src2)
+ elif op == IBEQ:
+ val = int(src1 == src2)
+ elif op == IBNE:
+ val = int(src1 != src2)
val &= maxbits
self.regs[dest] = val
yield from self.dump(dut)
assert False
-def int_instr(dut, alusim, op, src1, src2, dest):
+def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
for i in range(len(dut.int_insn_i)):
yield dut.int_insn_i[i].eq(0)
yield dut.int_dest_i.eq(dest)
yield dut.int_src2_i.eq(src2)
yield dut.int_insn_i[op].eq(1)
yield dut.reg_enable_i.eq(1)
- alusim.op(op, src1, src2, dest)
+
+ # these indicate that the instruction is to be made shadow-dependent on
+ # (either) branch success or branch fail
+ yield dut.branch_fail_i.eq(branch_fail)
+ yield dut.branch_succ_i.eq(branch_success)
def print_reg(dut, rnums):
print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
-def scoreboard_sim(dut, alusim):
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly chosen instruction tuples.

    Each entry is ``(src1, src2, dest, op)``: the three register numbers
    are drawn from ``1 .. dut.n_regs-1`` and the opcode from
    ``0 .. max_opnums`` (both ranges inclusive).  When ``shadowing`` is
    true every entry carries a fifth element, the pair ``(False, False)``.
    """
    ops = []
    for _ in range(n_ops):
        # the four draws are made in this fixed order so that a seeded
        # random generator reproduces the same instruction stream
        top_reg = dut.n_regs - 1
        reg_a = randint(1, top_reg)
        reg_b = randint(1, top_reg)
        reg_out = randint(1, top_reg)
        opcode = randint(0, max_opnums)

        entry = (reg_a, reg_b, reg_out, opcode)
        if shadowing:
            entry += ((False, False),)
        ops.append(entry)
    return ops
+
+
def wait_for_busy_clear(dut):
    """Simulation helper: keep stepping until ``dut.busy_o`` reads false.

    Yields ``dut.busy_o`` to obtain its current value.  While that value
    is true, prints "busy", issues a bare ``yield`` (one simulator step)
    and samples again.  Finishes as soon as a false value is read.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy")
        yield
        still_busy = yield dut.busy_o
+
+
def wait_for_issue(dut):
    """Simulation helper: wait until ``dut.issue_o`` reads true, then
    withdraw the instruction request.

    Yields ``dut.issue_o`` to sample it.  While the sample is false a
    bare ``yield`` (one simulator step) is issued and the signal is
    sampled again.  Once it is true, every bit of ``dut.int_insn_i`` and
    then ``dut.reg_enable_i`` is assigned 0, and the generator ends.
    """
    accepted = yield dut.issue_o
    while not accepted:
        yield
        accepted = yield dut.issue_o

    # instruction accepted: clear the per-opcode request bits and the
    # register-decode enable
    for idx in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[idx].eq(0)
    yield dut.reg_enable_i.eq(0)
+
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch speculation.

    Two rounds are run.  In each round the registers of both ``dut`` and
    the reference model ``alusim`` are loaded with the same random
    values, five random instructions plus one branch (opcode 4..7) are
    queued, and the queue is issued to ``dut``.  When the branch is
    issued, its five "success" and five "fail" follow-on instructions
    are queued too, flagged as shadow-dependent on the respective
    outcome.  After the hardware goes idle, the same instruction stream
    is replayed on ``alusim`` and the two are compared with
    ``alusim.check`` / ``alusim.dump``.

    * :dut:    the scoreboard under test
    * :alusim: the software register model (needs ``rwidth``, ``regs``,
               ``setval``, ``op``, ``check`` and ``dump``)
    """
    # A branch has no real destination, but the issue interface and the
    # "%d" print below both need an integer register number.
    # NOTE(review): register 0 is chosen because neither the random ops
    # nor the register initialisation in this function ever use it;
    # confirm that it is a safe sink for the branch unit's result.
    branch_dest = 0

    yield dut.int_store_i.eq(1)

    for _ in range(2):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth) - 1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions: branches create a tree.
        # shadowing=True so that every queue entry is a 5-tuple, which is
        # what the unpacking in the issue loop requires.
        queue = create_random_ops(dut, 5, shadowing=True)

        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        op = randint(4, 7)

        branch_ok = create_random_ops(dut, 5)
        branch_fail = create_random_ops(dut, 5)

        # the branch carries its two follow-on lists in the "dest" slot
        queue.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # issue instruction(s).  "issued" logs, in issue order, everything
        # handed to the hardware so the model can replay it afterwards
        # (the queue itself is consumed by the loop).
        issued = []
        count = -1
        branch_direction = 0
        while queue:
            count += 1
            (src1, src2, dest, op, (shadow_on, shadow_off)) = queue.pop()
            if branch_direction == 1 and shadow_off:
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_on:
                continue  # branch was "fail" and this is a "success"... skip
            if op >= 4:  # branch
                branch_ok, branch_fail = dest
                dest = branch_dest
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch
                # success", the other marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    queue.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    queue.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d)" % (count, src1, src2, dest, op))
            issued.append((src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)
            # sample the signal's value (a plain attribute access would
            # only give the Signal object, never 1 or 2)
            branch_direction = yield dut.branch_direction_o

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the issued stream on the reference model.  The branch is
        # always logged before its follow-on instructions, so its outcome
        # is known by the time a shadow-flagged entry is reached.
        # NOTE(review): a non-zero compare result is treated as branch
        # "success" here; confirm this matches the hardware's encoding of
        # branch_direction_o.
        branch_taken = None
        for (src1, src2, dest, op, shadow_on, shadow_off) in issued:
            if branch_taken is True and shadow_off:
                continue  # "fail"-side instruction of a successful branch
            if branch_taken is False and shadow_on:
                continue  # "success"-side instruction of a failed branch
            alusim.op(op, src1, src2, dest)
            if op >= 4:
                # the model stores the compare result in regs[dest]
                branch_taken = bool(alusim.regs[dest])

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
+
+
+def scoreboard_sim(dut, alusim):
+
+ yield dut.int_store_i.eq(1)
+
+ for i in range(20):
+
# set random values in the registers
for i in range(1, dut.n_regs):
val = 31+i*3
# create some instructions (some random, some regression tests)
instrs = []
if True:
- for i in range(10):
- src1 = randint(1, dut.n_regs-1)
- src2 = randint(1, dut.n_regs-1)
- while True:
- dest = randint(1, dut.n_regs-1)
- break
- if dest not in [src1, src2]:
- break
- #src1 = 2
- #src2 = 3
- #dest = 2
-
- op = randint(0, 3)
- #op = i % 2
- #op = 0
-
- instrs.append((src1, src2, dest, op))
+ instrs = create_random_ops(dut, 10, False, 4)
if False:
instrs.append((2, 3, 3, 0))
for i, (src1, src2, dest, op) in enumerate(instrs):
print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
- yield from int_instr(dut, alusim, op, src1, src2, dest)
+ alusim.op(op, src1, src2, dest)
+ yield from int_instr(dut, op, src1, src2, dest, 0, 0)
yield
- while True:
- issue_o = yield dut.issue_o
- if issue_o:
- for i in range(len(dut.int_insn_i)):
- yield dut.int_insn_i[i].eq(0)
- yield dut.reg_enable_i.eq(0)
- break
- #print ("busy",)
- #yield from print_reg(dut, [1,2,3])
- yield
- #yield from print_reg(dut, [1,2,3])
+ yield from wait_for_issue(dut)
# wait for all instructions to stop before checking
yield
- while True:
- busy_o = yield dut.busy_o
- if not busy_o:
- break
- print ("busy",)
- yield
+ yield from wait_for_busy_clear(dut)
# check status
yield from alusim.check(dut)
yield from alusim.dump(dut)
-def explore_groups(dut):
- from nmigen.hdl.ir import Fragment
- from nmigen.hdl.xfrm import LHSGroupAnalyzer
-
- fragment = dut.elaborate(platform=None)
- fr = Fragment.get(fragment, platform=None)
-
- groups = LHSGroupAnalyzer()(fragment._statements)
-
- print (groups)
-
-
def test_scoreboard():
dut = Scoreboard(16, 8)
alusim = RegSim(16, 8)