From eaacf6aa884dd5b7db2399f3ec36247f963ffb3e Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 7 May 2019 05:43:49 +0100 Subject: [PATCH] add scoreboard source (moving from ieee754fpu repo) --- scoreboard/dependence_cell.py | 109 ++++++++++++ scoreboard/fn_unit.py | 327 ++++++++++++++++++++++++++++++++++ scoreboard/fu_dep_cell.py | 84 +++++++++ scoreboard/fu_fu_matrix.py | 157 ++++++++++++++++ scoreboard/fu_picker_vec.py | 21 +++ scoreboard/fu_reg_matrix.py | 225 +++++++++++++++++++++++ scoreboard/fu_wr_pending.py | 23 +++ scoreboard/global_pending.py | 93 ++++++++++ scoreboard/group_picker.py | 111 ++++++++++++ scoreboard/issue_unit.py | 143 +++++++++++++++ scoreboard/ldst_dep_cell.py | 95 ++++++++++ scoreboard/ldst_matrix.py | 135 ++++++++++++++ scoreboard/reg_select.py | 23 +++ scoreboard/shadow_fn.py | 79 ++++++++ 14 files changed, 1625 insertions(+) create mode 100644 scoreboard/dependence_cell.py create mode 100644 scoreboard/fn_unit.py create mode 100644 scoreboard/fu_dep_cell.py create mode 100644 scoreboard/fu_fu_matrix.py create mode 100644 scoreboard/fu_picker_vec.py create mode 100644 scoreboard/fu_reg_matrix.py create mode 100644 scoreboard/fu_wr_pending.py create mode 100644 scoreboard/global_pending.py create mode 100644 scoreboard/group_picker.py create mode 100644 scoreboard/issue_unit.py create mode 100644 scoreboard/ldst_dep_cell.py create mode 100644 scoreboard/ldst_matrix.py create mode 100644 scoreboard/reg_select.py create mode 100644 scoreboard/shadow_fn.py diff --git a/scoreboard/dependence_cell.py b/scoreboard/dependence_cell.py new file mode 100644 index 00000000..18e8d755 --- /dev/null +++ b/scoreboard/dependence_cell.py @@ -0,0 +1,109 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable +from nmutil.latch import SRLatch + + +class DependenceCell(Elaboratable): + """ implements 11.4.7 mitch alsup dependence cell, p27 + """ + def 
__init__(self): + # inputs + self.dest_i = Signal(reset_less=True) # Dest in (top) + self.src1_i = Signal(reset_less=True) # oper1 in (top) + self.src2_i = Signal(reset_less=True) # oper2 in (top) + self.issue_i = Signal(reset_less=True) # Issue in (top) + + self.go_write_i = Signal(reset_less=True) # Go Write in (left) + self.go_read_i = Signal(reset_less=True) # Go Read in (left) + + # for Register File Select Lines (vertical) + self.dest_rsel_o = Signal(reset_less=True) # dest reg sel (bottom) + self.src1_rsel_o = Signal(reset_less=True) # src1 reg sel (bottom) + self.src2_rsel_o = Signal(reset_less=True) # src2 reg sel (bottom) + + # for Function Unit "forward progress" (horizontal) + self.dest_fwd_o = Signal(reset_less=True) # dest FU fw (right) + self.src1_fwd_o = Signal(reset_less=True) # src1 FU fw (right) + self.src2_fwd_o = Signal(reset_less=True) # src2 FU fw (right) + + def elaborate(self, platform): + m = Module() + m.submodules.dest_l = dest_l = SRLatch() + m.submodules.src1_l = src1_l = SRLatch() + m.submodules.src2_l = src2_l = SRLatch() + + # destination latch: reset on go_write HI, set on dest and issue + m.d.comb += dest_l.s.eq(self.issue_i & self.dest_i) + m.d.comb += dest_l.r.eq(self.go_write_i) + + # src1 latch: reset on go_read HI, set on src1_i and issue + m.d.comb += src1_l.s.eq(self.issue_i & self.src1_i) + m.d.comb += src1_l.r.eq(self.go_read_i) + + # src2 latch: reset on go_read HI, set on src2_i and issue + m.d.comb += src2_l.s.eq(self.issue_i & self.src2_i) + m.d.comb += src2_l.r.eq(self.go_read_i) + + # FU "Forward Progress" (read out horizontally) + m.d.comb += self.dest_fwd_o.eq(dest_l.qn & self.dest_i) + m.d.comb += self.src1_fwd_o.eq(src1_l.qn & self.src1_i) + m.d.comb += self.src2_fwd_o.eq(src2_l.qn & 
self.go_read_i) + + return m + + def __iter__(self): + yield self.dest_i + yield self.src1_i + yield self.src2_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.dest_rsel_o + yield self.src1_rsel_o + yield self.src2_rsel_o + yield self.dest_fwd_o + yield self.src1_fwd_o + yield self.src2_fwd_o + + def ports(self): + return list(self) + + +def dcell_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_dcell(): + dut = DependenceCell() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_dcell.il", "w") as f: + f.write(vl) + + run_simulation(dut, dcell_sim(dut), vcd_name='test_dcell.vcd') + +if __name__ == '__main__': + test_dcell() diff --git a/scoreboard/fn_unit.py b/scoreboard/fn_unit.py new file mode 100644 index 00000000..b2ef9468 --- /dev/null +++ b/scoreboard/fn_unit.py @@ -0,0 +1,327 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Cat, Array, Const, Elaboratable +from nmutil.latch import SRLatch +from nmigen.lib.coding import Decoder + +from shadow_fn import ShadowFn + + +class FnUnit(Elaboratable): + """ implements 11.4.8 function unit, p31 + also implements optional shadowing 11.5.1, p55 + + shadowing can be used for branches as well as exceptions (interrupts), + load/store hold (exceptions again), and vector-element predication + (once the predicate is known, which it may not be at instruction issue) + + Inputs + + * :wid: register file width + * :shadow_wid: number of shadow/fail/good/go_die sets + * :n_dests: number of destination regfile(s) (index: rfile_sel_i) + * :wr_pend: if true, writable observes the g_wr_pend_i vector + 
otherwise observes g_rd_pend_i + + notes: + + * dest_i / src1_i / src2_i are in *binary*, whereas... + * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY + * req_rel_i (request release) is the direct equivalent of pipeline + "output valid" (valid_o) + * recover is a local python variable (actually go_die_o) + * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing) + * wr_pend is set False for the majority of uses: however for + use in a STORE Function Unit it is set to True + """ + def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False): + self.reg_width = wid + self.n_dests = n_dests + self.shadow_wid = shadow_wid + self.wr_pend = wr_pend + + # inputs + if n_dests > 1: + self.rfile_sel_i = Signal(max=n_dests, reset_less=True) + else: + self.rfile_sel_i = Const(0) # no selection. gets Array[0] + self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in (top) + self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in (top) + self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in (top) + self.issue_i = Signal(reset_less=True) # Issue in (top) + + self.go_write_i = Signal(reset_less=True) # Go Write in (left) + self.go_read_i = Signal(reset_less=True) # Go Read in (left) + self.req_rel_i = Signal(reset_less=True) # request release (left) + + self.g_xx_pend_i = Array(Signal(wid, reset_less=True, name="g_pend_i") \ + for i in range(n_dests)) # global rd (right) + self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right) + + if shadow_wid: + self.shadow_i = Signal(shadow_wid, reset_less=True) + self.s_fail_i = Signal(shadow_wid, reset_less=True) + self.s_good_i = Signal(shadow_wid, reset_less=True) + self.go_die_o = Signal(reset_less=True) + + # outputs + self.readable_o = Signal(reset_less=True) # Readable out (right) + self.writable_o = Array(Signal(reset_less=True, name="writable_o") \ + for i in range(n_dests)) # writable out (right) + self.busy_o = Signal(reset_less=True) # busy out (left) + + 
self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right) + self.xx_pend_o = Array(Signal(wid, reset_less=True, name="pend_o") \ + for i in range(n_dests))# wr pending (right) + + def elaborate(self, platform): + m = Module() + m.submodules.rd_l = rd_l = SRLatch(sync=False) + m.submodules.wr_l = wr_l = SRLatch(sync=False) + m.submodules.dest_d = dest_d = Decoder(self.reg_width) + m.submodules.src1_d = src1_d = Decoder(self.reg_width) + m.submodules.src2_d = src2_d = Decoder(self.reg_width) + s_latches = [] + for i in range(self.shadow_wid): + sh = ShadowFn() + setattr(m.submodules, "shadow%d" % i, sh) + s_latches.append(sh) + + # shadow / recover (optional: shadow_wid > 0) + if self.shadow_wid: + recover = self.go_die_o + shadown = Signal(reset_less=True) + i_l = [] + fail_l = [] + good_l = [] + shi_l = [] + sho_l = [] + rec_l = [] + # get list of latch signals. really must be a better way to do this + for l in s_latches: + i_l.append(l.issue_i) + shi_l.append(l.shadow_i) + fail_l.append(l.s_fail_i) + good_l.append(l.s_good_i) + sho_l.append(l.shadow_o) + rec_l.append(l.recover_o) + m.d.comb += Cat(*i_l).eq(self.issue_i) + m.d.comb += Cat(*fail_l).eq(self.s_fail_i) + m.d.comb += Cat(*good_l).eq(self.s_good_i) + m.d.comb += Cat(*shi_l).eq(self.shadow_i) + m.d.comb += shadown.eq(~(Cat(*sho_l).bool())) + m.d.comb += recover.eq(Cat(*rec_l).bool()) + else: + shadown = Const(1) + recover = Const(0) + + # selector + xx_pend_o = self.xx_pend_o[self.rfile_sel_i] + writable_o = self.writable_o[self.rfile_sel_i] + g_pend_i = self.g_xx_pend_i[self.rfile_sel_i] + + for i in range(self.n_dests): + m.d.comb += self.xx_pend_o[i].eq(0) # initialise all array + m.d.comb += self.writable_o[i].eq(0) # to zero + + # go_write latch: reset on go_write HI, set on issue + m.d.comb += wr_l.s.eq(self.issue_i) + m.d.comb += wr_l.r.eq(self.go_write_i | recover) + + # src1 latch: reset on go_read HI, set on issue + m.d.comb += rd_l.s.eq(self.issue_i) + m.d.comb += 
rd_l.r.eq(self.go_read_i | recover) + + # dest decoder: write-pending out + m.d.comb += dest_d.i.eq(self.dest_i) + m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted + m.d.comb += self.busy_o.eq(wr_l.q) # busy if set + m.d.comb += xx_pend_o.eq(dest_d.o) + + # src1/src2 decoder: read-pending out + m.d.comb += src1_d.i.eq(self.src1_i) + m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted + m.d.comb += src2_d.i.eq(self.src2_i) + m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted + m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o) + + # readable output signal + g_rd = Signal(self.reg_width, reset_less=True) + m.d.comb += g_rd.eq(self.g_wr_pend_i & self.rd_pend_o) + m.d.comb += self.readable_o.eq(g_rd.bool()) + + # writable output signal + g_wr_v = Signal(self.reg_width, reset_less=True) + g_wr = Signal(reset_less=True) + wo = Signal(reset_less=True) + m.d.comb += g_wr_v.eq(g_pend_i & xx_pend_o) + m.d.comb += g_wr.eq(~g_wr_v.bool()) + m.d.comb += wo.eq(g_wr & rd_l.q & self.req_rel_i & shadown) + m.d.comb += writable_o.eq(wo) + + return m + + def __iter__(self): + yield self.dest_i + yield self.src1_i + yield self.src2_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.req_rel_i + yield from self.g_xx_pend_i + yield self.g_wr_pend_i + yield self.readable_o + yield from self.writable_o + yield self.rd_pend_o + yield from self.xx_pend_o + + def ports(self): + return list(self) + +############# ############### +# --- --- # +# --- renamed / redirected from base class --- # +# --- --- # +# --- below are convenience classes which match the names --- # +# --- of the various mitch alsup book chapter gate diagrams --- # +# --- --- # +############# ############### + + +class IntFnUnit(FnUnit): + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid) + self.int_rd_pend_o = self.rd_pend_o + self.int_wr_pend_o = self.xx_pend_o[0] + self.g_int_wr_pend_i = self.g_wr_pend_i + self.g_int_rd_pend_i = self.g_xx_pend_i[0] + 
self.int_readable_o = self.readable_o + self.int_writable_o = self.writable_o[0] + + self.int_rd_pend_o.name = "int_rd_pend_o" + self.int_wr_pend_o.name = "int_wr_pend_o" + self.g_int_rd_pend_i.name = "g_int_rd_pend_i" + self.g_int_wr_pend_i.name = "g_int_wr_pend_i" + self.int_readable_o.name = "int_readable_o" + self.int_writable_o.name = "int_writable_o" + + +class FPFnUnit(FnUnit): + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid) + self.fp_rd_pend_o = self.rd_pend_o + self.fp_wr_pend_o = self.xx_pend_o[0] + self.g_fp_wr_pend_i = self.g_wr_pend_i + self.g_fp_rd_pend_i = self.g_xx_pend_i[0] + self.fp_writable_o = self.writable_o[0] + self.fp_readable_o = self.readable_o + + self.fp_rd_pend_o.name = "fp_rd_pend_o" + self.fp_wr_pend_o.name = "fp_wr_pend_o" + self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i" + self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i" + self.fp_writable_o.name = "fp_writable_o" + self.fp_readable_o.name = "fp_readable_o" + + +class LDFnUnit(FnUnit): + """ number of dest selectors: 2. 
assumes len(int_regfile) == len(fp_regfile) + * when rfile_sel_i == 0, int_wr_pend_o is set + * when rfile_sel_i == 1, fp_wr_pend_o is set + """ + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid, n_dests=2) + self.int_rd_pend_o = self.rd_pend_o + self.int_wr_pend_o = self.xx_pend_o[0] + self.fp_wr_pend_o = self.xx_pend_o[1] + self.g_int_wr_pend_i = self.g_wr_pend_i + self.g_int_rd_pend_i = self.g_xx_pend_i[0] + self.g_fp_rd_pend_i = self.g_xx_pend_i[1] + self.int_readable_o = self.readable_o + self.int_writable_o = self.writable_o[0] + self.fp_writable_o = self.writable_o[1] + + self.int_rd_pend_o.name = "int_rd_pend_o" + self.int_wr_pend_o.name = "int_wr_pend_o" + self.fp_wr_pend_o.name = "fp_wr_pend_o" + self.g_int_wr_pend_i.name = "g_int_wr_pend_i" + self.g_int_rd_pend_i.name = "g_int_rd_pend_i" + self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i" + self.int_readable_o.name = "int_readable_o" + self.int_writable_o.name = "int_writable_o" + self.fp_writable_o.name = "fp_writable_o" + + +class STFnUnit(FnUnit): + """ number of dest selectors: 2. 
assumes len(int_regfile) == len(fp_regfile) + * wr_pend=True indicates to observe global fp write pending + * when rfile_sel_i == 0, int_wr_pend_o is set + * when rfile_sel_i == 1, fp_wr_pend_o is set + * + """ + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True) + self.int_rd_pend_o = self.rd_pend_o # 1st int read-pending vector + self.int2_rd_pend_o = self.xx_pend_o[0] # 2nd int read-pending vector + self.fp_rd_pend_o = self.xx_pend_o[1] # 1x FP read-pending vector + # yes overwrite FnUnit base class g_wr_pend_i vector + self.g_int_wr_pend_i = self.g_wr_pend_i = self.g_xx_pend_i[0] + self.g_fp_wr_pend_i = self.g_xx_pend_i[1] + self.int_readable_o = self.readable_o + self.int_writable_o = self.writable_o[0] + self.fp_writable_o = self.writable_o[1] + + self.int_rd_pend_o.name = "int_rd_pend_o" + self.int2_rd_pend_o.name = "int2_rd_pend_o" + self.fp_rd_pend_o.name = "fp_rd_pend_o" + self.g_int_wr_pend_i.name = "g_int_wr_pend_i" + self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i" + self.int_readable_o.name = "int_readable_o" + self.int_writable_o.name = "int_writable_o" + self.fp_writable_o.name = "fp_writable_o" + + + +def int_fn_unit_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_int_fn_unit(): + dut = FnUnit(32, 2, 2) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fn_unit.il", "w") as f: + f.write(vl) + + dut = LDFnUnit(32, 2) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_ld_fn_unit.il", "w") as f: + f.write(vl) + + dut = STFnUnit(32, 0) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_st_fn_unit.il", "w") as f: + f.write(vl) + + run_simulation(dut, 
int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd') + +if __name__ == '__main__': + test_int_fn_unit() diff --git a/scoreboard/fu_dep_cell.py b/scoreboard/fu_dep_cell.py new file mode 100644 index 00000000..93ef28d3 --- /dev/null +++ b/scoreboard/fu_dep_cell.py @@ -0,0 +1,84 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable +from nmutil.latch import SRLatch + + +class FUDependenceCell(Elaboratable): + """ implements 11.4.7 mitch alsup dependence cell, p27 + """ + def __init__(self): + # inputs + self.rd_pend_i = Signal(reset_less=True) # read pending in (left) + self.wr_pend_i = Signal(reset_less=True) # write pending in (left) + self.issue_i = Signal(reset_less=True) # Issue in (top) + + self.go_write_i = Signal(reset_less=True) # Go Write in (left) + self.go_read_i = Signal(reset_less=True) # Go Read in (left) + + # outputs (latched rd/wr pend) + self.rd_pend_o = Signal(reset_less=True) # read pending out (right) + self.wr_pend_o = Signal(reset_less=True) # write pending out (right) + + def elaborate(self, platform): + m = Module() + m.submodules.rd_l = rd_l = SRLatch() + m.submodules.wr_l = wr_l = SRLatch() + + # write latch: reset on go_write HI, set on write pending and issue + m.d.comb += wr_l.s.eq(self.issue_i & self.wr_pend_i) + m.d.comb += wr_l.r.eq(self.go_write_i) + + # read latch: reset on go_read HI, set on read pending and issue + m.d.comb += rd_l.s.eq(self.issue_i & self.rd_pend_i) + m.d.comb += rd_l.r.eq(self.go_read_i) + + # Read/Write Pending Latches (read out horizontally) + m.d.comb += self.wr_pend_o.eq(wr_l.qn) + m.d.comb += self.rd_pend_o.eq(rd_l.qn) + + return m + + def __iter__(self): + yield self.rd_pend_i + yield self.wr_pend_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.rd_pend_o + yield self.wr_pend_o + + def ports(self): + return list(self) + + +def dcell_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) 
+ yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_dcell(): + dut = FUDependenceCell() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fu_dcell.il", "w") as f: + f.write(vl) + + run_simulation(dut, dcell_sim(dut), vcd_name='test_fu_dcell.vcd') + +if __name__ == '__main__': + test_dcell() diff --git a/scoreboard/fu_fu_matrix.py b/scoreboard/fu_fu_matrix.py new file mode 100644 index 00000000..6ffd4442 --- /dev/null +++ b/scoreboard/fu_fu_matrix.py @@ -0,0 +1,157 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable, Array, Cat + +#from nmutil.latch import SRLatch +from fu_dep_cell import FUDependenceCell +from fu_picker_vec import FU_Pick_Vec + +""" + + 6600 Function Unit Dependency Table Matrix inputs / outputs + ----------------------------------------------------------- + +""" + +class FUFUDepMatrix(Elaboratable): + """ implements 11.4.7 mitch alsup FU-to-FU Dependency Matrix, p26 + """ + def __init__(self, n_fu_row, n_fu_col): + self.n_fu_row = n_fu_row # Y (FU row#) ^v + self.n_fu_col = n_fu_col # X (FU col #) <> + self.rd_pend_i = Signal(n_fu_row, reset_less=True) # Rd pending (left) + self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left) + self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top) + + self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left) + self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left) + + # for Function Unit Readable/Writable (horizontal) + self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot) + self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot) + + def elaborate(self, platform): + m = Module() + + # --- + # 
matrix of dependency cells + # --- + dm = Array(Array(FUDependenceCell() for r in range(self.n_fu_row)) \ + for f in range(self.n_fu_col)) + for x in range(self.n_fu_col): + for y in range(self.n_fu_row): + setattr(m.submodules, "dm_fx%d_fy%d" % (x, y), dm[x][y]) + + # --- + # array of Function Unit Readable/Writable: row-length, horizontal + # --- + fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col)) + for x in range(self.n_fu_col): + setattr(m.submodules, "fur_x%d" % (x), fur[x]) + + # --- + # connect FU Readable/Writable vector + # --- + readable = [] + writable = [] + for x in range(self.n_fu_col): + fu = fur[x] + rd_pend_o = [] + wr_pend_o = [] + for y in range(self.n_fu_row): + dc = dm[x][y] + # accumulate cell outputs rd/wr-pending + rd_pend_o.append(dc.rd_pend_o) + wr_pend_o.append(dc.wr_pend_o) + # connect cell reg-select outputs to Reg Vector In + m.d.comb += [fu.rd_pend_i.eq(Cat(*rd_pend_o)), + fu.wr_pend_i.eq(Cat(*wr_pend_o)), + ] + # accumulate Readable/Writable Vector outputs + readable.append(fu.readable_o) + writable.append(fu.writable_o) + + # ... 
and output them from this module (horizontal, width=REGs) + m.d.comb += self.readable_o.eq(Cat(*readable)) + m.d.comb += self.writable_o.eq(Cat(*writable)) + + # --- + # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i + # --- + for y in range(self.n_fu_row): + issue_i = [] + for x in range(self.n_fu_col): + dc = dm[x][y] + # accumulate cell inputs issue + issue_i.append(dc.issue_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += Cat(*issue_i).eq(self.issue_i) + + # --- + # connect Matrix go_read_i/go_write_i to module readable/writable + # --- + for x in range(self.n_fu_col): + go_read_i = [] + go_write_i = [] + rd_pend_i = [] + wr_pend_i = [] + for y in range(self.n_fu_row): + dc = dm[x][y] + # accumulate cell rd_pend/wr_pend/go_read/go_write + rd_pend_i.append(dc.rd_pend_i) + wr_pend_i.append(dc.wr_pend_i) + go_read_i.append(dc.go_read_i) + go_write_i.append(dc.go_write_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += [Cat(*go_read_i).eq(self.go_read_i), + Cat(*go_write_i).eq(self.go_write_i), + Cat(*rd_pend_i).eq(self.rd_pend_i), + Cat(*wr_pend_i).eq(self.wr_pend_i), + ] + + return m + + def __iter__(self): + yield self.rd_pend_i + yield self.wr_pend_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.readable_o + yield self.writable_o + + def ports(self): + return list(self) + +def d_matrix_sim(dut): + """ XXX TODO + """ + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_fu_fu_matrix(): + dut = FUFUDepMatrix(n_fu_row=3, n_fu_col=4) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fu_fu_matrix.il", "w") as f: + f.write(vl) + + run_simulation(dut, 
d_matrix_sim(dut), vcd_name='test_fu_fu_matrix.vcd') + +if __name__ == '__main__': + test_fu_fu_matrix() diff --git a/scoreboard/fu_picker_vec.py b/scoreboard/fu_picker_vec.py new file mode 100644 index 00000000..fd44c45f --- /dev/null +++ b/scoreboard/fu_picker_vec.py @@ -0,0 +1,21 @@ +from nmigen import Elaboratable, Module, Signal, Cat + + +class FU_Pick_Vec(Elaboratable): + """ these are allocated per-FU (horizontally), + and are of length fu_row_n + """ + def __init__(self, fu_row_n): + self.fu_row_n = fu_row_n + self.rd_pend_i = Signal(fu_row_n, reset_less=True) + self.wr_pend_i = Signal(fu_row_n, reset_less=True) + + self.readable_o = Signal(reset_less=True) + self.writable_o = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.readable_o.eq(self.rd_pend_i.bool()) + m.d.comb += self.writable_o.eq(self.wr_pend_i.bool()) + return m + diff --git a/scoreboard/fu_reg_matrix.py b/scoreboard/fu_reg_matrix.py new file mode 100644 index 00000000..0826ea56 --- /dev/null +++ b/scoreboard/fu_reg_matrix.py @@ -0,0 +1,225 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable, Array, Cat + +#from nmutil.latch import SRLatch +from dependence_cell import DependenceCell +from fu_wr_pending import FU_RW_Pend +from reg_select import Reg_Rsv + +""" + + 6600 Dependency Table Matrix inputs / outputs + --------------------------------------------- + + d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i + | | | | | | | | | | | | | | | | + v v v v v v v v v v v v v v v v + go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend + go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend + go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend + | | | | | | | | | | | | + v v v v v v v v v v v v + d s1 s2 d s1 s2 d s1 s2 d s1 s2 + reg sel reg sel reg sel reg sel + +""" + +class FURegDepMatrix(Elaboratable): + """ implements 11.4.7 mitch 
alsup FU-to-Reg Dependency Matrix, p26 + """ + def __init__(self, n_fu_row, n_reg_col): + self.n_fu_row = n_fu_row # Y (FUs) ^v + self.n_reg_col = n_reg_col # X (Regs) <> + self.dest_i = Signal(n_reg_col, reset_less=True) # Dest in (top) + self.src1_i = Signal(n_reg_col, reset_less=True) # oper1 in (top) + self.src2_i = Signal(n_reg_col, reset_less=True) # oper2 in (top) + self.issue_i = Signal(n_reg_col, reset_less=True) # Issue in (top) + + self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left) + self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left) + + # for Register File Select Lines (horizontal), per-reg + self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot) + self.src1_rsel_o = Signal(n_reg_col, reset_less=True) # src1 reg (bot) + self.src2_rsel_o = Signal(n_reg_col, reset_less=True) # src2 reg (bot) + + # for Function Unit "forward progress" (vertical), per-FU + self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right) + self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right) + + def elaborate(self, platform): + m = Module() + + # --- + # matrix of dependency cells + # --- + dm = Array(Array(DependenceCell() for r in range(self.n_fu_row)) \ + for f in range(self.n_reg_col)) + for rn in range(self.n_reg_col): + for fu in range(self.n_fu_row): + setattr(m.submodules, "dm_r%d_fu%d" % (rn, fu), dm[rn][fu]) + + # --- + # array of Function Unit Pending vectors + # --- + fupend = Array(FU_RW_Pend(self.n_reg_col) for f in range(self.n_fu_row)) + for fu in range(self.n_fu_row): + setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu]) + + # --- + # array of Register Reservation vectors + # --- + regrsv = Array(Reg_Rsv(self.n_fu_row) for r in range(self.n_reg_col)) + for rn in range(self.n_reg_col): + setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn]) + + # --- + # connect Function Unit vector + # --- + wr_pend = [] + rd_pend = [] + for fu in range(self.n_fu_row): + fup = fupend[fu] + 
dest_fwd_o = [] + src1_fwd_o = [] + src2_fwd_o = [] + for rn in range(self.n_reg_col): + dc = dm[rn][fu] + # accumulate cell fwd outputs for dest/src1/src2 + dest_fwd_o.append(dc.dest_fwd_o) + src1_fwd_o.append(dc.src1_fwd_o) + src2_fwd_o.append(dc.src2_fwd_o) + # connect cell fwd outputs to FU Vector in [Cat is gooood] + m.d.comb += [fup.dest_fwd_i.eq(Cat(*dest_fwd_o)), + fup.src1_fwd_i.eq(Cat(*src1_fwd_o)), + fup.src2_fwd_i.eq(Cat(*src2_fwd_o)) + ] + # accumulate FU Vector outputs + wr_pend.append(fup.reg_wr_pend_o) + rd_pend.append(fup.reg_rd_pend_o) + + # ... and output them from this module (vertical, width=FUs) + m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend)) + m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend)) + + # --- + # connect Reg Selection vector + # --- + dest_rsel = [] + src1_rsel = [] + src2_rsel = [] + for rn in range(self.n_reg_col): + rsv = regrsv[rn] + dest_rsel_o = [] + src1_rsel_o = [] + src2_rsel_o = [] + for fu in range(self.n_fu_row): + dc = dm[rn][fu] + # accumulate cell reg-select outputs dest/src1/src2 + dest_rsel_o.append(dc.dest_rsel_o) + src1_rsel_o.append(dc.src1_rsel_o) + src2_rsel_o.append(dc.src2_rsel_o) + # connect cell reg-select outputs to Reg Vector In + m.d.comb += [rsv.dest_rsel_i.eq(Cat(*dest_rsel_o)), + rsv.src1_rsel_i.eq(Cat(*src1_rsel_o)), + rsv.src2_rsel_i.eq(Cat(*src2_rsel_o)), + ] + # accumulate Reg-Sel Vector outputs + dest_rsel.append(rsv.dest_rsel_o) + src1_rsel.append(rsv.src1_rsel_o) + src2_rsel.append(rsv.src2_rsel_o) + + # ... 
and output them from this module (horizontal, width=REGs) + m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel)) + m.d.comb += self.src1_rsel_o.eq(Cat(*src1_rsel)) + m.d.comb += self.src2_rsel_o.eq(Cat(*src2_rsel)) + + # --- + # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i + # --- + for rn in range(self.n_reg_col): + dest_i = [] + src1_i = [] + src2_i = [] + issue_i = [] + for fu in range(self.n_fu_row): + dc = dm[rn][fu] + # accumulate cell inputs dest/src1/src2 + dest_i.append(dc.dest_i) + src1_i.append(dc.src1_i) + src2_i.append(dc.src2_i) + issue_i.append(dc.issue_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += [Cat(*dest_i).eq(self.dest_i), + Cat(*src1_i).eq(self.src1_i), + Cat(*src2_i).eq(self.src2_i), + Cat(*issue_i).eq(self.issue_i), + ] + + # --- + # connect Dependency Matrix go_read_i/go_write_i to module go_rd/go_wr + # --- + for fu in range(self.n_fu_row): + go_read_i = [] + go_write_i = [] + for rn in range(self.n_reg_col): + dc = dm[rn][fu] + # accumulate cell fwd outputs for dest/src1/src2 + go_read_i.append(dc.go_read_i) + go_write_i.append(dc.go_write_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += [Cat(*go_read_i).eq(self.go_read_i), + Cat(*go_write_i).eq(self.go_write_i), + ] + + return m + + def __iter__(self): + yield self.dest_i + yield self.src1_i + yield self.src2_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.dest_rsel_o + yield self.src1_rsel_o + yield self.src2_rsel_o + yield self.wr_pend_o + yield self.rd_pend_o + + def ports(self): + return list(self) + +def d_matrix_sim(dut): + """ XXX TODO + """ + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + 
yield
+
+def test_d_matrix():
+    dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_fu_reg_matrix.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_reg_matrix.vcd')
+
+if __name__ == '__main__':
+    test_d_matrix()
diff --git a/scoreboard/fu_wr_pending.py b/scoreboard/fu_wr_pending.py
new file mode 100644
index 00000000..9b177ff0
--- /dev/null
+++ b/scoreboard/fu_wr_pending.py
@@ -0,0 +1,32 @@
+from nmigen import Elaboratable, Module, Signal, Cat
+
+
+class FU_RW_Pend(Elaboratable):
+    """ per-Function-Unit read/write pending flags
+
+        these are allocated per-FU (horizontally), and the three "fwd"
+        input vectors are each of length reg_count (one bit per register)
+
+        * reg_wr_pend_o is HI when any bit of dest_fwd_i is set
+        * reg_rd_pend_o is HI when any bit of src1_fwd_i or src2_fwd_i is set
+    """
+    def __init__(self, reg_count):
+        self.reg_count = reg_count
+        # inputs: one bit per register
+        self.dest_fwd_i = Signal(reg_count, reset_less=True)
+        self.src1_fwd_i = Signal(reg_count, reset_less=True)
+        self.src2_fwd_i = Signal(reg_count, reset_less=True)
+
+        # outputs: single-bit OR-reductions of the inputs
+        self.reg_wr_pend_o = Signal(reset_less=True)
+        self.reg_rd_pend_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        wr_pend = self.dest_fwd_i.bool()        # any dest bit set
+        rd_pend = Cat(self.src1_fwd_i, self.src2_fwd_i).bool() # any src bit
+        m.d.comb += [self.reg_wr_pend_o.eq(wr_pend),
+                     self.reg_rd_pend_o.eq(rd_pend),
+                    ]
+        return m
+
diff --git a/scoreboard/global_pending.py b/scoreboard/global_pending.py
new file mode 100644
index 00000000..50e43378
--- /dev/null
+++ b/scoreboard/global_pending.py
@@ -0,0 +1,78 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+
+class GlobalPending(Elaboratable):
+    """ implements Global Pending Vector, basically ORs all incoming Function
+        Unit vectors together.  Can be used for creating Read or Write Global
+        Pending.  Can be used for INT or FP Global Pending.
+
+        Inputs:
+        * :wid:       register file width
+        * :fu_vecs:   a python list of function unit "pending" vectors, each
+                      vector being a Signal of width equal to the reg file.
+
+        Notes:
+
+        * the regfile may be Int or FP, this code doesn't care which.
+          obviously do not try to put in a mixture of regfiles into fu_vecs.
+        * this code also doesn't care if it's used for Read Pending or Write
+          pending, it can be used for both: again, obviously, do not try to
+          put in a mixture of read *and* write pending vectors in.
+        * if some Function Units happen not to be uniform (i.e. they don't
+          operate on a particular register, which is extremely unusual),
+          they must set a Const zero bit in the vector.
+    """
+    def __init__(self, wid, fu_vecs):
+        self.reg_width = wid
+        # inputs
+        self.fu_vecs = fu_vecs
+        for v in fu_vecs:
+            assert len(v) == wid, "FU Vector must be same width as regfile"
+
+        self.g_pend_o = Signal(wid, reset_less=True)  # global pending vector
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # output bit N is the OR of bit N from every Function Unit vector
+        pend_l = [Cat(*[v[i] for v in self.fu_vecs]).bool()
+                  for i in range(self.reg_width)]
+        m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits
+
+        return m
+
+    def __iter__(self):
+        yield from self.fu_vecs
+        yield self.g_pend_o
+
+    def ports(self):
+        return list(self)
+
+
+def g_vec_sim(dut):
+    """ pulses one register bit on each Function Unit vector in turn.
+
+        GlobalPending has no dest_i / issue_i / go_read_i / go_write_i:
+        its only inputs are the fu_vecs, so those are what is driven here.
+    """
+    for idx, vec in enumerate(dut.fu_vecs):
+        yield vec.eq(1 << (idx % dut.reg_width))
+        yield
+        yield vec.eq(0)
+        yield
+
+def test_g_vec():
+    vecs = [Signal(32, name="fu%d" % i) for i in range(3)]
+    dut = GlobalPending(32, vecs)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_global_pending.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd')
+
+if __name__ == '__main__':
+    test_g_vec()
diff --git a/scoreboard/group_picker.py b/scoreboard/group_picker.py
new file mode 100644
index 00000000..8f959a18
--- /dev/null
+++ b/scoreboard/group_picker.py
@@ -0,0 +1,119 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+
+
+class PriorityPicker(Elaboratable):
+    """ implements a priority-picker.  input: N bits, output: N bits
+
+        at most one output bit is set: output bit N is HI when input bit N
+        is HI and every lower-numbered input bit is LO (bit 0 has the
+        highest priority).
+    """
+    def __init__(self, wid):
+        self.wid = wid
+        # inputs
+        self.i = Signal(wid, reset_less=True)
+        # outputs
+        self.o = Signal(wid, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        res = []
+        for i in range(0, self.wid):
+            tmp = Signal(reset_less = True)
+            if i == 0:
+                m.d.comb += tmp.eq(self.i[0])
+            else:
+                # must look at the lower *input* bits: using tmp on its own
+                # right-hand side would create a combinatorial loop.
+                m.d.comb += tmp.eq(~self.i[:i].bool() & self.i[i])
+            res.append(tmp)
+
+        # we like Cat(*xxx).  turn lists into concatenated bits
+        m.d.comb += self.o.eq(Cat(*res))
+
+        return m
+
+    def __iter__(self):
+        yield self.i
+        yield self.o
+
+    def ports(self):
+        return list(self)
+
+
+class GroupPicker(Elaboratable):
+    """ implements 10.5 mitch alsup group picker, p27
+
+        go_rd_o is the priority-pick of readable_i; go_wr_o is the
+        priority-pick of (writable_i AND rel_req_i).
+    """
+    def __init__(self, wid):
+        self.gp_wid = wid
+        # inputs
+        self.readable_i = Signal(wid, reset_less=True) # readable in (top)
+        self.writable_i = Signal(wid, reset_less=True) # writable in (top)
+        self.rel_req_i = Signal(wid, reset_less=True) # release request in (top)
+
+        # outputs
+        self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom)
+        self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        m.submodules.rpick = rpick = PriorityPicker(self.gp_wid)
+        m.submodules.wpick = wpick = PriorityPicker(self.gp_wid)
+
+        # combine release (output ready signal) with writeable
+        m.d.comb += wpick.i.eq(self.writable_i & self.rel_req_i)
+        m.d.comb += self.go_wr_o.eq(wpick.o)
+
+        # reads are picked straight from the readable vector
+        m.d.comb += rpick.i.eq(self.readable_i)
+        m.d.comb += self.go_rd_o.eq(rpick.o)
+
+        return m
+
+    def __iter__(self):
+        yield self.readable_i
+        yield self.writable_i
+        yield self.rel_req_i
+        yield self.go_rd_o
+        yield self.go_wr_o
+
+    def ports(self):
+        return list(self)
+
+
+def grp_pick_sim(dut):
+    """ drives the request vectors that GroupPicker actually has
+        (assumes a picker width of at least 4, as created by test_grp_pick)
+    """
+    # two units readable at once
+    yield dut.readable_i.eq(0b0110)
+    yield
+    yield dut.readable_i.eq(0)
+    yield
+    # writable, but no release requested yet
+    yield dut.writable_i.eq(0b1100)
+    yield
+    # release requested by one of the writable units
+    yield dut.rel_req_i.eq(0b1000)
+    yield
+    yield dut.writable_i.eq(0)
+    yield dut.rel_req_i.eq(0)
+    yield
+
+def test_grp_pick():
+    dut = GroupPicker(4)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_grp_pick.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, grp_pick_sim(dut), vcd_name='test_grp_pick.vcd')
+
+if __name__ == '__main__':
+    test_grp_pick()
diff --git a/scoreboard/issue_unit.py b/scoreboard/issue_unit.py
new file mode 100644
index 00000000..d1f58d11
--- /dev/null
+++ b/scoreboard/issue_unit.py
@@ -0,0 +1,162 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Record, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+from shadow_fn import ShadowFn
+
+
+class IssueUnit(Elaboratable):
+    """ implements 11.4.14 issue unit, p50
+
+        Inputs
+
+        * :wid:         register file width
+        * :n_insns:     number of instructions in this issue unit.
+
+        g_issue_o is LO (issue stalled) when the decoded destination
+        register has a write pending, or when any requested instruction
+        (insn_i[n] HI) has its busy_i[n] HI.  fn_issue_o[n] is g_issue_o
+        gated by insn_i[n].
+    """
+    def __init__(self, wid, n_insns):
+        self.reg_width = wid
+        self.n_insns = n_insns
+
+        # inputs
+        self.store_i = Signal(reset_less=True) # instruction is a store
+        self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in
+        self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in
+        self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in
+
+        self.g_wr_pend_i = Signal(wid, reset_less=True) # write pending vector
+
+        self.insn_i = Array(Signal(reset_less=True, name="insn_i") \
+                               for i in range(n_insns))
+        self.busy_i = Array(Signal(reset_less=True, name="busy_i") \
+                               for i in range(n_insns))
+
+        # outputs
+        self.fn_issue_o = Array(Signal(reset_less=True, name="fn_issue_o") \
+                               for i in range(n_insns))
+        self.g_issue_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
+
+        # temporaries
+        waw_stall = Signal(reset_less=True)
+        fu_stall = Signal(reset_less=True)
+        pend = Signal(self.reg_width, reset_less=True)
+
+        # dest decoder: write-pending
+        # NOTE(review): nmigen's Decoder zeroes its output while "n" is HI,
+        # so as written the WAW check only takes effect when store_i is
+        # HI.  confirm this polarity is what is intended.
+        m.d.comb += dest_d.i.eq(self.dest_i)
+        m.d.comb += dest_d.n.eq(~self.store_i) # decode is inverted
+        m.d.comb += pend.eq(dest_d.o & self.g_wr_pend_i)
+        m.d.comb += waw_stall.eq(pend.bool())
+
+        # stall if any requested instruction has its busy bit set
+        ib_l = [insn & busy for insn, busy in zip(self.insn_i, self.busy_i)]
+        m.d.comb += fu_stall.eq(Cat(*ib_l).bool())
+        m.d.comb += self.g_issue_o.eq(~(waw_stall | fu_stall))
+        # per-instruction issue: global issue gated by the request bit
+        for insn, fn_issue in zip(self.insn_i, self.fn_issue_o):
+            m.d.comb += fn_issue.eq(self.g_issue_o & insn)
+
+        return m
+
+    def __iter__(self):
+        yield self.store_i
+        yield self.dest_i
+        yield self.src1_i
+        yield self.src2_i
+        yield self.g_wr_pend_i
+        yield from self.insn_i
+        yield from self.busy_i
+        yield from self.fn_issue_o
+        yield self.g_issue_o
+
+    def ports(self):
+        return list(self)
+
+
+class IntFPIssueUnit(Elaboratable):
+    """ pairs an integer IssueUnit with a floating-point IssueUnit
+
+        issue_o is HI when either of the two units' g_issue_o is HI.
+    """
+    def __init__(self, wid, n_int_insns, n_fp_insns):
+        self.i = IssueUnit(wid, n_int_insns)
+        self.f = IssueUnit(wid, n_fp_insns)
+        self.issue_o = Signal(reset_less=True)
+
+        # some renames
+        self.int_write_pending_i = self.i.g_wr_pend_i
+        self.fp_write_pending_i = self.f.g_wr_pend_i
+        self.int_write_pending_i.name = 'int_write_pending_i'
+        self.fp_write_pending_i.name = 'fp_write_pending_i'
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.intissue = self.i
+        m.submodules.fpissue = self.f
+
+        m.d.comb += self.issue_o.eq(self.i.g_issue_o | self.f.g_issue_o)
+
+        return m
+
+    def __iter__(self):
+        yield self.issue_o
+        yield from self.i
+        yield from self.f
+
+    def ports(self):
+        # a list rather than a one-shot generator, like every other ports()
+        return list(self)
+
+
+def issue_unit_sim(dut):
+    """ drives the inputs that IssueUnit actually has (there is no
+        issue_i / go_read_i / go_write_i on this unit)
+    """
+    # request instruction 0, destination register 1
+    yield dut.dest_i.eq(1)
+    yield dut.insn_i[0].eq(1)
+    yield
+    # mark instruction 0 as busy, then free again
+    yield dut.busy_i[0].eq(1)
+    yield
+    yield dut.busy_i[0].eq(0)
+    yield
+    # write pending on register 1: once as a store, once as a non-store
+    yield dut.g_wr_pend_i.eq(1 << 1)
+    yield dut.store_i.eq(1)
+    yield
+    yield dut.store_i.eq(0)
+    yield
+    yield dut.g_wr_pend_i.eq(0)
+    yield dut.insn_i[0].eq(0)
+    yield
+
+def test_issue_unit():
+    dut = IssueUnit(32, 3)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_issue_unit.il", "w") as f:
+        f.write(vl)
+
+    # separate name: the simulation below drives IssueUnit ports, which
+    # IntFPIssueUnit does not have at its top level
+    intfp = IntFPIssueUnit(32, 3, 3)
+    vl = rtlil.convert(intfp, ports=intfp.ports())
+    with open("test_intfp_issue_unit.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd')
+
+if __name__ == '__main__':
+    test_issue_unit()
diff --git a/scoreboard/ldst_dep_cell.py b/scoreboard/ldst_dep_cell.py
new file mode 100644
index 00000000..40e1ffbc
--- /dev/null
+++ b/scoreboard/ldst_dep_cell.py
@@ -0,0 +1,98 @@
+""" Mitch Alsup 6600-style LD/ST scoreboard Dependency Cell
+
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+
+"""
+
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable
+from nmutil.latch import SRLatch
+
+
+class LDSTDepCell(Elaboratable):
+    """ implements 11.4.12 mitch alsup load/store dependence cell, p45
+    """
+    def __init__(self):
+        # inputs
+        self.load_i = Signal(reset_less=True)     # load pending in (top)
+        self.stor_i = Signal(reset_less=True)     # store pending in (top)
+        self.issue_i = Signal(reset_less=True)    # Issue in (top)
+
+        self.load_hit_i = Signal(reset_less=True) # load hit in (right)
+        self.stwd_hit_i = Signal(reset_less=True) # store w/ data hit in (right)
+
+        # outputs (latched rd/wr pend)
+        self.ld_hold_st_o = Signal(reset_less=True) # load holds st out (left)
+        self.st_hold_ld_o = Signal(reset_less=True) # st holds load out (left)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.war_l = war_l = SRLatch(sync=False) # WriteAfterRead Latch
+        m.submodules.raw_l = raw_l = SRLatch(sync=False) # ReadAfterWrite Latch
+
+        # issue & store & load - used for both WAR and RAW Setting
+        i_s_l = Signal(reset_less=True)
+        m.d.comb += i_s_l.eq(self.issue_i & self.stor_i & self.load_i)
+
+        # write after read latch: loads block stores
+        m.d.comb += war_l.s.eq(i_s_l)
+        m.d.comb += war_l.r.eq(self.load_i) # reset on LD
+
+        # read after write latch: stores block loads
+        m.d.comb += raw_l.s.eq(i_s_l)
+        m.d.comb += raw_l.r.eq(self.stor_i) # reset on ST
+
+        # Hold results (read out horizontally, accumulate in OR fashion)
+        m.d.comb += self.ld_hold_st_o.eq(war_l.qn & self.load_hit_i)
+        m.d.comb += self.st_hold_ld_o.eq(raw_l.qn & self.stwd_hit_i)
+
+        return m
+
+    def __iter__(self):
+        yield self.load_i
+        yield self.stor_i
+        yield self.issue_i
+        yield self.load_hit_i
+        yield self.stwd_hit_i
+        yield self.ld_hold_st_o
+        yield self.st_hold_ld_o
+
+    def ports(self):
+        return list(self)
+
+
+def dcell_sim(dut):
+    """ drives the inputs that LDSTDepCell actually has (the cell has
+        no dest_i / src1_i / go_read_i / go_write_i)
+    """
+    # load and store both pending at issue time
+    yield dut.load_i.eq(1)
+    yield dut.stor_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield dut.issue_i.eq(0)
+    yield dut.load_i.eq(0)
+    yield dut.stor_i.eq(0)
+    yield
+    # pulse each of the two "hit" inputs in turn
+    yield dut.load_hit_i.eq(1)
+    yield
+    yield dut.load_hit_i.eq(0)
+    yield
+    yield dut.stwd_hit_i.eq(1)
+    yield
+    yield dut.stwd_hit_i.eq(0)
+    yield
+
+def test_dcell():
+    dut = LDSTDepCell()
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_ldst_dcell.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, dcell_sim(dut), vcd_name='test_ldst_dcell.vcd')
+
+if __name__ == '__main__':
+    test_dcell()
diff --git a/scoreboard/ldst_matrix.py b/scoreboard/ldst_matrix.py
new file mode 100644
index 00000000..b872155d
--- /dev/null
+++ b/scoreboard/ldst_matrix.py
@@ -0,0 +1,116 @@
+""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector)
+
+6600 LD/ST Dependency Table Matrix inputs / outputs
+---------------------------------------------------
+
+Relevant comments (p45-46):
+
+* If there are no WAR dependencies on a Load instruction with a computed
+  address it can assert Bank_Addressable and Translate_Addressable.
+
+* If there are no RAW dependencies on a Store instruction with both a
+  write permission and store data present it can assert Bank_Addressable
+
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+
+"""
+
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
+
+from ldst_dep_cell import LDSTDepCell
+
+
+class LDSTDepMatrix(Elaboratable):
+    """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46
+        actually a sparse matrix along the diagonal.
+
+        load-hold-store and store-hold-load accumulate in a priority-picking
+        fashion, ORing together.  the OR gate from the dependency cell is
+        here.
+    """
+    def __init__(self, n_ldst):
+        self.n_ldst = n_ldst                  # X and Y (FUs)
+        self.load_i = Signal(n_ldst, reset_less=True)  # load pending in
+        self.stor_i = Signal(n_ldst, reset_less=True)  # store pending in
+        self.issue_i = Signal(n_ldst, reset_less=True) # Issue in
+
+        self.load_hit_i = Signal(n_ldst, reset_less=True) # load hit in
+        self.stwd_hit_i = Signal(n_ldst, reset_less=True) # store w/data hit in
+
+        # outputs
+        self.ld_hold_st_o = Signal(reset_less=True) # load holds st out
+        self.st_hold_ld_o = Signal(reset_less=True) # st holds load out
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # ---
+        # matrix of dependency cells
+        # ---
+        dm = Array(LDSTDepCell() for f in range(self.n_ldst))
+        for fu in range(self.n_ldst):
+            setattr(m.submodules, "dm_fu%d" % (fu), dm[fu])
+
+        # ---
+        # connect Function Unit vector: cell N's one-bit inputs are wired
+        # to bit N of the corresponding module-level input vector
+        # ---
+        for name in ("load_i", "stor_i", "issue_i",
+                     "load_hit_i", "stwd_hit_i"):
+            cell_bits = [getattr(dc, name) for dc in dm]
+            m.d.comb += Cat(*cell_bits).eq(getattr(self, name))
+
+        # set the load-hold-store / store-hold-load OR-accumulated outputs
+        lhs_l = [dc.ld_hold_st_o for dc in dm]
+        shl_l = [dc.st_hold_ld_o for dc in dm]
+        m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l).bool())
+        m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l).bool())
+
+        return m
+
+    def __iter__(self):
+        yield self.load_i
+        yield self.stor_i
+        yield self.issue_i
+        yield self.load_hit_i
+        yield self.stwd_hit_i
+        yield self.ld_hold_st_o
+        yield self.st_hold_ld_o
+
+    def ports(self):
+        return list(self)
+
+def d_matrix_sim(dut):
+    """ drives the inputs that LDSTDepMatrix actually has; only bit 0 of
+        each vector (the cell for Function Unit 0) is exercised
+    """
+    yield dut.load_i.eq(1)
+    yield dut.stor_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield dut.issue_i.eq(0)
+    yield dut.load_i.eq(0)
+    yield dut.stor_i.eq(0)
+    yield
+    yield dut.load_hit_i.eq(1)
+    yield
+    yield dut.load_hit_i.eq(0)
+    yield
+    yield dut.stwd_hit_i.eq(1)
+    yield
+    yield dut.stwd_hit_i.eq(0)
+    yield
+
+def test_d_matrix():
+    dut = LDSTDepMatrix(n_ldst=4)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_ld_st_matrix.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, d_matrix_sim(dut), vcd_name='test_ld_st_matrix.vcd')
+
+if __name__ == '__main__':
+    test_d_matrix()
diff --git a/scoreboard/reg_select.py b/scoreboard/reg_select.py
new file mode 100644
index 00000000..eca3328e
--- /dev/null
+++ b/scoreboard/reg_select.py
@@ -0,0 +1,28 @@
+from nmigen import Elaboratable, Module, Signal
+
+
+class Reg_Rsv(Elaboratable):
+    """ per-Register select: ORs each incoming select vector to one bit
+
+        these are allocated per-Register (vertically),
+        and the input vectors are each of length fu_count
+    """
+    def __init__(self, fu_count):
+        self.fu_count = fu_count
+        # inputs: one bit per Function Unit
+        self.dest_rsel_i = Signal(fu_count, reset_less=True)
+        self.src1_rsel_i = Signal(fu_count, reset_less=True)
+        self.src2_rsel_i = Signal(fu_count, reset_less=True)
+        # outputs: HI when any bit of the matching input vector is set
+        self.dest_rsel_o = Signal(reset_less=True)
+        self.src1_rsel_o = Signal(reset_less=True)
+        self.src2_rsel_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.d.comb += [self.dest_rsel_o.eq(self.dest_rsel_i.bool()),
+                     self.src1_rsel_o.eq(self.src1_rsel_i.bool()),
+                     self.src2_rsel_o.eq(self.src2_rsel_i.bool()),
+                    ]
+        return m
+
diff --git a/scoreboard/shadow_fn.py b/scoreboard/shadow_fn.py
new file mode 100644
index 00000000..a60f9d95
--- /dev/null
+++ b/scoreboard/shadow_fn.py
@@ -0,0 +1,83 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+
+class ShadowFn(Elaboratable):
+    """ implements shadowing 11.5.1, p55, just the individual shadow function
+
+        shadow_o follows an SRLatch that is set by (shadow_i AND issue_i) and
+        reset by s_good_i; recover_o is that latch output ANDed with s_fail_i.
+    """
+    def __init__(self):
+
+        # inputs
+        self.issue_i = Signal(reset_less=True)
+        self.shadow_i = Signal(reset_less=True)
+        self.s_fail_i = Signal(reset_less=True)
+        self.s_good_i = Signal(reset_less=True)
+
+        # outputs
+        self.shadow_o = Signal(reset_less=True)
+        self.recover_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.sl = sl = SRLatch(sync=False)
+
+        m.d.comb += sl.s.eq(self.shadow_i & self.issue_i)
+        m.d.comb += sl.r.eq(self.s_good_i)
+        m.d.comb += self.recover_o.eq(sl.q & self.s_fail_i)
+        m.d.comb += self.shadow_o.eq(sl.q)
+
+        return m
+
+    def __iter__(self):
+        yield self.issue_i
+        yield self.shadow_i
+        yield self.s_fail_i
+        yield self.s_good_i
+        yield self.shadow_o
+        yield self.recover_o
+
+    def ports(self):
+        return list(self)
+
+
+def shadow_fn_unit_sim(dut):
+    """ drives the inputs that ShadowFn actually has: the latch set condition,
+        a pulse on s_fail_i, a pulse on s_good_i, then s_fail_i again
+    """
+    yield dut.shadow_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield dut.issue_i.eq(0)
+    yield dut.shadow_i.eq(0)
+    yield
+    yield dut.s_fail_i.eq(1)
+    yield
+    yield dut.s_fail_i.eq(0)
+    yield
+    yield dut.s_good_i.eq(1)
+    yield
+    yield dut.s_good_i.eq(0)
+    yield
+    yield dut.s_fail_i.eq(1)
+    yield
+    yield dut.s_fail_i.eq(0)
+    yield
+
+
+def test_shadow_fn_unit():
+    dut = ShadowFn()
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_shadow_fn_unit.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, shadow_fn_unit_sim(dut),
+                   vcd_name='test_shadow_fn_unit.vcd')
+
+if __name__ == '__main__':
+    test_shadow_fn_unit()
-- 
2.30.2