From 5b9c8d3cc7005f3b4b4db14162e507c47d87fe6d Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 27 Nov 2021 11:19:57 +0000 Subject: [PATCH] convert DependencyRow to multiple destination latches --- src/soc/scoreboard/dependence_cell.py | 73 ++++++++++++++++++--------- src/soc/scoreboard/fu_reg_matrix.py | 4 +- 2 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/soc/scoreboard/dependence_cell.py b/src/soc/scoreboard/dependence_cell.py index 9dc1ffc7..a105d171 100644 --- a/src/soc/scoreboard/dependence_cell.py +++ b/src/soc/scoreboard/dependence_cell.py @@ -9,8 +9,6 @@ from nmigen.compat.sim import run_simulation from nmigen.cli import verilog, rtlil from nmigen import Module, Signal, Elaboratable, Cat, Repl from nmutil.latch import SRLatch -from functools import reduce -from operator import or_ class DependencyRow(Elaboratable): @@ -34,10 +32,11 @@ class DependencyRow(Elaboratable): asynchronous) would be reset at the exact moment that GO was requested, and the RSEL would be garbage. """ - def __init__(self, n_reg, n_src, cancel_mode=False): + def __init__(self, n_reg, n_src, n_dst, cancel_mode=False): self.cancel_mode = cancel_mode self.n_reg = n_reg self.n_src = n_src + self.n_dst = n_dst # arrays src = [] rsel = [] @@ -47,11 +46,19 @@ class DependencyRow(Elaboratable): src.append(Signal(n_reg, name="src%d" % j, reset_less=True)) rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True)) fwd.append(Signal(n_reg, name="src%d_fwd_o" % j, reset_less=True)) + dst = [] + dsel = [] + dfwd = [] + for i in range(n_dst): + j = i + 1 # name numbering to match src1/src2 + dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True)) + dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True)) + dfwd.append(Signal(n_reg, name="dst%d_fwd_o" % j, reset_less=True)) # inputs - self.dest_i = Signal(n_reg, reset_less=True) # Dest in (top) - self.src_i = tuple(src) # operands in (top) - self.issue_i = Signal(reset_less=True) # Issue in (top) + self.dst_i = tuple(dst) # Dest in (top) + self.src_i = tuple(src) # operands in (top) + self.issue_i = Signal(reset_less=True) # Issue in (top) self.rd_pend_i = Signal(n_reg, reset_less=True) # Read pend in (top) self.wr_pend_i = Signal(n_reg, reset_less=True) # Write pend in (top) @@ -66,20 +73,32 @@ class DependencyRow(Elaboratable): self.go_die_i = Signal(reset_less=True) # Go Die in (left) # for Register File Select Lines (vertical) - self.dest_rsel_o = Signal(n_reg, reset_less=True) # dest reg sel (bot) - self.src_rsel_o = tuple(rsel) # src reg sel (bot) + self.dst_rsel_o = tuple(dsel) # dest reg sel (bot) + self.src_rsel_o = tuple(rsel) # src reg sel (bot) # for Function Unit "forward progress" (horizontal) - self.dest_fwd_o = Signal(n_reg, reset_less=True) # dest FU fw (right) - self.src_fwd_o = tuple(fwd) # src FU fw (right) + self.dst_fwd_o = tuple(dfwd) # dest FU fw (right) + self.src_fwd_o = tuple(fwd) # src FU fw (right) + + # for temporary (transitional) compatibility with old API + # number of dests used to be 1 (one) - increasing to n_dst + self.dest_i = self.dst_i[0] + self.dest_rsel_o = self.dst_rsel_o[0] + self.dest_fwd_o = self.dst_fwd_o[0] def elaborate(self, platform): m = Module() - m.submodules.dest_c = dest_c = SRLatch(sync=False, llen=self.n_reg) + # create source and dest SRLatches + dst_c = [] + for i in range(self.n_dst): + dst_l = SRLatch(sync=False, llen=self.n_reg) + m.submodules["dst%d_c" % (i+1)] = dst_l + dst_c.append(dst_l) + src_c = [] for i in range(self.n_src): src_l = SRLatch(sync=False, llen=self.n_reg) - setattr(m.submodules, "src%d_c" % (i+1), src_l) + m.submodules["src%d_c" % (i+1)] = src_l src_c.append(src_l) # connect go_rd / go_wr (dest->wr, src->rd) @@ -91,25 +110,29 @@ class DependencyRow(Elaboratable): go_die = Repl(self.go_die_i, self.n_reg) m.d.comb += wr_die.eq(Repl(self.go_wr_i, self.n_reg) | go_die) m.d.comb += rd_die.eq(Repl(self.go_rd_i, self.n_reg) | go_die) - m.d.comb += dest_c.r.eq(wr_die) + for i in range(self.n_dst): + m.d.comb += dst_c[i].r.eq(wr_die) for i in range(self.n_src): m.d.comb += src_c[i].r.eq(rd_die) # connect input reg bit (unary) i_ext = Repl(self.issue_i, self.n_reg) - m.d.comb += dest_c.s.eq(i_ext & self.dest_i) + for i in range(self.n_dst): + m.d.comb += dst_c[i].s.eq(i_ext & self.dst_i[i]) for i in range(self.n_src): m.d.comb += src_c[i].s.eq(i_ext & self.src_i[i]) # connect up hazard checks: read-after-write and write-after-read - m.d.comb += self.dest_fwd_o.eq(dest_c.q & self.rd_pend_i) + for i in range(self.n_dst): + m.d.comb += self.dst_fwd_o[i].eq(dst_c[i].q & self.rd_pend_i) for i in range(self.n_src): m.d.comb += self.src_fwd_o[i].eq(src_c[i].q & self.wr_pend_i) # connect reg-sel outputs rd_ext = Repl(self.go_rd_i, self.n_reg) wr_ext = Repl(self.go_wr_i, self.n_reg) - m.d.comb += self.dest_rsel_o.eq(dest_c.qlq & wr_ext) + for i in range(self.n_dst): + m.d.comb += self.dst_rsel_o[i].eq(dst_c[i].qlq & wr_ext) for i in range(self.n_src): m.d.comb += self.src_rsel_o[i].eq(src_c[i].qlq & rd_ext) @@ -118,13 +141,16 @@ class DependencyRow(Elaboratable): src_q = [] for i in range(self.n_src): src_q.append(src_c[i].qlq) - m.d.comb += self.v_rd_rsel_o.eq(reduce(or_, src_q)) - m.d.comb += self.v_wr_rsel_o.eq(dest_c.qlq) + m.d.comb += self.v_rd_rsel_o.eq(Cat(*src_q).bool()) + dst_q = [] + for i in range(self.n_dst): + dst_q.append(dst_c[i].qlq) + m.d.comb += self.v_wr_rsel_o.eq(Cat(*dst_q).bool()) return m def __iter__(self): - yield self.dest_i + yield from self.dst_i yield from self.src_i yield self.rd_pend_i yield self.wr_pend_i @@ -132,22 +158,23 @@ class DependencyRow(Elaboratable): yield self.go_wr_i yield self.go_rd_i yield self.go_die_i - yield self.dest_rsel_o + yield from self.dst_rsel_o yield from self.src_rsel_o - yield self.dest_fwd_o + yield from self.dst_fwd_o yield from self.src_fwd_o def ports(self): return list(self) +# XXX not up-to-date but hey def dcell_sim(dut): yield dut.dest_i.eq(1) yield dut.issue_i.eq(1) yield yield dut.issue_i.eq(0) yield - yield dut.src1_i.eq(1) + yield dut.src_i[0].eq(1) yield dut.issue_i.eq(1) yield yield @@ -164,7 +191,7 @@ def dcell_sim(dut): yield def test_dcell(): - dut = DependencyRow(4, 2, True) + dut = DependencyRow(4, 2, 2, True) vl = rtlil.convert(dut, ports=dut.ports()) with open("test_drow.il", "w") as f: f.write(vl) diff --git a/src/soc/scoreboard/fu_reg_matrix.py b/src/soc/scoreboard/fu_reg_matrix.py index 7c35c357..826f688d 100644 --- a/src/soc/scoreboard/fu_reg_matrix.py +++ b/src/soc/scoreboard/fu_reg_matrix.py @@ -38,6 +38,7 @@ class FURegDepMatrix(Elaboratable): """ def __init__(self, n_fu_row, n_reg_col, n_src, cancel=None): self.n_src = n_src + self.n_dst = 1 # XXX TODO self.n_fu_row = nf = n_fu_row # Y (FUs) ^v self.n_reg_col = n_reg = n_reg_col # X (Regs) <> @@ -89,7 +90,8 @@ class FURegDepMatrix(Elaboratable): # matrix of dependency cells # --- cancel_mode = self.cancel is not None - dm = tuple(DependencyRow(self.n_reg_col, self.n_src, cancel_mode) \ + dm = tuple(DependencyRow(self.n_reg_col, self.n_src, self.n_dst, + cancel_mode=cancel_mode) \ for r in range(self.n_fu_row)) for fu in range(self.n_fu_row): setattr(m.submodules, "dr_fu%d" % fu, dm[fu]) -- 2.30.2